Merge branch '3198-inode-cache' refs #3198
author Peter Amstutz <peter.amstutz@curoverse.com>
Mon, 11 May 2015 15:57:52 +0000 (11:57 -0400)
committer Peter Amstutz <peter.amstutz@curoverse.com>
Mon, 11 May 2015 15:57:52 +0000 (11:57 -0400)
197 files changed:
apps/workbench/Gemfile
apps/workbench/Gemfile.lock
apps/workbench/app/assets/images/mouse-move.gif [new file with mode: 0644]
apps/workbench/app/assets/images/pipeline-running.gif [new file with mode: 0644]
apps/workbench/app/assets/javascripts/application.js
apps/workbench/app/assets/javascripts/infinite_scroll.js
apps/workbench/app/assets/javascripts/modal_pager.js [new file with mode: 0644]
apps/workbench/app/assets/javascripts/pipeline_instances.js
apps/workbench/app/assets/javascripts/selection.js.erb
apps/workbench/app/assets/stylesheets/application.css.scss
apps/workbench/app/controllers/actions_controller.rb
apps/workbench/app/controllers/collections_controller.rb
apps/workbench/app/controllers/jobs_controller.rb
apps/workbench/app/controllers/pipeline_instances_controller.rb
apps/workbench/app/controllers/projects_controller.rb
apps/workbench/app/controllers/repositories_controller.rb
apps/workbench/app/controllers/users_controller.rb
apps/workbench/app/helpers/application_helper.rb
apps/workbench/app/helpers/jobs_helper.rb [deleted file]
apps/workbench/app/helpers/pipeline_instances_helper.rb
apps/workbench/app/models/pipeline_instance.rb
apps/workbench/app/models/repository.rb
apps/workbench/app/views/application/_choose.html.erb
apps/workbench/app/views/application/_content.html.erb
apps/workbench/app/views/application/_show_sharing.html.erb
apps/workbench/app/views/application/_title_and_buttons.html.erb
apps/workbench/app/views/collections/_show_files.html.erb
apps/workbench/app/views/collections/hash_matches.html.erb
apps/workbench/app/views/getting_started/_getting_started_popup.html.erb [new file with mode: 0644]
apps/workbench/app/views/layouts/application.html.erb
apps/workbench/app/views/layouts/body.html.erb
apps/workbench/app/views/pipeline_instances/_running_component.html.erb
apps/workbench/app/views/pipeline_instances/_show_components.html.erb
apps/workbench/app/views/pipeline_instances/_show_components_editable.html.erb
apps/workbench/app/views/pipeline_instances/_show_inputs.html.erb
apps/workbench/app/views/pipeline_instances/_show_log.html.erb
apps/workbench/app/views/projects/_index_jobs_and_pipelines.html.erb
apps/workbench/app/views/projects/_show_contents_rows.html.erb
apps/workbench/app/views/projects/_show_description.html.erb
apps/workbench/app/views/projects/_show_tab_contents.html.erb
apps/workbench/app/views/repositories/_repository_breadcrumbs.html.erb [new file with mode: 0644]
apps/workbench/app/views/repositories/show_blob.html.erb [new file with mode: 0644]
apps/workbench/app/views/repositories/show_commit.html.erb [new file with mode: 0644]
apps/workbench/app/views/repositories/show_tree.html.erb [new file with mode: 0644]
apps/workbench/app/views/users/_add_ssh_key_popup.html.erb
apps/workbench/app/views/users/profile.html.erb
apps/workbench/config/application.default.yml
apps/workbench/config/database.yml
apps/workbench/config/routes.rb
apps/workbench/test/controllers/actions_controller_test.rb
apps/workbench/test/controllers/projects_controller_test.rb
apps/workbench/test/controllers/repositories_controller_test.rb
apps/workbench/test/helpers/collections_helper_test.rb
apps/workbench/test/helpers/repository_stub_helper.rb [new file with mode: 0644]
apps/workbench/test/helpers/share_object_helper.rb
apps/workbench/test/integration/anonymous_access_test.rb
apps/workbench/test/integration/application_layout_test.rb
apps/workbench/test/integration/collections_test.rb
apps/workbench/test/integration/errors_test.rb
apps/workbench/test/integration/integration_test_utils.rb [new file with mode: 0644]
apps/workbench/test/integration/jobs_test.rb
apps/workbench/test/integration/pipeline_instances_test.rb
apps/workbench/test/integration/projects_test.rb
apps/workbench/test/integration/report_issue_test.rb
apps/workbench/test/integration/repositories_browse_test.rb [new file with mode: 0644]
apps/workbench/test/integration/user_manage_account_test.rb
apps/workbench/test/integration/user_profile_test.rb
apps/workbench/test/test_helper.rb
apps/workbench/test/unit/pipeline_instance_test.rb
crunch_scripts/crunchutil/vwd.py
crunch_scripts/run-command
doc/_config.yml
doc/_includes/_navbar_top.liquid
doc/_layouts/default.html.liquid
doc/api/methods/jobs.html.textile.liquid
doc/api/schema/Job.html.textile.liquid
doc/api/schema/Repository.html.textile.liquid
doc/index.html.liquid
doc/install/create-standard-objects.html.textile.liquid
doc/install/install-api-server.html.textile.liquid
doc/install/install-keepstore.html.textile.liquid
doc/install/install-sso.html.textile.liquid
doc/install/install-workbench-app.html.textile.liquid
doc/user/index.html.textile.liquid
doc/user/topics/run-command.html.textile.liquid
docker/api/application.yml.in
docker/keep/run-keep.in
docker/mkimage-debootstrap.sh
docker/sso/Dockerfile
docker/sso/application.yml.in [new file with mode: 0644]
sdk/cli/bin/crunch-job
sdk/go/arvadosclient/arvadosclient.go
sdk/go/arvadosclient/arvadosclient_test.go
sdk/go/keepclient/keepclient.go
sdk/go/keepclient/keepclient_test.go
sdk/go/keepclient/root_sorter.go
sdk/go/keepclient/root_sorter_test.go
sdk/go/keepclient/support.go
sdk/python/arvados/arvfile.py
sdk/python/arvados/collection.py
sdk/python/arvados/commands/arv_copy.py
sdk/python/arvados/errors.py
sdk/python/arvados/keep.py
sdk/python/arvados/retry.py
sdk/python/setup.py
sdk/python/tests/arvados_testutil.py
sdk/python/tests/keepstub.py [new file with mode: 0644]
sdk/python/tests/nginx.conf [new file with mode: 0644]
sdk/python/tests/run_test_server.py
sdk/python/tests/test_arvfile.py
sdk/python/tests/test_collections.py
sdk/python/tests/test_errors.py
sdk/python/tests/test_keep_client.py
sdk/python/tests/test_retry.py
sdk/python/tests/test_stream.py
sdk/ruby/lib/arvados/collection.rb
sdk/ruby/lib/arvados/keep.rb
sdk/ruby/test/sdk_fixtures.rb
sdk/ruby/test/test_collection.rb
sdk/ruby/test/test_keep_manifest.rb
services/api/Gemfile
services/api/Gemfile.lock
services/api/Rakefile
services/api/app/controllers/application_controller.rb
services/api/app/controllers/arvados/v1/collections_controller.rb
services/api/app/controllers/arvados/v1/jobs_controller.rb
services/api/app/controllers/arvados/v1/schema_controller.rb
services/api/app/models/arvados_model.rb
services/api/app/models/collection.rb
services/api/app/models/commit.rb
services/api/app/models/job.rb
services/api/app/models/node.rb
services/api/app/models/repository.rb
services/api/app/models/user.rb
services/api/config/application.default.yml
services/api/config/initializers/load_config.rb [moved from services/api/config/initializers/zz_load_config.rb with 80% similarity]
services/api/config/initializers/preload_all_models.rb [moved from services/api/config/initializers/zz_preload_all_models.rb with 55% similarity]
services/api/db/migrate/20150423145759_no_filenames_in_collection_search_index.rb [new file with mode: 0644]
services/api/db/structure.sql
services/api/lib/tasks/delete_old_job_logs.rake [new file with mode: 0644]
services/api/lib/tasks/test_tasks.rake [new file with mode: 0644]
services/api/script/crunch-dispatch.rb
services/api/test/fixtures/api_client_authorizations.yml
services/api/test/fixtures/groups.yml
services/api/test/fixtures/jobs.yml
services/api/test/fixtures/links.yml
services/api/test/fixtures/logs.yml
services/api/test/fixtures/pipeline_instances.yml
services/api/test/fixtures/pipeline_templates.yml
services/api/test/fixtures/repositories.yml
services/api/test/fixtures/users.yml
services/api/test/functional/arvados/v1/collections_controller_test.rb
services/api/test/functional/arvados/v1/commits_controller_test.rb
services/api/test/functional/arvados/v1/job_reuse_controller_test.rb
services/api/test/functional/arvados/v1/jobs_controller_test.rb
services/api/test/functional/arvados/v1/repositories_controller_test.rb
services/api/test/functional/arvados/v1/users_controller_test.rb
services/api/test/helpers/git_test_helper.rb
services/api/test/tasks/delete_old_job_logs_test.rb [new file with mode: 0644]
services/api/test/test.git.tar
services/api/test/test_helper.rb
services/api/test/unit/arvados_model_test.rb
services/api/test/unit/collection_test.rb
services/api/test/unit/commit_test.rb
services/api/test/unit/job_test.rb
services/api/test/unit/repository_test.rb
services/api/test/unit/user_test.rb
services/arv-git-httpd/auth_handler.go
services/arv-git-httpd/basic_auth_test.go
services/arv-git-httpd/server_test.go
services/crunchstat/.gitignore [new file with mode: 0644]
services/crunchstat/crunchstat.go
services/keepproxy/.gitignore [new file with mode: 0644]
services/keepproxy/keepproxy.go
services/keepproxy/keepproxy_test.go
services/keepstore/.gitignore [new file with mode: 0644]
services/keepstore/bufferpool.go [new file with mode: 0644]
services/keepstore/bufferpool_test.go [new file with mode: 0644]
services/keepstore/handler_test.go
services/keepstore/handlers.go
services/keepstore/keepstore.go
services/keepstore/keepstore_test.go
services/keepstore/logging_router.go
services/keepstore/perms_test.go
services/keepstore/pull_worker.go
services/keepstore/pull_worker_integration_test.go
services/keepstore/pull_worker_test.go
services/keepstore/trash_worker.go
services/keepstore/trash_worker_test.go
services/keepstore/volume.go
services/keepstore/volume_test.go [new file with mode: 0644]
services/keepstore/volume_unix.go
services/keepstore/volume_unix_test.go
services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
services/nodemanager/arvnodeman/daemon.py
services/nodemanager/tests/test_computenode_dispatch.py
services/nodemanager/tests/test_daemon.py

diff --git a/apps/workbench/Gemfile b/apps/workbench/Gemfile
index b51f674d90f68bfb50d9304068f915e42b04aea4..d4f5e6cf3dd0b1aa37ae2068a0f32f557821b427 100644 (file)
@@ -1,10 +1,9 @@
 source 'https://rubygems.org'
 
 gem 'rails', '~> 4.1.0'
-gem 'arvados', '>= 0.1.20150313191637'
-
-gem 'sqlite3'
+gem 'arvados', '>= 0.1.20150511150219'
 
+gem 'activerecord-nulldb-adapter'
 gem 'multi_json'
 gem 'oj'
 gem 'sass'
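
The sqlite3 gem is dropped above in favor of activerecord-nulldb-adapter, suggesting Workbench no longer keeps a local database of its own. A plausible companion sketch for apps/workbench/config/database.yml (which also appears in the file list above), assuming the nulldb adapter's documented configuration; the file's actual contents are not shown in this diff:

    # Hypothetical database.yml using the null-database adapter.
    development:
      adapter: nulldb
    test:
      adapter: nulldb
    production:
      adapter: nulldb
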
diff --git a/apps/workbench/Gemfile.lock b/apps/workbench/Gemfile.lock
index 19b2857358fe208a52e50dcf50953897856f2da1..9fc75509656a23396bde21b34b9f6dfb395818f5 100644 (file)
@@ -30,6 +30,8 @@ GEM
       activemodel (= 4.1.9)
       activesupport (= 4.1.9)
       arel (~> 5.0.0)
+    activerecord-nulldb-adapter (0.3.1)
+      activerecord (>= 2.0.0)
     activesupport (4.1.9)
       i18n (~> 0.6, >= 0.6.9)
       json (~> 1.7, >= 1.7.7)
@@ -40,7 +42,7 @@ GEM
     andand (1.3.3)
     angularjs-rails (1.3.8)
     arel (5.0.1.20140414130214)
-    arvados (0.1.20150313191637)
+    arvados (0.1.20150511150219)
       activesupport (>= 3.2.13)
       andand (~> 1.3, >= 1.3.3)
       google-api-client (~> 0.6.3, >= 0.6.3)
@@ -229,7 +231,6 @@ GEM
       actionpack (>= 3.0)
       activesupport (>= 3.0)
       sprockets (>= 2.8, < 4.0)
-    sqlite3 (1.3.10)
     sshkey (1.6.1)
     therubyracer (0.12.1)
       libv8 (~> 3.16.14.0)
@@ -256,9 +257,10 @@ PLATFORMS
 
 DEPENDENCIES
   RedCloth
+  activerecord-nulldb-adapter
   andand
   angularjs-rails
-  arvados (>= 0.1.20150313191637)
+  arvados (>= 0.1.20150511150219)
   bootstrap-sass (~> 3.1.0)
   bootstrap-tab-history-rails
   bootstrap-x-editable-rails
@@ -292,7 +294,6 @@ DEPENDENCIES
   selenium-webdriver
   simplecov (~> 0.7)
   simplecov-rcov
-  sqlite3
   sshkey
   themes_for_rails!
   therubyracer
diff --git a/apps/workbench/app/assets/images/mouse-move.gif b/apps/workbench/app/assets/images/mouse-move.gif
new file mode 100644 (file)
index 0000000..497b159
Binary files /dev/null and b/apps/workbench/app/assets/images/mouse-move.gif differ
diff --git a/apps/workbench/app/assets/images/pipeline-running.gif b/apps/workbench/app/assets/images/pipeline-running.gif
new file mode 100644 (file)
index 0000000..64e9009
Binary files /dev/null and b/apps/workbench/app/assets/images/pipeline-running.gif differ
diff --git a/apps/workbench/app/assets/javascripts/application.js b/apps/workbench/app/assets/javascripts/application.js
index 172ff873e83dd72acd92fdc7977118126da09443..65e856df3a90143a5955def3698acc89784ff438 100644 (file)
@@ -145,6 +145,12 @@ jQuery(function($){
         on('ready ajax:complete', function() {
             // This makes the dialog close on Esc key, obviously.
             $('.modal').attr('tabindex', '-1')
+        }).
+        on('ready', function() {
+            // Need this to trigger input validation/synchronization callbacks because some browsers
+            // auto-fill form fields (e.g., when navigating "back" to a page where some text
+            // had been entered in a search box) without triggering a change or input event.
+            $('input').trigger('input');
         });
 
     HeaderRowFixer = function(selector) {
diff --git a/apps/workbench/app/assets/javascripts/infinite_scroll.js b/apps/workbench/app/assets/javascripts/infinite_scroll.js
index 81a3a4639b8c7f63a2b42a416252664d746a6b78..6e467f5594747449af66e89998dadd1e668aa3a6 100644 (file)
@@ -1,3 +1,37 @@
+// infinite_scroll.js loads more of a tab's content automatically when
+// the user scrolls to the bottom of the page and more data is available.
+//
+// Usage:
+//
+// 1. Adding infinite scrolling to a tab pane using the "show" method
+//
+//  The steps below describe adding scrolling to the project#show action.
+//
+//  a. In the "app/views/projects/" folder, add a file for your tab
+//      (ex: _show_jobs_and_pipelines.html.erb).
+//    In this file, add a div or tbody with data-infinite-scroller
+//      (see the markup sketch after this hunk).
+//      Note: This page uses _show_tab_contents.html.erb so that
+//            several tabs can reuse this implementation.
+//    Also add the filters to be used for loading the tab content.
+//
+//  b. Add a file named "_show_contents_rows.html.erb" that loads
+//    the data (by invoking get_objects_and_names from the controller).
+//
+//  c. In "app/controllers/projects_controller.rb", update the show
+//    method to add a block for "params[:partial]" that loads the
+//    show_contents_rows partial.
+//    Optionally, add a "tab_counts" method that loads the total
+//      object counts to be displayed for this tab.
+//
+// 2. Adding infinite scrolling to the "Recent" tab on an "index" page
+//  The steps below describe adding scrolling to the pipeline_instances index page.
+//
+//  a. In the "app/views/pipeline_instances/_show_recent.html.erb" file,
+//      add a div or tbody with data-infinite-scroller.
+//
+//  b. Add the partial "_show_recent_rows.html.erb" that displays the page contents on scroll using @objects.
+
 function maybe_load_more_content(event) {
     var scroller = this;
     var $container = $(event.data.container);
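
A rough sketch of the container markup from step 1a of the comment above. The file name comes from the comment; the id and the data-infinite-scroller value (assumed here to point at the scrollable element) are illustrative, not taken from this commit:

    <%# app/views/projects/_show_jobs_and_pipelines.html.erb (hypothetical) %>
    <table class="table">
      <tbody id="jobs-and-pipelines" data-infinite-scroller="#jobs-and-pipelines">
        <%# Rows are rendered by _show_contents_rows.html.erb when the %>
        <%# controller's show method is called with params[:partial]. %>
      </tbody>
    </table>
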
diff --git a/apps/workbench/app/assets/javascripts/modal_pager.js b/apps/workbench/app/assets/javascripts/modal_pager.js
new file mode 100644 (file)
index 0000000..58cf7e4
--- /dev/null
@@ -0,0 +1,44 @@
+// Usage:
+//
+// 1. Add some buttons to your modal, one with class="pager-next" and
+// one with class="pager-prev".
+//
+// 2. Put multiple .modal-body sections in your modal.
+//
+// 3. Add a "pager-count" div where the page count is shown, e.g.,
+// "1 of 10" on the first of 10 pages. (A markup sketch follows this file.)
+
+$(document).on('click', '.modal .pager-next', function() {
+    var $modal = $(this).parents('.modal');
+    $modal.data('page', ($modal.data('page') || 0) + 1).trigger('pager:render');
+    return false;
+}).on('click', '.modal .pager-prev', function() {
+    var $modal = $(this).parents('.modal');
+    $modal.data('page', ($modal.data('page') || 1) - 1).trigger('pager:render');
+    return false;
+}).on('ready ajax:success', function() {
+    $('.modal').trigger('pager:render');
+}).on('pager:render', '.modal', function() {
+    var $modal = $(this);
+    var page = $modal.data('page') || 0;
+    var $panes = $('.modal-body', $modal);
+    if (page >= $panes.length) {
+        // Somehow moved past end
+        page = $panes.length - 1;
+        $modal.data('page', page);
+    } else if (page < 0) {
+        page = 0;
+    }
+
+    var $pager_count = $('.pager-count', $modal);
+    $pager_count.text((page+1) + " of " + $panes.length);
+
+    var selected = $panes.hide().eq(page).show();
+    enableButton($('.pager-prev', $modal), page > 0);
+    enableButton($('.pager-next', $modal), page < $panes.length - 1);
+    function enableButton(btn, ok) {
+        btn.prop('disabled', !ok).
+            toggleClass('btn-primary', ok).
+            toggleClass('btn-default', !ok);
+    }
+});
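
A minimal sketch of the markup this pager expects, assembled from the usage comments at the top of the file; the modal id, button labels, and pane contents are illustrative:

    <div class="modal" id="example-pager-modal">
      <div class="modal-body">Page one content</div>
      <div class="modal-body">Page two content</div>
      <div class="modal-footer">
        <button class="btn btn-default pager-prev">Prev</button>
        <button class="btn btn-default pager-next">Next</button>
        <div class="pager-count"></div>
      </div>
    </div>

On 'pager:render' the handler above hides every pane but the current one, fills .pager-count with "1 of 2", and toggles btn-primary/btn-default on the pager buttons to show which direction is available.
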
diff --git a/apps/workbench/app/assets/javascripts/pipeline_instances.js b/apps/workbench/app/assets/javascripts/pipeline_instances.js
index e820ba978ec3dddf07dcec83f08fb9573d6a7727..8bb25c13c080138641e6e8ed1a22124a585c44ae 100644 (file)
@@ -1,6 +1,6 @@
 function run_pipeline_button_state() {
     var a = $('a.editable.required.editable-empty,input.form-control.required[value=""]');
-    if (a.length > 0) {
+    if ((a.length > 0) || ($('.unreadable-inputs-present').length)) {
         $(".run-pipeline-button").addClass("disabled");
     }
     else {
@@ -44,6 +44,17 @@ $(document).on('ready ajax:complete', function() {
             $tag.parent().prev().css("background-color", "");
         }
     });
+    $('input.required').each(function() {
+        var $tag = $(this);
+        if ($tag.hasClass("unreadable-input")) {
+            $tag.parent().parent().css("background-color", "#ffdddd");
+            $tag.parent().parent().prev().css("background-color", "#ffdddd");
+        }
+        else {
+            $tag.parent().parent().css("background-color", "");
+            $tag.parent().parent().prev().css("background-color", "");
+        }
+    });
     run_pipeline_button_state();
 });
 
diff --git a/apps/workbench/app/assets/javascripts/selection.js.erb b/apps/workbench/app/assets/javascripts/selection.js.erb
index 55df78697c59112b50e4b0d61f753dc56f775453..5c69c50c119b5dd62c930b3b2144c109812d6871 100644 (file)
@@ -86,3 +86,11 @@ $(document).
             on('click', dispatch_selection_action);
         $(this).trigger('selections-updated');
     });
+
+function select_all_items() {
+  $(".arv-selectable-items :checkbox").filter(":visible").prop("checked", true).trigger("change");
+}
+
+function unselect_all_items() {
+  $(".arv-selectable-items :checkbox").filter(":visible").prop("checked", false).trigger("change");
+}
diff --git a/apps/workbench/app/assets/stylesheets/application.css.scss b/apps/workbench/app/assets/stylesheets/application.css.scss
index ec7eee44b72d3954600a79365d9f4eaa2896e56a..9560bb7dc86662a12d49672254205d2ef890508c 100644 (file)
@@ -296,3 +296,15 @@ span.editable-textile {
 ul.nav.nav-tabs {
     font-size: 90%
 }
+
+.hover-dropdown:hover .dropdown-menu {
+  display: block;
+}
+
+.arv-description-as-subtitle .editable-inline,
+.arv-description-as-subtitle .editable-inline .form-group,
+.arv-description-as-subtitle .editable-inline .form-group .editable-input,
+.arv-description-as-subtitle .editable-inline .form-group .editable-input textarea
+{
+    width: 98% !important;
+}
diff --git a/apps/workbench/app/controllers/actions_controller.rb b/apps/workbench/app/controllers/actions_controller.rb
index 7737a3cfe4abdc8cded0159ac3bb1d5e13527768..e6ef6eb894d9ca3e6aad25aba909bcbbb0f32305 100644 (file)
@@ -1,3 +1,5 @@
+require "arvados/collection"
+
 class ActionsController < ApplicationController
 
   skip_filter :require_thread_api_token, only: [:report_issue_popup, :report_issue]
@@ -100,141 +102,113 @@ class ActionsController < ApplicationController
     end
   end
 
-  def arv_normalize mt, *opts
-    r = ""
-    env = Hash[ENV].
-      merge({'ARVADOS_API_HOST' =>
-              arvados_api_client.arvados_v1_base.
-              sub(/\/arvados\/v1/, '').
-              sub(/^https?:\/\//, ''),
-              'ARVADOS_API_TOKEN' => 'x',
-              'ARVADOS_API_HOST_INSECURE' =>
-              Rails.configuration.arvados_insecure_https ? 'true' : 'false'
-            })
-    IO.popen([env, 'arv-normalize'] + opts, 'w+b') do |io|
-      io.write mt
-      io.close_write
-      while buf = io.read(2**16)
-        r += buf
-      end
+  expose_action :combine_selected_files_into_collection do
+    link_uuids, coll_ids = params["selection"].partition do |sel_s|
+      ArvadosBase::resource_class_for_uuid(sel_s) == Link
     end
-    r
-  end
 
-  expose_action :combine_selected_files_into_collection do
-    uuids = []
-    pdhs = []
-    files = []
-    params["selection"].each do |s|
-      a = ArvadosBase::resource_class_for_uuid s
-      if a == Link
-        begin
-          if (m = CollectionsHelper.match(Link.find(s).head_uuid))
-            pdhs.append(m[1] + m[2])
-            files.append(m)
-          end
-        rescue
+    unless link_uuids.empty?
+      Link.select([:head_uuid]).where(uuid: link_uuids).each do |link|
+        if ArvadosBase::resource_class_for_uuid(link.head_uuid) == Collection
+          coll_ids << link.head_uuid
         end
-      elsif (m = CollectionsHelper.match(s))
-        pdhs.append(m[1] + m[2])
-        files.append(m)
-      elsif (m = CollectionsHelper.match_uuid_with_optional_filepath(s))
-        uuids.append(m[1])
-        files.append(m)
       end
     end
 
-    pdhs = pdhs.uniq
-    uuids = uuids.uniq
-    chash = {}
-
-    Collection.select([:uuid, :manifest_text]).where(uuid: uuids).each do |c|
-      chash[c.uuid] = c
+    uuids = []
+    pdhs = []
+    source_paths = Hash.new { |hash, key| hash[key] = [] }
+    coll_ids.each do |coll_id|
+      if m = CollectionsHelper.match(coll_id)
+        key = m[1] + m[2]
+        pdhs << key
+        source_paths[key] << m[4]
+      elsif m = CollectionsHelper.match_uuid_with_optional_filepath(coll_id)
+        key = m[1]
+        uuids << key
+        source_paths[key] << m[4]
+      end
     end
 
-    Collection.select([:portable_data_hash, :manifest_text]).where(portable_data_hash: pdhs).each do |c|
-      chash[c.portable_data_hash] = c
+    unless pdhs.empty?
+      Collection.where(portable_data_hash: pdhs.uniq).
+          select([:uuid, :portable_data_hash]).each do |coll|
+        unless source_paths[coll.portable_data_hash].empty?
+          uuids << coll.uuid
+          source_paths[coll.uuid] = source_paths.delete(coll.portable_data_hash)
+        end
+      end
     end
 
-    combined = ""
-    files_in_dirs = {}
-    files.each do |m|
-      mt = chash[m[1]+m[2]].andand.manifest_text
-      if not m[4].nil? and m[4].size > 1
-        manifest_files = files_in_dirs['.']
-        if !manifest_files
-          manifest_files = []
-          files_in_dirs['.'] = manifest_files
-        end
-        manifest_file = m[4].split('/')[-1]
-        uniq_file = derive_unique_filename(manifest_file, manifest_files)
-        normalized = arv_normalize mt, '--extract', ".#{m[4]}"
-        normalized = normalized.gsub(/(\d+:\d+:)(#{Regexp.quote manifest_file})/) {|s| "#{$1}#{uniq_file}" }
-        combined += normalized
-        manifest_files << uniq_file
+    new_coll = Arv::Collection.new
+    Collection.where(uuid: uuids.uniq).
+        select([:uuid, :manifest_text]).each do |coll|
+      src_coll = Arv::Collection.new(coll.manifest_text)
+      src_pathlist = source_paths[coll.uuid]
+      if src_pathlist.any?(&:blank?)
+        src_pathlist = src_coll.each_file_path
+        destdir = nil
       else
-        mt = arv_normalize mt
-        manifest_streams = mt.split "\n"
-        adjusted_streams = []
-        manifest_streams.each do |stream|
-          manifest_parts = stream.split
-          adjusted_parts = []
-          manifest_files = files_in_dirs[manifest_parts[0]]
-          if !manifest_files
-            manifest_files = []
-            files_in_dirs[manifest_parts[0]] = manifest_files
-          end
-
-          manifest_parts.each do |part|
-            part_match = /(\d+:\d+:)(\S+)/.match(part)
-            if part_match
-              uniq_file = derive_unique_filename(part_match[2], manifest_files)
-              adjusted_parts << "#{part_match[1]}#{uniq_file}" 
-              manifest_files << uniq_file
-            else
-              adjusted_parts << part
-            end
-          end
-          adjusted_streams << adjusted_parts.join(' ')
+        destdir = "."
+      end
+      src_pathlist.each do |src_path|
+        src_path = src_path.sub(/^(\.\/|\/|)/, "./")
+        src_stream, _, basename = src_path.rpartition("/")
+        dst_stream = destdir || src_stream
+        # Generate a unique destination name by appending (1), (2), etc.:
+        # the number goes just before the first interior dot, or at the end
+        # if there is none. (A standalone sketch follows this file's diff.)
+        if match = basename.match(/[^\.]\./)
+          suffix_start = match.begin(0) + 1
+        else
+          suffix_start = basename.size
         end
-        adjusted_streams.each do |stream|
-          combined += (stream + "\n")
+        suffix_size = 0
+        dst_path = nil
+        loop.each_with_index do |_, try_count|
+          dst_path = "#{dst_stream}/#{basename}"
+          break unless new_coll.exist?(dst_path)
+          uniq_suffix = "(#{try_count + 1})"
+          basename[suffix_start, suffix_size] = uniq_suffix
+          suffix_size = uniq_suffix.size
         end
+        new_coll.cp_r(src_path, dst_path, src_coll)
       end
     end
 
-    normalized = arv_normalize combined
-    newc = Collection.new({:manifest_text => normalized})
-    newc.name = newc.name || "Collection created at #{Time.now.localtime}"
+    coll_attrs = {
+      manifest_text: new_coll.manifest_text,
+      name: "Collection created at #{Time.now.localtime}",
+    }
+    flash = {}
 
     # set owner_uuid to current project, provided it is writable
-    current_project_writable = false
-    action_data = JSON.parse(params['action_data']) if params['action_data']
-    if action_data && action_data['current_project_uuid']
-      current_project = Group.find(action_data['current_project_uuid']) rescue nil
-      if (current_project && current_project.writable_by.andand.include?(current_user.uuid))
-        newc.owner_uuid = action_data['current_project_uuid']
-        current_project_writable = true
-      end
+    action_data = Oj.load(params['action_data'] || "{}")
+    if action_data['current_project_uuid'] and
+        current_project = Group.find?(action_data['current_project_uuid']) and
+        current_project.writable_by.andand.include?(current_user.uuid)
+      coll_attrs[:owner_uuid] = current_project.uuid
+      flash[:message] =
+        "Created new collection in the project #{current_project.name}."
+    else
+      flash[:message] = "Created new collection in your Home project."
     end
 
-    newc.save!
-
-    chash.each do |k,v|
-      l = Link.new({
-                     tail_uuid: k,
-                     head_uuid: newc.uuid,
-                     link_class: "provenance",
-                     name: "provided"
-                   })
-      l.save!
+    newc = Collection.create!(coll_attrs)
+    source_paths.each_key do |src_uuid|
+      unless Link.create({
+                           tail_uuid: src_uuid,
+                           head_uuid: newc.uuid,
+                           link_class: "provenance",
+                           name: "provided",
+                         })
+        flash[:error] = "
+An error occurred when saving provenance information for this collection.
+You can try recreating the collection to get a copy with full provenance data."
+        break
+      end
     end
-
-    msg = current_project_writable ?
-              "Created new collection in the project #{current_project.name}." :
-              "Created new collection in your Home project."
-
-    redirect_to newc, flash: {'message' => msg}
+    redirect_to(newc, flash: flash)
   end
 
   def report_issue_popup
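
A standalone restatement of the uniquifying loop in combine_selected_files_into_collection above, for illustration only; Arv::Collection#exist? is stood in for by a plain Set:

    require "set"

    # Append (1), (2), ... just before the first interior dot (or at the
    # end of the name) until the result is not in `taken`, mirroring the
    # loop in the diff above.
    def uniquify(basename, taken)
      basename = basename.dup
      if match = basename.match(/[^\.]\./)
        suffix_start = match.begin(0) + 1
      else
        suffix_start = basename.size
      end
      suffix_size = 0
      loop.each_with_index do |_, try_count|
        break unless taken.include?(basename)
        uniq_suffix = "(#{try_count + 1})"
        basename[suffix_start, suffix_size] = uniq_suffix
        suffix_size = uniq_suffix.size
      end
      basename
    end

    taken = Set.new(["foo.txt", "foo(1).txt", ".bashrc"])
    uniquify("foo.txt", taken)   # => "foo(2).txt"
    uniquify(".bashrc", taken)   # => ".bashrc(1)"
    uniquify("new.txt", taken)   # => "new.txt"
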
diff --git a/apps/workbench/app/controllers/collections_controller.rb b/apps/workbench/app/controllers/collections_controller.rb
index 8d25e68eef49fdf28e74d17ac44ad4bc5210844e..0610fd2e81852569c0e37da380fd3bbff51f2ac0 100644 (file)
@@ -198,7 +198,7 @@ class CollectionsController < ApplicationController
 
     if current_user
       if Keep::Locator.parse params["uuid"]
-        @same_pdh = Collection.filter([["portable_data_hash", "=", @object.portable_data_hash]])
+        @same_pdh = Collection.filter([["portable_data_hash", "=", @object.portable_data_hash]]).limit(20)
         if @same_pdh.results.size == 1
           redirect_to collection_path(@same_pdh[0]["uuid"])
           return
@@ -206,6 +206,8 @@ class CollectionsController < ApplicationController
         owners = @same_pdh.map(&:owner_uuid).to_a.uniq
         preload_objects_for_dataclass Group, owners
         preload_objects_for_dataclass User, owners
+        uuids = @same_pdh.map(&:uuid).to_a.uniq
+        preload_links_for_objects uuids
         render 'hash_matches'
         return
       else
diff --git a/apps/workbench/app/controllers/jobs_controller.rb b/apps/workbench/app/controllers/jobs_controller.rb
index 7edf8cc30d542a69489589b52b3c05d4abf0e270..398417734c71c34f2aaac71fbf700eaf4d5f50d1 100644 (file)
@@ -4,8 +4,6 @@ class JobsController < ApplicationController
     'show' == ctrl.action_name
   }
 
-  include JobsHelper
-
   def generate_provenance(jobs)
     return if params['tab_pane'] != "Provenance"
 
diff --git a/apps/workbench/app/controllers/pipeline_instances_controller.rb b/apps/workbench/app/controllers/pipeline_instances_controller.rb
index b4cce9be03e42bd2899590101a671717deb6295b..c5fbda0cf349177801a0bcbbd75c7c95634b56ef 100644 (file)
@@ -284,6 +284,62 @@ class PipelineInstancesController < ApplicationController
     %w(Compare Graph)
   end
 
+  helper_method :unreadable_inputs_present?
+  def unreadable_inputs_present?
+    unless @unreadable_inputs_present.nil?
+      return @unreadable_inputs_present
+    end
+
+    input_uuids = []
+    input_pdhs = []
+    @object.components.each do |k, component|
+      next if !component
+      component[:script_parameters].andand.each do |p, tv|
+        if (tv.is_a? Hash) and ((tv[:dataclass] == "Collection") || (tv[:dataclass] == "File"))
+          if tv[:value]
+            value = tv[:value]
+          elsif tv[:default]
+            value = tv[:default]
+          else
+            value = ''
+          end
+          if value.present?
+            split = value.split '/'
+            if CollectionsHelper.match(split[0])
+              input_pdhs << split[0]
+            else
+              input_uuids << split[0]
+            end
+          end
+        end
+      end
+    end
+
+    input_pdhs = input_pdhs.uniq
+    input_uuids = input_uuids.uniq
+
+    preload_collections_for_objects input_uuids if input_uuids.any?
+    preload_for_pdhs input_pdhs if input_pdhs.any?
+
+    @unreadable_inputs_present = false
+    input_uuids.each do |uuid|
+      if !collections_for_object(uuid).any?
+        @unreadable_inputs_present = true
+        break
+      end
+    end
+    if !@unreadable_inputs_present
+      input_pdhs.each do |pdh|
+        if !collection_for_pdh(pdh).any?
+          @unreadable_inputs_present = true
+          break
+        end
+      end
+    end
+
+    @unreadable_inputs_present
+  end
+
   protected
   def for_comparison v
     if v.is_a? Hash or v.is_a? Array
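
For illustration, a hypothetical components hash that unreadable_inputs_present? above would scan; the first parameter's value starts with a portable data hash and the second's default with a collection UUID, so they land in input_pdhs and input_uuids respectively (both identifiers are made up):

    {
      part_one: {
        script_parameters: {
          input: { dataclass: "Collection",
                   value: "fa7aeb5140e2848d39b416daeef4ffc5+45/input.fastq" },
          reference: { dataclass: "File",
                       default: "zzzzz-4zz18-xxxxxxxxxxxxxxx/ref.fa" },
        }
      }
    }
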
diff --git a/apps/workbench/app/controllers/projects_controller.rb b/apps/workbench/app/controllers/projects_controller.rb
index 3302771814eb3bc217f72c2e0aa5768e932dc65e..b43ad65e9134cb1ce69af4e603850b4a60a5b823 100644 (file)
@@ -65,7 +65,7 @@ class ProjectsController < ApplicationController
       {
         :name => 'Subprojects',
         :filters => [%w(uuid is_a arvados#group)]
-      } if current_user
+      }
     pane_list <<
       {
         :name => 'Other_objects',
@@ -136,7 +136,7 @@ class ProjectsController < ApplicationController
           item.update_attributes owner_uuid: current_user.uuid
           @removed_uuids << item.uuid
         rescue ArvadosApiClient::ApiErrorResponseException => e
-          if e.message.include? 'collection_owner_uuid_name_unique'
+          if e.message.include? '_owner_uuid_name_unique'
             rename_to = item.name + ' removed from ' +
                         (@object.name ? @object.name : @object.uuid) +
                         ' at ' + Time.now.to_s
diff --git a/apps/workbench/app/controllers/repositories_controller.rb b/apps/workbench/app/controllers/repositories_controller.rb
index d32c92a1e71fde336c99b52b990b86f019662af8..c5b3501b328e1214cc00c292c19f61d0be07312f 100644 (file)
@@ -16,4 +16,20 @@ class RepositoriesController < ApplicationController
     panes.delete('Attributes') if !current_user.is_admin
     panes
   end
+
+  def show_tree
+    @commit = params[:commit]
+    @path = params[:path] || ''
+    @subtree = @object.ls_subtree @commit, @path.chomp('/')
+  end
+
+  def show_blob
+    @commit = params[:commit]
+    @path = params[:path]
+    @blobdata = @object.cat_file @commit, @path
+  end
+
+  def show_commit
+    @commit = params[:commit]
+  end
 end
diff --git a/apps/workbench/app/controllers/users_controller.rb b/apps/workbench/app/controllers/users_controller.rb
index 0ca5a85f018af48187865efe030195bbdeeebdbf..f6d571157d02459f8450478ba42aee71c572e3e5 100644 (file)
@@ -52,15 +52,15 @@ class UsersController < ApplicationController
                1.month.ago.beginning_of_month,
                Time.now.beginning_of_month]]
     @spans.each do |span, threshold_start, threshold_end|
-      @activity[:logins][span] = Log.
+      @activity[:logins][span] = Log.select(%w(uuid modified_by_user_uuid)).
         filter([[:event_type, '=', 'login'],
                 [:object_kind, '=', 'arvados#user'],
                 [:created_at, '>=', threshold_start],
                 [:created_at, '<', threshold_end]])
-      @activity[:jobs][span] = Job.
+      @activity[:jobs][span] = Job.select(%w(uuid modified_by_user_uuid)).
         filter([[:created_at, '>=', threshold_start],
                 [:created_at, '<', threshold_end]])
-      @activity[:pipeline_instances][span] = PipelineInstance.
+      @activity[:pipeline_instances][span] = PipelineInstance.select(%w(uuid modified_by_user_uuid)).
         filter([[:created_at, '>=', threshold_start],
                 [:created_at, '<', threshold_end]])
       @activity.each do |type, act|
diff --git a/apps/workbench/app/helpers/application_helper.rb b/apps/workbench/app/helpers/application_helper.rb
index 72f961f38a225e1e707fc8db92d1da77e5f7c7d7..14b1c34d11e0d45821a1929c00254038c36ba50f 100644 (file)
@@ -165,7 +165,11 @@ module ApplicationHelper
       if opts[:no_link] or (resource_class == User && !current_user)
         raw(link_name)
       else
-        (link_to raw(link_name), { controller: resource_class.to_s.tableize, action: 'show', id: ((opts[:name_link].andand.uuid) || link_uuid) }, style_opts) + raw(tags)
+        controller_class = resource_class.to_s.tableize
+        if controller_class.eql?('groups') and object.andand.group_class.eql?('project')
+          controller_class = 'projects'
+        end
+        (link_to raw(link_name), { controller: controller_class, action: 'show', id: ((opts[:name_link].andand.uuid) || link_uuid) }, style_opts) + raw(tags)
       end
     else
       # just return attrvalue if it is not recognizable as an Arvados object or uuid.
@@ -178,29 +182,50 @@ module ApplicationHelper
   end
 
   def link_to_arvados_object_if_readable(attrvalue, link_text_if_not_readable, opts={})
-    resource_class = resource_class_for_uuid(attrvalue)
+    resource_class = resource_class_for_uuid(attrvalue.split('/')[0]) if attrvalue.is_a?(String)
     if !resource_class
       return link_to_if_arvados_object attrvalue, opts
     end
 
+    readable = object_readable attrvalue, resource_class
+    if readable
+      link_to_if_arvados_object attrvalue, opts
+    elsif opts[:required] and current_user # no need to show this for anonymous user
+      raw('<div><input type="text" style="border:none;width:100%;background:#ffdddd" disabled="disabled" class="required unreadable-input" value="') + link_text_if_not_readable + raw('"></div>')
+    else
+      link_text_if_not_readable
+    end
+  end
+
+  # This method takes advantage of preloaded collections and objects,
+  # so you can improve performance by preloading objects related to
+  # the page context before calling it.
+  def object_readable attrvalue, resource_class=nil
+    # if it is a collection filename, check readable for the locator
+    attrvalue = attrvalue.split('/')[0] if attrvalue
+
+    resource_class = resource_class_for_uuid(attrvalue) if resource_class.nil?
+    return if resource_class.nil?
+
+    return_value = nil
     if resource_class.to_s == 'Collection'
       if CollectionsHelper.match(attrvalue)
-        readable = collection_for_pdh(attrvalue).any?
+        found = collection_for_pdh(attrvalue)
+        return_value = found.first if found.any?
       else
-        readable = collections_for_object(attrvalue).any?
+        found = collections_for_object(attrvalue)
+        return_value = found.first if found.any?
       end
     else
-      readable = object_for_dataclass(resource_class, attrvalue)
-    end
-
-    if readable
-      link_to_if_arvados_object attrvalue, opts
-    else
-      link_text_if_not_readable
+      return_value = object_for_dataclass(resource_class, attrvalue)
     end
+    return_value
   end
 
-  def render_editable_attribute(object, attr, attrvalue=nil, htmloptions={})
+  # Render object's attr as an editable attribute, showing attrvalue (or
+  # attr's current value when nil). htmloptions go on the editable element;
+  # nonhtml_options only customize its display. (Usage sketch follows this diff.)
+  def render_editable_attribute(object, attr, attrvalue=nil, htmloptions={}, nonhtml_options={})
     attrvalue = object.send(attr) if attrvalue.nil?
     if not object.attribute_editable?(attr)
       if attrvalue && attrvalue.length > 0
@@ -252,11 +277,16 @@ module ApplicationHelper
       "id" => span_id,
       :class => "editable #{is_textile?( object, attr ) ? 'editable-textile' : ''}"
     }.merge(htmloptions).merge(ajax_options)
+
     edit_tiptitle = 'edit'
     edit_tiptitle = 'Warning: do not use hyphens in the repository name as they will be stripped' if (object.class.to_s == 'Repository' and attr == 'name')
-    edit_button = raw('<a href="#" class="btn btn-xs btn-default btn-nodecorate" data-toggle="x-editable tooltip" data-toggle-selector="#' + span_id + '" data-placement="top" title="' + (htmloptions[:tiptitle] || edit_tiptitle) + '"><i class="fa fa-fw fa-pencil"></i></a>')
-    if htmloptions[:btnplacement] == :left
+
+    edit_button = raw('<a href="#" class="btn btn-xs btn-' + (nonhtml_options[:btnclass] || 'default') + ' btn-nodecorate" data-toggle="x-editable tooltip" data-toggle-selector="#' + span_id + '" data-placement="top" title="' + (nonhtml_options[:tiptitle] || edit_tiptitle) + '"><i class="fa fa-fw fa-pencil"></i>' + (nonhtml_options[:btntext] || '') + '</a>')
+
+    if nonhtml_options[:btnplacement] == :left
       edit_button + ' ' + span_tag
+    elsif nonhtml_options[:btnplacement] == :top
+      edit_button + raw('<br/>') + span_tag
     else
       span_tag + ' ' + edit_button
     end
@@ -294,7 +324,7 @@ module ApplicationHelper
     end
 
     if not object.andand.attribute_editable?(attr)
-      return link_to_arvados_object_if_readable(attrvalue, attrvalue, friendly_name: true)
+      return link_to_arvados_object_if_readable(attrvalue, attrvalue, {friendly_name: true, required: required})
     end
 
     if dataclass
@@ -346,10 +376,11 @@ module ApplicationHelper
            success: 'page-refresh'
          }.to_json,
         })
+
       return content_tag('div', :class => 'input-group') do
         html = text_field_tag(dn, display_value,
                               :class =>
-                              "form-control #{'required' if required}")
+                              "form-control #{'required' if required} #{'unreadable-input' if attrvalue.present? and !object_readable(attrvalue, Collection)}")
         html + content_tag('span', :class => 'input-group-btn') do
           link_to('Choose',
                   modal_path,
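
An illustrative call with the new fifth argument to render_editable_attribute; the attribute name and option values here are hypothetical:

    <%# htmloptions still decorate the editable element itself; %>
    <%# display tweaks now travel in nonhtml_options. %>
    <%= render_editable_attribute(@object, 'description', nil,
                                  {'data-emptytext' => 'none'},
                                  {btnplacement: :top, btntext: 'Edit'}) %>
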
diff --git a/apps/workbench/app/helpers/jobs_helper.rb b/apps/workbench/app/helpers/jobs_helper.rb
deleted file mode 100644 (file)
index 889dd23..0000000
+++ /dev/null
@@ -1,22 +0,0 @@
-module JobsHelper
-  def stderr_log_history(job_uuids, limit=2000)
-    results = []
-
-    log_history = Log.where(event_type: 'stderr',
-                            object_uuid: job_uuids).limit(limit).order('id DESC')
-    if !log_history.results.empty?
-      reversed_results = log_history.results.reverse
-      reversed_results.each do |entry|
-        if entry.andand.properties
-          properties = entry.properties
-          text = properties[:text]
-          if text
-            results = results.concat text.split("\n")
-          end
-        end
-      end
-    end
-    return results
-  end
-
-end
diff --git a/apps/workbench/app/helpers/pipeline_instances_helper.rb b/apps/workbench/app/helpers/pipeline_instances_helper.rb
index b0d5216efd1588069050d5b0d5aa371abc07492d..ba05f9e88cd7cd2f0345c993d5e60a7603e2bb8c 100644 (file)
@@ -306,4 +306,11 @@ module PipelineInstancesHelper
     raw(s)
   end
 
+  def render_unreadable_inputs_present
+    if current_user and controller.class.name.eql?('PipelineInstancesController') and unreadable_inputs_present?
+      raw('<div class="alert alert-danger unreadable-inputs-present">' +
+            '<p>One or more of the inputs provided are not readable by you. ' +
+              'Please correct these before running the pipeline.</p></div>')
+    end
+  end
 end
diff --git a/apps/workbench/app/models/pipeline_instance.rb b/apps/workbench/app/models/pipeline_instance.rb
index f575e20d4ea964355dda807bbafd5d21a33892e9..03d70b2897063435298707e3068dfa79ca16ff34 100644 (file)
@@ -1,3 +1,5 @@
+require "arvados/keep"
+
 class PipelineInstance < ArvadosBase
   attr_accessor :pipeline_template
 
@@ -81,4 +83,54 @@ class PipelineInstance < ArvadosBase
   def textile_attributes
     [ 'description' ]
   end
+
+  def job_uuids
+    components_map { |cspec| cspec[:job][:uuid] rescue nil }
+  end
+
+  def job_log_ids
+    components_map { |cspec| cspec[:job][:log] rescue nil }
+  end
+
+  def stderr_log_object_uuids
+    result = job_uuids.values.compact
+    result << uuid
+  end
+
+  def stderr_log_query(limit=nil)
+    query = Log.
+      where(event_type: "stderr",
+            object_uuid: stderr_log_object_uuids).
+      order("id DESC")
+    unless limit.nil?
+      query = query.limit(limit)
+    end
+    query
+  end
+
+  def stderr_log_lines(limit=2000)
+    stderr_log_query(limit).results.reverse.
+      flat_map { |log| log.properties[:text].split("\n") rescue [] }
+  end
+
+  def has_readable_logs?
+    log_pdhs, log_uuids = job_log_ids.values.compact.partition do |loc_s|
+      Keep::Locator.parse(loc_s)
+    end
+    if log_pdhs.any? and
+        Collection.where(portable_data_hash: log_pdhs).limit(1).results.any?
+      true
+    elsif log_uuids.any? and
+        Collection.where(uuid: log_uuids).limit(1).results.any?
+      true
+    else
+      stderr_log_query(1).results.any?
+    end
+  end
+
+  private
+
+  def components_map
+    Hash[components.map { |cname, cspec| [cname, yield(cspec)] }]
+  end
 end
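
A sketch of how a controller or view might drive the new log helpers on PipelineInstance; the lookup is illustrative:

    pi = PipelineInstance.find(params[:uuid])
    if pi.has_readable_logs?
      # Up to the most recent 2000 stderr lines across the pipeline
      # and its jobs, oldest first.
      log_lines = pi.stderr_log_lines
    else
      # e.g., disable the Log tab, as the _content.html.erb hunk
      # further down does.
    end
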
diff --git a/apps/workbench/app/models/repository.rb b/apps/workbench/app/models/repository.rb
index b062dda8610ae7feed3494befbcd42acea37ec4e..1caab89cc70fe643dc37965fcde5561ce0096f70 100644 (file)
@@ -12,4 +12,103 @@ class Repository < ArvadosBase
       []
     end
   end
+
+  def show commit_sha1
+    refresh
+    run_git 'show', commit_sha1
+  end
+
+  def cat_file commit_sha1, path
+    refresh
+    run_git 'cat-file', 'blob', commit_sha1 + ':' + path
+  end
+
+  def ls_tree_lr commit_sha1
+    refresh
+    run_git 'ls-tree', '-l', '-r', commit_sha1
+  end
+
+  # ls_subtree returns a list of files under the given path at the
+  # specified commit. Results are returned as an array of file nodes,
+  # where each file node is an array [file mode, blob sha1, file size
+  # in bytes, path relative to the given directory]. If the path is
+  # not found, [] is returned.
+  def ls_subtree commit, path
+    path = path.chomp '/'
+    subtree = []
+    ls_tree_lr(commit).each_line do |line|
+      mode, type, sha1, size, filepath = line.split
+      next if type != 'blob'
+      if filepath[0,path.length] == path and
+          (path == '' or filepath[path.length] == '/')
+        subtree << [mode.to_i(8), sha1, size.to_i,
+                    filepath[path.length,filepath.length]]
+      end
+    end
+    subtree
+  end
+
+  # http_fetch_url returns the first http:// or https:// url (if any)
+  # in the api response's clone_urls attribute.
+  def http_fetch_url
+    clone_urls.andand.select { |u| /^http/ =~ u }.first
+  end
+
+  protected
+
+  # refresh fetches the latest repository content into the local
+  # cache. It is a no-op if it has already been run on this object:
+  # this (pretty much) avoids doing more than one remote git operation
+  # per Workbench request.
+  def refresh
+    run_git 'fetch', http_fetch_url, '+*:*' unless @fresh
+    @fresh = true
+  end
+
+  # run_git sets up the ARVADOS_API_TOKEN environment variable,
+  # creates a local git directory for this repository if necessary,
+  # executes "git --git-dir localgitdir {args to run_git}", and
+  # returns the output. It raises GitCommandError if git exits
+  # non-zero.
+  def run_git *gitcmd
+    if not @workdir
+      workdir = File.expand_path uuid+'.git', Rails.configuration.repository_cache
+      if not File.exists? workdir
+        FileUtils.mkdir_p Rails.configuration.repository_cache
+        [['git', 'init', '--bare', workdir],
+        ].each do |cmd|
+          system *cmd
+          raise GitCommandError.new($?.to_s) unless $?.exitstatus == 0
+        end
+      end
+      @workdir = workdir
+    end
+    [['git', '--git-dir', @workdir, 'config', '--local',
+      "credential.#{http_fetch_url}.username", 'none'],
+     ['git', '--git-dir', @workdir, 'config', '--local',
+      "credential.#{http_fetch_url}.helper",
+      '!cred(){ cat >/dev/null; if [ "$1" = get ]; then echo password=$ARVADOS_API_TOKEN; fi; };cred'],
+     ['git', '--git-dir', @workdir, 'config', '--local',
+           'http.sslVerify',
+           Rails.configuration.arvados_insecure_https ? 'false' : 'true'],
+     ].each do |cmd|
+      system *cmd
+      raise GitCommandError.new($?.to_s) unless $?.exitstatus == 0
+    end
+    env = {}.
+      merge(ENV).
+      merge('ARVADOS_API_TOKEN' => Thread.current[:arvados_api_token])
+    cmd = ['git', '--git-dir', @workdir] + gitcmd
+    io = IO.popen(env, cmd, err: [:child, :out])
+    output = io.read
+    io.close
+    # "If [io] is opened by IO.popen, close sets $?." --ruby 2.2.1 docs
+    unless $?.exitstatus == 0
+      raise GitCommandError.new("`git #{gitcmd.join ' '}` #{$?}: #{output}")
+    end
+    output
+  end
+
+  class GitCommandError < StandardError
+  end
 end
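
A sketch of how the browsing actions added to repositories_controller.rb (above) exercise these methods; the repository UUID and commit are placeholders:

    repo = Repository.find("zzzzz-s0uqq-xxxxxxxxxxxxxxx")
    # ls_subtree entries: [file mode (integer), blob sha1, size in bytes,
    # path relative to the given directory].
    repo.ls_subtree("master", "doc").each do |mode, sha1, size, path|
      puts format("%06o %s %8d %s", mode, sha1, size, path)
    end
    repo.cat_file("master", "doc/README")  # => blob contents as a String
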
diff --git a/apps/workbench/app/views/application/_choose.html.erb b/apps/workbench/app/views/application/_choose.html.erb
index 3233d8d0062d71f6aa97538b046bd53ab2db74e8..b1e63da375885f7b2e34fb5e75b1326b05c1e568 100644 (file)
@@ -7,6 +7,10 @@
       </div>
 
       <div class="modal-body">
+        <% if params[:message].present? %>
+          <p> <%= params[:message] %> </p>
+        <% end %>
+
         <% project_filters, chooser_filters = (params[:filters] || []).partition do |attr, op, val|
              attr == "owner_uuid" and op == "="
            end %>
diff --git a/apps/workbench/app/views/application/_content.html.erb b/apps/workbench/app/views/application/_content.html.erb
index 5e6211f4623fbedfcfe72539fc1beb4c56b3540c..9441a46c26d067f423188db099d240087f29a191 100644 (file)
                 link_disabled = "disabled"
               end
             end
-          elsif controller.model_class.to_s == 'PipelineInstance'
-            log_uuids = [@object.uuid] + pipeline_jobs(@object).collect{|x|x[:job].andand[:uuid]}.compact
-            if stderr_log_history(log_uuids, 1).empty?
-              data_toggle = "disabled"
-              tab_tooltip = "Log data is not available"
-              link_disabled = "disabled"
-            end
+          elsif (controller.model_class.to_s == 'PipelineInstance' and
+                 !@object.has_readable_logs?)
+            data_toggle = "disabled"
+            tab_tooltip = "Log data is not available"
+            link_disabled = "disabled"
           end
         end
       %>
diff --git a/apps/workbench/app/views/application/_show_sharing.html.erb b/apps/workbench/app/views/application/_show_sharing.html.erb
index 21d6521051c188cb2adc376c7e1a38b7feb11a3e..f22ba87a511ba7e2544e1b6125cd9444167dae4b 100644 (file)
@@ -48,6 +48,7 @@
 
   <%= link_to(send("choose_#{share_class}_path",
       title: "Share with #{share_class}",
+      message: "Only #{share_class} you are allowed to access are shown. Please contact your administrator if you need to be added to a specific group.",
       by_project: false,
       preview_pane: false,
       multiple: true,
diff --git a/apps/workbench/app/views/application/_title_and_buttons.html.erb b/apps/workbench/app/views/application/_title_and_buttons.html.erb
index 31ff2e6e21a6e659ad8f75d14d2630e020b4d62b..398f248a39bc478b016b64537a310a6b578bb9b8 100644 (file)
   <% end %>
 <% end %>
 
-<%
-  # Display any flash messages in an alert. If there is any entry with "error" key, alert-danger is used.
-  flash_msg = ''
-  flash_msg_is_error = false
-  flash.each do |msg|
-    flash_msg_is_error ||= (msg[0]=='error')
-    flash_msg += ('<p class="contain-align-left">' + msg[1] + '</p>')
-  end
-  if flash_msg != ''
-%>
-<div class="flash-message alert <%= flash_msg_is_error ? 'alert-danger' : 'alert-warning' %>"><%=flash_msg.html_safe%></div>
+<% unless flash["error"].blank? %>
+<div class="flash-message alert alert-danger" role="alert">
+  <p class="contain-align-left"><%= flash["error"] %></p>
+</div>
+<% flash.delete("error") %>
+<% end %>
+
+<% unless flash.empty? %>
+<div class="flash-message alert alert-warning">
+  <% flash.each do |_, msg| %>
+  <p class="contain-align-left"><%= msg %></p>
+  <% end %>
+</div>
 <% end %>
diff --git a/apps/workbench/app/views/collections/_show_files.html.erb b/apps/workbench/app/views/collections/_show_files.html.erb
index 26e71c8d5d32a545f6ccccccaed32af3d98b2087..e3c79f143b0de8fdf6d50dcc591ef05af6fef7b8 100644 (file)
@@ -1,13 +1,3 @@
-<script>
-function select_all_files() {
-  $("#collection_files :checkbox").filter(":visible").prop("checked", true).trigger("change");
-}
-
-function unselect_all_files() {
-  $("#collection_files :checkbox").filter(":visible").prop("checked", false).trigger("change");
-}
-</script>
-
 <%
   preview_selectable_container = ''
   preview_selectable = ''
@@ -38,8 +28,8 @@ function unselect_all_files() {
           </ul>
         </div>
         <div class="btn-group btn-group-sm">
-          <button id="select-all" type="button" class="btn btn-default" onClick="select_all_files()">Select all</button>
-          <button id="unselect-all" type="button" class="btn btn-default" onClick="unselect_all_files()">Unselect all</button>
+          <button id="select-all" type="button" class="btn btn-default" onClick="select_all_items()">Select all</button>
+          <button id="unselect-all" type="button" class="btn btn-default" onClick="unselect_all_items()">Unselect all</button>
         </div>
       </div>
       <div class="pull-right">
@@ -53,7 +43,7 @@ function unselect_all_files() {
   <% if file_tree.nil? or file_tree.empty? %>
     <p>This collection is empty.</p>
   <% else %>
-    <ul id="collection_files" class="collection_files <%=preview_selectable_container%>">
+    <ul id="collection_files" class="collection_files arv-selectable-items <%=preview_selectable_container%>">
     <% dirstack = [file_tree.first.first] %>
     <% file_tree.take(10000).each_with_index do |(dirname, filename, size), index| %>
       <% file_path = CollectionsHelper::file_path([dirname, filename]) %>
diff --git a/apps/workbench/app/views/collections/hash_matches.html.erb b/apps/workbench/app/views/collections/hash_matches.html.erb
index 7c4abb080eb8b9a393308dfc62c3ad18de928eaf..c93b6acbdaed631bfa4a89381cfcc1a02bfa9e1e 100644 (file)
@@ -1,3 +1,9 @@
+<%
+  message = "The following collections have this content:"
+  if @same_pdh.items_available > @same_pdh.results.size
+    message += ' (' + (@same_pdh.items_available - @same_pdh.results.size).to_s + ' more results are not shown)'
+  end
+%>
 <div class="row">
   <div class="col-md-10 col-md-offset-1">
     <div class="panel panel-info">
@@ -5,7 +11,7 @@
         <h3 class="panel-title"><%= params["uuid"] %></h3>
       </div>
       <div class="panel-body">
-        <p><i>The following collections have this content:</i></p>
+        <p><i><%= message %></i></p>
         <% @same_pdh.sort { |a,b| b.created_at <=> a.created_at }.each do |c| %>
           <div class="row">
             <div class="col-md-8">
diff --git a/apps/workbench/app/views/getting_started/_getting_started_popup.html.erb b/apps/workbench/app/views/getting_started/_getting_started_popup.html.erb
new file mode 100644 (file)
index 0000000..0db0567
--- /dev/null
@@ -0,0 +1,179 @@
+<style>
+div.figure {
+}
+.style_image1 {
+  border: 10px solid #ddd;
+  display: block;
+  margin-left: auto;
+  margin-right: auto;
+}
+.style_image2 {
+  border: 10px solid #ddd;
+  display: block;
+  margin-left: 1em;
+}
+div.figure p {
+  text-align: center;
+  font-style: italic;
+  text-indent: 0;
+  margin-top: -0.3em;
+}
+</style>
+
+<div id="getting-started-modal-window" class="modal">
+  <div class="modal-dialog modal-with-loading-spinner" style="width: 50em">
+    <div class="modal-content">
+      <div class="modal-header" style="text-align: center">
+        <button type="button" class="close" data-dismiss="modal" aria-hidden="true">x</button>
+        <div>
+          <div class="col-sm-8"><h4 class="modal-title" style="text-align: right">Getting Started with Arvados</h4></div>  <%#Todo: center instead of right%>
+          <div class="spinner spinner-32px spinner-h-center col-sm-1" hidden="true"></div>
+        </div>
+        <br/>
+      </div>
+
+      <%#First Page%>
+      <div class="modal-body" style="height: 40em; overflow-y: scroll">
+        <div style="margin-top: -0.5em; margin-left: 0.5em;">
+          <p><div style="font-size: 150%;">Welcome!</div></p>
+          <p>
+            What you're looking at right now is <b>Workbench</b>, the graphical interface to the Arvados system.
+          </p><p>
+            <div class="figure">
+              <p> <%= image_tag "pipeline-running.gif", :class => "style_image1" %></p> <%#Todo: shorter gif%>
+              <p>Running the Pathomap pipeline in Arvados.</p>
+            </div>
+          </p><p>
+            Click the <span class="btn btn-sm btn-primary">Next &gt;</span> button below for a speed tour of Arvados.
+          </p><p style="margin-top:2em;">
+            <em><strong>Note:</strong> You can always come back to this Getting Started guide by clicking the <span class="fa fa-lg fa-question-circle"></span> in the upper-right corner.</em>
+          </p>
+        </div>
+      </div>
+
+      <%#Page Two%>
+      <div class="modal-body" style="height: 40em; overflow-y: scroll">
+        <div style="margin-top: -0.5em; margin-left: 0.5em;">
+          <p><div style="font-size: 150%;">Take It for a Spin</div></p>
+          <p>
+            Run your first pipeline in 3 quick steps:
+          </p>
+          <div style="display: block; margin: 0em 2em; padding-top: 1em; padding-bottom: 1em; border: thin dashed silver;">
+            <p style="margin-left: 1em;">
+              <em>First, <a href="/users/welcome">log-in or register</a> with any Google account if you haven't already.</em>
+            </p><p>
+              <ol><li> Go to the <span class="btn btn-sm btn-default"><i class="fa fa-lg fa-fw fa-dashboard"></i> Dashboard</span> &gt; <span class="btn btn-sm btn-primary"><i class="fa fa-fw fa-gear"></i> Run a pipeline...</span>
+                  <p style="margin-top:1em;">
+                    <%= image_tag "mouse-move.gif", :class => "style_image2" %>
+                  </p>
+                </li>
+                <li> <span class="btn btn-sm btn-default"><i class="fa fa-fw fa-gear"></i>Mason Lab -- Ancestry Mapper (public)</span> &gt; <span class="btn btn-sm btn-primary">Next: choose inputs <i class="fa fa-fw fa-arrow-circle-right"></i></span></li><br>
+                <li> <span class="btn btn-sm btn-primary">Run <i class="fa fa-fw fa-play"></i></span></li>
+              </ol>
+          </p></div>
+          <p style="margin-top:1em;">
+            <i class="fa fa-flag fa-flip-horizontal" style="color: green"></i> <i class="fa fa-child"></i>
+            <strong>Voila!</strong> <i class="fa fa-child"></i> <i class="fa fa-flag" style="color: green"></i>
+            Your pipeline is now spooling up and getting ready to run!
+          </p><p>
+            Go ahead, try it for yourself right now. <span class="glyphicon glyphicon-thumbs-up"></span>
+          </p><p>
+            Or click <span class="btn btn-sm btn-primary">Next &gt;</span> below to keep reading!
+          </p>
+        </div>
+      </div>
+
+      <%#Page Three%>
+      <div class="modal-body" style="height: 40em; overflow-y: scroll">
+        <div style="margin-top: -0.5em; margin-left: 0.5em;">
+          <p><div style="font-size: 150%;">Three Useful Terms</div></p>
+          <ol>
+            <li>
+              <strong>Pipeline</strong> — A re-usable series of analysis steps.
+              <ul>
+                <li>
+                  Also known as a “workflow” in other systems
+                </li><li>
+                  A list of well-documented public pipelines can be found in the upper right corner by clicking the <span class="fa fa-lg fa-question-circle"></span> &gt; <a href="<%= Rails.configuration.arvados_public_data_doc_url %>">Public Pipelines and Datasets</a>
+                </li><li>
+                  Pro-tip: A Pipeline contains Jobs which contain Tasks
+                </li><li>
+                  Pipelines can only be shared within a project
+                </li>
+              </ul>
+            </li>
+
+            <li>
+              <strong>Collection </strong>— Like a folder, but better.
+              <ul>
+                <li>
+                  Upload data right in your browser
+                </li><li>
+                  Better than a folder?
+                  <ul><li>
+                      Collections contain the content-address of the data instead of the data itself
+                    </li><li>
+                      Sets of data can be flexibly defined and re-defined without duplicating data
+                    </li>
+                </ul></li><li>
+                  Collections can be shared using the "Sharing and Permissions"  &gt; "Share" button
+                </li>
+              </ul>
+            </li>
+
+            <li>
+              <strong>Projects </strong>— Contain pipeline templates, pipeline instances (individual runs of a pipeline), and collections.
+              <ul><li>
+                  The most useful one is your default "Home" project, under Projects &gt; Home
+                </li><li>
+                  Projects can be shared using the "Sharing" tab
+                </li>
+              </ul>
+            </li>
+          </ol>
+
+        </div>
+      </div>
+
+      <%#Page Four%>
+      <div class="modal-body" style="height: 40em; overflow-y: scroll">
+        <div style="margin-top: -0.5em; margin-left: 0.5em;">
+          <p><div style="font-size: 150%;">Six Reasons Arvados is Awesome</div></p>
+          <p>
+            This guide, and in fact all of Workbench, is just a teaser for the full power of Arvados:
+          </p>
+          <ol>
+            <li>
+              <strong>Reproducible analyses</strong>: Enough said.
+            </li><li>
+              <strong>Data provenance</strong>: Every file in Arvados can tell you where it came from.
+            </li><li>
+              <strong>Serious scaling</strong>: Need 500 GB of space? 200 compute hours? Arvados scales and parallelizes your work for you intelligently.
+            </li><li>
+              <strong>Share pipelines or data</strong>: Easily publish your work to the world, just like <a href="http://www.pathomap.org/2015/04/08/run-the-pathomap-human-ancestry-pipeline-on-arvados/">the Pathomap team did</a>.
+            </li><li>
+              <strong>Use existing pipelines</strong>: Use best-practices pipelines on your own data with the click of a button.
+            </li><li>
+              <strong>Open-source</strong>: Arvados is completely open-source. Check out our <a href="http://arvados.org">developer site</a>.
+            </li>
+          </ol>
+          <p style="margin-top: 1em;">
+            Want to use the command-line, or hungry to learn more? Check out the User Guide at <a href="http://doc.arvados.org/">doc.arvados.org</a>.
+          </p><p>
+            Still have questions? Head over to <a href="http://doc.arvados.org/">doc.arvados.org</a> for mailing-list and contact info for the Arvados community.
+          </p><p>
+            That's all, folks! Click the "x" up top to leave this guide.
+          </p>
+        </div>
+      </div>
+
+      <div class="modal-footer">
+        <div style="text-align:center">
+          <button class="btn btn-default pager-prev"><i class="fa fa-fw fa-chevron-left"></i><span style="font-weight: bold;"> Prev</span></button>
+          <button class="btn btn-default pager-next"><span style="font-weight: bold;">Next </span><i class="fa fa-fw fa-chevron-right"></i></button>
+          <div class="pager-count pull-right"><span style="margin:5px"></span></div>
+        </div>
+      </div>
+    </div>
+  </div>
+</div>
index cdc47c17169401995a24a77c9e2a8cdcbc46d72f..67756e7f8a77b02bc175f69a6277d84ba157bf36 100644 (file)
@@ -37,9 +37,9 @@
     @media (max-width: 979px) { body { padding-top: 0; } }
 
     @media (max-width: 767px) {
-    .breadcrumbs {
-    display: none;
-    }
+      .breadcrumbs {
+        padding-top: 0;
+      }
     }
   </style>
   <link href="//netdna.bootstrapcdn.com/font-awesome/4.1.0/css/font-awesome.css" rel="stylesheet">
index 5c4af108c5a8cc62380449b6b6ca0505f90f3ca8..9f253a47a5915848f52e6023c0727279db940c62 100644 (file)
             <li class="dropdown notification-menu">
               <a href="#" class="dropdown-toggle" data-toggle="dropdown" id="notifications-menu">
                 <span class="badge badge-alert notification-count"><%= user_notifications.length if user_notifications.any? %></span>
-                <%= current_user.email %> <span class="caret"></span>
+                <span class="fa fa-lg fa-user"></span>
+                <span class="caret"></span>
               </a>
               <ul class="dropdown-menu" role="menu">
+                <li role="presentation" class="dropdown-header">
+                  <%= current_user.email %>
+                </li>
                 <% if current_user.is_active %>
                 <li role="menuitem"><a href="/projects/<%=current_user.uuid%>" role="menuitem"><i class="fa fa-home fa-fw"></i> Home project </a></li>
                 <li role="menuitem"><a href="/manage_account" role="menuitem"><i class="fa fa-key fa-fw"></i> Manage account</a></li>
@@ -72,6 +76,7 @@
               <li class="dropdown">
                 <a href="#" class="dropdown-toggle" data-toggle="dropdown" id="system-menu">
                   <span class="fa fa-lg fa-gear"></span>
+                  <span class="caret"></span>
                 </a>
                 <ul class="dropdown-menu" role="menu">
                   <li role="presentation" class="dropdown-header">
               </li>
             <% end %>
           <% else %>
-            <li><a href="<%= arvados_api_client.arvados_login_url(return_to: root_url) %>">Log in</a></li>
+            <li class="dropdown hover-dropdown login-menu">
+              <a href="<%= arvados_api_client.arvados_login_url(return_to: root_url) %>">Log in</a>
+              <ul class="dropdown-menu">
+                <li>
+                  <a href="<%= arvados_api_client.arvados_login_url(return_to: root_url) %>">
+                    <span class="fa fa-lg fa-sign-in"></span>
+                    <p style="margin-left: 1.6em; margin-top: -1.35em; margin-bottom: 0em; margin-right: 0.5em;">Log in or register with<br/>any Google account</p>
+                  </a>
+                </li>
+              </ul>
+            </li>
           <% end %>
 
           <li class="dropdown help-menu">
             <a href="#" class="dropdown-toggle" data-toggle="dropdown" id="arv-help">
               <span class="fa fa-lg fa-question-circle"></span>
+              <span class="caret"></span>
             </a>
             <ul class="dropdown-menu">
+              <li role="presentation" class="dropdown-header">
+                Help
+              </li>
+              <li>
+              <%= link_to raw('<i class="fa fa-fw fa-info"></i> Getting Started ...'), "#",
+                   {'data-toggle' => "modal", 'data-target' => '#getting-started-modal-window'}  %>
+              </li>
+              <% if Rails.configuration.arvados_public_data_doc_url %>
+                <li><%= link_to raw('<i class="fa fa-book fa-fw"></i> Public Pipelines and Data sets'), "#{Rails.configuration.arvados_public_data_doc_url}", target: "_blank" %></li>
+              <% end %>
               <li><%= link_to raw('<i class="fa fa-book fa-fw"></i> Tutorials and User guide'), "#{Rails.configuration.arvados_docsite}/user", target: "_blank" %></li>
               <li><%= link_to raw('<i class="fa fa-book fa-fw"></i> API Reference'), "#{Rails.configuration.arvados_docsite}/api", target: "_blank" %></li>
               <li><%= link_to raw('<i class="fa fa-book fa-fw"></i> SDK Reference'), "#{Rails.configuration.arvados_docsite}/sdk", target: "_blank" %></li>
     <% end %>
 
     <%= render partial: 'browser_unsupported' %><%# requires JS support below %>
+    <%= render partial: 'getting_started/getting_started_popup' %>
 
     <div id="page-wrapper">
       <%= yield %>
 <div class="modal-container"></div>
 <div id="report-issue-modal-window"></div>
 <script src="/browser_unsupported.js"></script>
+
+<%  if current_user and !current_user.prefs[:getting_started_shown] and
+       !request.url.include?("/profile") and
+       !request.url.include?("/user_agreements") and
+       !request.url.include?("/inactive")%>
+  <script>
+    $("#getting-started-modal-window").modal('show');
+  </script>
+  <%
+    prefs = current_user.prefs
+    prefs[:getting_started_shown] = Time.now
+    current_user.update_attributes prefs: prefs.to_json
+  %>
+<% end %>
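
Editor's note: the two snippets above implement a show-once pattern: the modal is displayed only while the getting_started_shown pref is unset, and the pref is then stamped with the current time. A sketch of how the flag could be cleared again, using only the calls already visible above (illustrative, not part of this commit):

    # Illustrative only: re-arm the Getting Started popup for a user
    # by deleting the pref that the view code above just set.
    prefs = current_user.prefs
    prefs.delete(:getting_started_shown)
    current_user.update_attributes prefs: prefs.to_json
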
index 1a9cb3562afac89b3f615156af5c193851dc9c71..2ab8da1f4ce2d258d8f098c5c78175d6fbcdf713 100644 (file)
         <div class="row">
           <div class="col-md-6">
             <table>
+              <% # link to repo tree/file only if the repo is readable
+                 # and the commit is a sha1...
+                 repo =
+                 (/^[0-9a-f]{40}$/ =~ current_component[:script_version] and
+                 Repository.where(name: current_component[:repository]).first)
+
+                 # ...and the api server provides an http:// or https:// url
+                 repo = nil unless repo.andand.http_fetch_url
+                 %>
               <% [:script, :repository, :script_version, :supplied_script_version, :nondeterministic].each do |k| %>
                 <tr>
                   <td style="padding-right: 1em">
                   <td>
                     <% if current_component[k].nil? %>
                       (none)
+                    <% elsif repo and k == :repository %>
+                      <%= link_to current_component[k], show_repository_tree_path(id: repo.uuid, commit: current_component[:script_version], path: '/') %>
+                    <% elsif repo and k == :script %>
+                      <%= link_to current_component[k], show_repository_blob_path(id: repo.uuid, commit: current_component[:script_version], path: 'crunch_scripts/'+current_component[:script]) %>
+                    <% elsif repo and k == :script_version %>
+                      <%= link_to current_component[k], show_repository_commit_path(id: repo.uuid, commit: current_component[:script_version]) %>
                     <% else %>
                       <%= current_component[k] %>
                     <% end %>
index 7735997748389e1d3fa68713dc53c39626b961bf..dae57aa0e85ebbe3ccf7ce183a185dcc860b3f0b 100644 (file)
@@ -13,6 +13,8 @@
 
 <% else %>
   <%# state is either New or Ready %>
+  <%= render_unreadable_inputs_present %>
+
+  <p><i>Here are all of the pipeline's components (jobs that will need to run in order to complete the pipeline). If you know what you're doing (or you're experimenting), you can modify these parameters before starting the pipeline. Usually, you only need to edit the settings presented on the "Inputs" tab above.</i></p>
 
   <%= render_pipeline_components("editable", :json, editable: true) %>
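
Editor's note: render_unreadable_inputs_present is a helper added elsewhere in this commit and not shown in this excerpt. Judging from the integration tests further down, which expect the text "One or more inputs provided are not readable", a rough sketch might look like this (the @unreadable_inputs_present flag is an assumption):

    # Hypothetical sketch only; the real helper is defined elsewhere
    # in this commit. Renders a warning when any pipeline input
    # references a collection the current user cannot read.
    def render_unreadable_inputs_present
      if @unreadable_inputs_present
        raw('<div class="alert alert-danger">One or more inputs provided are not readable</div>')
      end
    end
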
index 51fe8d4d3c717025c349391036e518a68782c058..f6c9e85883487c611de38276ca8ee5dd579ca1d9 100644 (file)
@@ -1,23 +1,3 @@
-<%
-  input_uuids = []
-  input_pdhs = []
-  @object.components.each do |k, component|
-    next if !component
-    component[:script_parameters].andand.each do |p, tv|
-      if tv.is_a? Hash and !tv[:value].nil? and (tv[:dataclass] == "Collection")
-        if CollectionsHelper.match(tv[:value])
-          input_pdhs << tv[:value]
-        else
-          input_uuids << tv[:value]
-        end
-      end
-    end
-  end
-
-  preload_collections_for_objects input_uuids if input_uuids.any?
-  preload_for_pdhs input_pdhs if input_pdhs.any?
-%>
-
 <table class="table pipeline-components-table" style="margin-top: -.1em">
   <colgroup>
     <col style="width: 20%" />
index e6b7ef20347cf2a5c25af1b0304da4554a981b7a..567ca72504146667b3f7c04c4be70775466426bf 100644 (file)
@@ -32,6 +32,8 @@
 <% if n_inputs == 0 %>
   <p>This pipeline does not need any further inputs specified. You can start it by clicking the "Run" button whenever you're ready. (It's not too late to change existing settings, though.)</p>
 <% else %>
+  <%= render_unreadable_inputs_present %>
+
   <p><i>Provide <%= n_inputs > 1 ? 'values' : 'a value' %> for the following <%= n_inputs > 1 ? 'parameters' : 'parameter' %>, then click the "Run" button to start the pipeline.</i></p>
   <% if @object.editable? %>
     <%= content_for :pi_input_form %>
index bb756a08274044fd867ca3cd18c5e10946bb5a5e..187dce7bd244e44468edbdda111c0ce1166406f6 100644 (file)
@@ -1,12 +1,44 @@
-<% log_uuids = [@object.uuid] + pipeline_jobs(@object).collect{|x|x[:job].andand[:uuid]}.compact %>
-<% log_history = stderr_log_history(log_uuids) %>
-<div id="event_log_div"
-     class="arv-log-event-listener arv-log-event-handler-append-logs arv-log-event-subscribe-to-pipeline-job-uuids arv-job-log-window"
-     data-object-uuids="<%= log_uuids.join(' ') %>"
-     ><%= log_history.join("\n") %></div>
-
-<%# Applying a long throttle suppresses the auto-refresh of this
-    partial that would normally be triggered by arv-log-event. %>
-<div class="arv-log-refresh-control"
-     data-load-throttle="86486400000" <%# 1001 nights %>
-     ></div>
+<% log_ids = @object.job_log_ids
+   still_logging, done_logging = log_ids.keys.partition { |k| log_ids[k].nil? }
+%>
+
+<% unless done_logging.empty? %>
+  <table class="topalign table table-condensed table-fixedlayout">
+    <colgroup>
+      <col width="40%" />
+      <col width="60%" />
+    </colgroup>
+    <thead>
+      <tr>
+        <th>finished component</th>
+        <th>job log</th>
+      </tr>
+    </thead>
+    <tbody>
+      <% done_logging.each do |cname| %>
+      <tr>
+        <td><%= cname %></td>
+        <td><%= link_to("Log for #{cname}",
+                {controller: "collections", action: "show", id: log_ids[cname]})
+                %></td>
+      </tr>
+      <% end %>
+    </tbody>
+  </table>
+<% end %>
+
+<% unless still_logging.empty? %>
+  <h4>Logs in progress</h4>
+
+  <div id="event_log_div"
+       class="arv-log-event-listener arv-log-event-handler-append-logs arv-log-event-subscribe-to-pipeline-job-uuids arv-job-log-window"
+       data-object-uuids="<%= @object.stderr_log_object_uuids.join(' ') %>"
+       ><%= @object.stderr_log_lines.join("\n") %></div>
+
+  <%# Applying a long throttle suppresses the auto-refresh of this
+      partial that would normally be triggered by arv-log-event. %>
+  <div class="arv-log-refresh-control"
+       data-load-throttle="86486400000" <%# 1001 nights %>
+       ></div>
+<% end %>
+
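Editor's note: PipelineInstance#job_log_ids is a model method added in this commit (the model change is not shown in this excerpt). From the partition above, its return value is presumably a hash mapping component names to log collection ids, with nil for components still running:

    # Presumed shape of @object.job_log_ids, inferred from the view
    # code above (component names and the id are placeholders):
    {
      "previous"    => "zzzzz-4zz18-xxxxxxxxxxxxxxx",  # finished: linked in the table
      "still_going" => nil                             # running: shown in the live log
    }
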
index fb9a30593adb5682be73b4e6eab48c47f0176dd6..04696845fd751a149cc55b28442cb41b3dea6ed6 100644 (file)
@@ -6,7 +6,7 @@
       <div class="col-sm-4">
         <%= render :partial => "show_object_button", :locals => {object: object, size: 'xs'} %>
         <% if object.respond_to?(:name) %>
-          <%= render_editable_attribute object, 'name', nil, {tiptitle: 'rename'} %>
+          <%= render_editable_attribute object, 'name', nil, {}, {tiptitle: 'rename'} %>
         <% else %>
           <%= object.class_for_display %> <%= object.uuid %>
         <% end %>
index a6a371275b35b54782c5e647eb611efd887090f7..5f0f60b0b26a18d2f6a5285144efdb8c96e080a4 100644 (file)
@@ -26,7 +26,9 @@
     </td>
 
     <td>
-      <%= render_editable_attribute (name_link || object), 'name', nil, {tiptitle: 'rename'} %>
+      <% if object.respond_to?(:name) %>
+        <%= render_editable_attribute (name_link || object), 'name', nil, {}, {tiptitle: 'rename'} %>
+      <% end %>
     </td>
 
     <td class="arv-description-in-table">
index 726094074b1de7b810d4221353fda9e756ee12b0..443f359036a1776e655e32c36cd090c93870fd73 100644 (file)
@@ -1,5 +1,5 @@
 <% if @object.respond_to? :description %>
   <div class="arv-description-as-subtitle">
-    <%= render_editable_attribute @object, 'description', nil, { 'data-emptytext' => "(No description provided)", 'data-toggle' => 'manual' } %>
+    <%= render_editable_attribute @object, 'description', nil, { 'data-emptytext' => "(No description provided)", 'data-toggle' => 'manual', 'data-mode' => 'inline', 'data-rows' => 10 }, { btntext: 'Edit', btnclass: 'primary', btnplacement: :top } %>
   </div>
 <% end %>
index 0b308db6ff072e1f22c833831a586707cfbaa652..a33a1cfd355186c6effc6bfc07eb0f7b043defff 100644 (file)
           <% end %>
         </ul>
       </div>
+      <div class="btn-group btn-group-sm">
+        <button id="select-all" type="button" class="btn btn-default" onClick="select_all_items()">Select all</button>
+        <button id="unselect-all" type="button" class="btn btn-default" onClick="unselect_all_items()">Unselect all</button>
+      </div>
     </div>
     <div class="col-sm-4 pull-right">
       <input type="text" class="form-control filterable-control" placeholder="Search project contents" data-filterable-target="table.arv-index.arv-project-<%= tab_pane %> tbody"/>
     </div>
   </div>
 
-  <table class="table table-condensed arv-index arv-project-<%= tab_pane %>">
+  <table class="table table-condensed arv-index arv-selectable-items arv-project-<%= tab_pane %>">
     <colgroup>
       <col width="0*" style="max-width: fit-content;" />
       <col width="0*" style="max-width: fit-content;" />
diff --git a/apps/workbench/app/views/repositories/_repository_breadcrumbs.html.erb b/apps/workbench/app/views/repositories/_repository_breadcrumbs.html.erb
new file mode 100644 (file)
index 0000000..14f9ba7
--- /dev/null
@@ -0,0 +1,13 @@
+<div class="pull-right">
+  <span class="deemphasize">Browsing <%= @object.name %> repository at commit</span>
+  <%= link_to(@commit, show_repository_commit_path(id: @object.uuid, commit: @commit), title: 'show commit message') %>
+</div>
+<p>
+  <%= link_to(@object.name, show_repository_tree_path(id: @object.uuid, commit: @commit, path: ''), title: 'show root directory of source tree') %>
+  <% parents = ''
+     (@path || '').split('/').each do |pathpart|
+     parents = parents + pathpart + '/'
+     %>
+    / <%= link_to pathpart, show_repository_tree_path(id: @object.uuid, commit: @commit, path: parents) %>
+  <% end %>
+</p>
diff --git a/apps/workbench/app/views/repositories/show_blob.html.erb b/apps/workbench/app/views/repositories/show_blob.html.erb
new file mode 100644 (file)
index 0000000..acc34d1
--- /dev/null
@@ -0,0 +1,13 @@
+<%= render partial: 'repository_breadcrumbs' %>
+
+<% if not @blobdata.valid_encoding? %>
+  <div class="alert alert-warning">
+    <p>
+      This file has an invalid text encoding, so it can't be shown
+      here.  (This probably just means it's a binary file, not a text
+      file.)
+    </p>
+  </div>
+<% else %>
+  <pre><%= @blobdata %></pre>
+<% end %>
diff --git a/apps/workbench/app/views/repositories/show_commit.html.erb b/apps/workbench/app/views/repositories/show_commit.html.erb
new file mode 100644 (file)
index 0000000..3690be6
--- /dev/null
@@ -0,0 +1,3 @@
+<%= render partial: 'repository_breadcrumbs' %>
+
+<pre><%= @object.show @commit %></pre>
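
Editor's note: Repository#show is a model method (changed in this commit but not shown in this excerpt). A minimal sketch of what it plausibly does, assuming the model keeps a local bare mirror under the configured repository_cache and exposes its path:

    # Minimal sketch only; not the actual model code. Assumes a
    # local_mirror_path accessor pointing at the cached bare repo.
    def show(commit)
      IO.popen(['git', '--git-dir', local_mirror_path, 'show', commit]) do |io|
        io.read
      end
    end
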
diff --git a/apps/workbench/app/views/repositories/show_tree.html.erb b/apps/workbench/app/views/repositories/show_tree.html.erb
new file mode 100644 (file)
index 0000000..4e2fcec
--- /dev/null
@@ -0,0 +1,40 @@
+<%= render partial: 'repository_breadcrumbs' %>
+
+<table class="table table-condensed table-hover">
+  <thead>
+    <tr>
+      <th>File</th>
+      <th class="data-size">Size</th>
+    </tr>
+  </thead>
+  <tbody>
+    <% @subtree.each do |mode, sha1, size, subpath| %>
+      <tr>
+        <td>
+          <span style="opacity: 0.6">
+            <% pathparts = subpath.sub(/^\//, '').split('/')
+               basename = pathparts.pop
+               parents = @path
+               pathparts.each do |pathpart| %>
+              <% parents = parents + '/' + pathpart %>
+              <%= link_to pathpart, url_for(path: parents) %>
+              /
+            <% end %>
+          </span>
+          <%= link_to basename, url_for(action: :show_blob, path: parents + '/' + basename) %>
+        </td>
+        <td class="data-size">
+          <%= human_readable_bytes_html(size) %>
+        </td>
+      </tr>
+    <% end %>
+    <% if @subtree.empty? %>
+      <tr>
+        <td>
+          No files found.
+        </td>
+      </tr>
+    <% end %>
+  </tbody>
+  <tfoot></tfoot>
+</table>
index 25f4d75be19b0df6a450942bfd931f59aadc25c6..e6314cd1587cf4e4d9204e597c5cf69f2531b13b 100644 (file)
@@ -29,7 +29,7 @@
       </div>
 
       <div class="modal-footer">
-        <button class="btn btn-default" onClick="reset_form()" data-dismiss="modal" aria-hidden="true">Cancel</button>
+        <button type="button" class="btn btn-default" onClick="reset_form()" data-dismiss="modal" aria-hidden="true">Cancel</button>
         <button type="submit" class="btn btn-primary" autofocus>Submit</button>
       </div>
 
index 45aea01b957c419375cb103c46aec5b60315c95e..4fefa821caafb88be0a7647112514f98fa7c55f8 100644 (file)
         <div class="panel-body">
           <% if !missing_required_profile? && params[:offer_return_to] %>
             <div class="alert alert-success">
-              <p>Thank you for filling in your profile. <%= link_to 'Back to work!', params[:offer_return_to], class: 'btn btn-sm btn-primary' %></p>
+              <% if current_user.prefs[:getting_started_shown] %>
+                <p>Thank you for filling in your profile. <%= link_to 'Back to work!', params[:offer_return_to], class: 'btn btn-sm btn-primary' %></p>
+              <% else %>
+                <p>Thank you for filling in your profile. <%= link_to 'Get started', params[:offer_return_to], class: 'btn btn-sm btn-primary' %></p>
+              <% end %>
             </div>
           <% else %>
             <div class="alert alert-info">
                 <% end %>
               <% end %>
 
+              <%# If the user has other prefs, we need to preserve them %>
+              <% current_user.prefs.each do |key, value| %>
+                <% if key != :profile %>
+                  <input type="hidden" name="user[prefs][:<%=key%>]" value="<%=value.to_json%>">
+                <% end %>
+              <% end %>
+
               <% if show_save_button %>
                 <div class="form-group">
                   <div class="col-sm-offset-3 col-sm-8">
index f3d179238a38857530bb5db3e0b38d4b7974f1fe..4061ee83d89ae83a0ef363e1e43571326ca870e6 100644 (file)
@@ -131,6 +131,7 @@ common:
   arvados_insecure_https: true
   activation_contact_link: mailto:info@arvados.org
   arvados_docsite: http://doc.arvados.org
+  arvados_public_data_doc_url: http://arvados.org/projects/arvados/wiki/Public_Pipelines_and_Datasets
   arvados_theme: default
   show_user_agreement_inline: false
   secret_token: ~
@@ -138,8 +139,14 @@ common:
   default_openid_prefix: https://www.google.com/accounts/o8/id
   send_user_setup_notification_email: true
 
-  # Set user_profile_form_fields to enable and configure the user profile page.
-  # Default is set to false. A commented setting with full description is provided below.
+  # Scratch directory used by the remote repository browsing
+  # feature. If it doesn't exist, it (and any missing parents) will be
+  # created using mkdir_p.
+  repository_cache: <%= File.expand_path 'tmp/git', Rails.root %>
+
+  # Set user_profile_form_fields to enable and configure the user
+  # profile page. Default is set to false. A commented example with
+  # full description is provided below.
   user_profile_form_fields: false
 
   # Below is a sample setting of user_profile_form_fields config parameter.
@@ -202,5 +209,5 @@ common:
   # in the directory where your API server is running.
   anonymous_user_token: false
 
-  # Include Accept-Encoding header when making API requests
+  # Enable response payload compression in Arvados API requests.
   include_accept_encoding_header_in_api_requests: true
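
Editor's note: the repository_cache comment above describes directory auto-creation; the code performing it lives in the app, not in this config file. The described behavior amounts to:

    # Equivalent of the behavior described above: create the scratch
    # directory and any missing parents; a no-op if it already exists.
    require 'fileutils'
    FileUtils.mkdir_p Rails.configuration.repository_cache
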
index 34a3224cfc2526d936cda60a6c3882dbc760d2a6..39b299cbcef479a029fd249d315017e446980b42 100644 (file)
@@ -1,39 +1,11 @@
-# SQLite version 3.x
-#   gem install sqlite3
-#
-#   Ensure the SQLite 3 gem is defined in your Gemfile
-#   gem 'sqlite3'
+# Note: The database configuration is not actually used.
 development:
-  adapter: sqlite3
-  database: db/development.sqlite3
-  pool: 5
-  timeout: 5000
-
-# Warning: The database defined as "test" will be erased and
-# re-generated from your development database when you run "rake".
-# Do not set this db to the same as development or production.
+  adapter: nulldb
 test:
-  adapter: sqlite3
-  database: db/test.sqlite3
-  pool: 5
-  timeout: 5000
-
+  adapter: nulldb
 production:
-  adapter: sqlite3
-  database: db/production.sqlite3
-  pool: 5
-  timeout: 5000
-
-# Note: The "diagnostics" database configuration is not actually used.
+  adapter: nulldb
 diagnostics:
-  adapter: sqlite3
-  database: db/diagnostics.sqlite3
-  pool: 5
-  timeout: 5000
-
-# Note: The "performance" database configuration is not actually used.
+  adapter: nulldb
 performance:
-  adapter: sqlite3
-  database: db/diagnostics.sqlite3
-  pool: 5
-  timeout: 5000
+  adapter: nulldb
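
Editor's note: the nulldb adapter discards all database operations, which suits Workbench since it talks to the Arvados API server rather than a local database. Presumably it is supplied by the activerecord-nulldb-adapter gem, declared roughly as:

    # Assumed Gemfile entry providing the nulldb adapter used above:
    gem 'activerecord-nulldb-adapter'
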
index 7ed02e7dc9ba11aa8beb07cae5f9c934ce77fd9f..44d7ded6c10c70cd1f7f72f627ce619011828ec5 100644 (file)
@@ -26,6 +26,11 @@ ArvadosWorkbench::Application.routes.draw do
   resources :repositories do
     post 'share_with', on: :member
   end
+  # {format: false} prevents rails from treating "foo.png" as foo?format=png
+  get '/repositories/:id/tree/:commit' => 'repositories#show_tree'
+  get '/repositories/:id/tree/:commit/*path' => 'repositories#show_tree', as: :show_repository_tree, format: false
+  get '/repositories/:id/blob/:commit/*path' => 'repositories#show_blob', as: :show_repository_blob, format: false
+  get '/repositories/:id/commit/:commit' => 'repositories#show_commit', as: :show_repository_commit
   match '/logout' => 'sessions#destroy', via: [:get, :post]
   get '/logged_out' => 'sessions#index'
   resources :users do
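
Editor's note: the as: options above define the named URL helpers used by the repository views earlier in this diff. Illustrative calls (repo.uuid and sha1 are placeholders):

    # Placeholders only; the glob :path segment keeps its slashes.
    show_repository_tree_path(id: repo.uuid, commit: sha1, path: 'apps/workbench')
    # => "/repositories/#{repo.uuid}/tree/#{sha1}/apps/workbench"
    show_repository_blob_path(id: repo.uuid, commit: sha1, path: 'COPYING')
    # => "/repositories/#{repo.uuid}/blob/#{sha1}/COPYING"
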
index 8745d96792985d74f7116eaeb660bbb5c599e143..26ef67bcb932afe545cc218f1c565fa10dc61a24 100644 (file)
@@ -27,19 +27,18 @@ class ActionsControllerTest < ActionController::TestCase
 
     assert_response 302   # collection created and redirected to new collection page
 
-    assert response.headers['Location'].include? '/collections/'
+    assert_includes(response.headers['Location'], '/collections/')
     new_collection_uuid = response.headers['Location'].split('/')[-1]
 
     use_token :active
     collection = Collection.select([:uuid, :manifest_text]).where(uuid: new_collection_uuid).first
     manifest_text = collection['manifest_text']
-    assert manifest_text.include?('foo'), 'Not found foo in new collection manifest text'
-    assert manifest_text.include?('bar'), 'Not found bar in new collection manifest text'
-    assert manifest_text.include?('baz'), 'Not found baz in new collection manifest text'
-    assert manifest_text.include?('0:0:file1 0:0:file2 0:0:file3'),
-                'Not found 0:0:file1 0:0:file2 0:0:file3 in new collection manifest text'
-    assert manifest_text.include?('dir1/subdir'), 'Not found dir1/subdir in new collection manifest text'
-    assert manifest_text.include?('dir2'), 'Not found dir2 in new collection manifest text'
+    assert_includes(manifest_text, "foo")
+    assert_includes(manifest_text, "bar")
+    assert_includes(manifest_text, "baz")
+    assert_includes(manifest_text, "0:0:file1 0:0:file2 0:0:file3")
+    assert_includes(manifest_text, "dir1/subdir")
+    assert_includes(manifest_text, "dir2")
   end
 
   test "combine files  with repeated names into new collection" do
@@ -55,21 +54,19 @@ class ActionsControllerTest < ActionController::TestCase
 
     assert_response 302   # collection created and redirected to new collection page
 
-    assert response.headers['Location'].include? '/collections/'
+    assert_includes(response.headers['Location'], '/collections/')
     new_collection_uuid = response.headers['Location'].split('/')[-1]
 
     use_token :active
     collection = Collection.select([:uuid, :manifest_text]).where(uuid: new_collection_uuid).first
     manifest_text = collection['manifest_text']
-    assert manifest_text.include?('foo'), 'Not found foo in new collection manifest text'
-    assert manifest_text.include?('foo(1)'), 'Not found foo(1) in new collection manifest text'
-    assert manifest_text.include?('foo(2)'), 'Not found foo(2) in new collection manifest text'
-    assert manifest_text.include?('bar'), 'Not found bar in new collection manifest text'
-    assert manifest_text.include?('baz'), 'Not found baz in new collection manifest text'
-    assert manifest_text.include?('0:0:file1 0:0:file2 0:0:file3'),
-                'Not found 0:0:file1 0:0:file2 0:0:file3 in new collection manifest text'
-    assert manifest_text.include?('dir1/subdir'), 'Not found dir1/subdir in new collection manifest text'
-    assert manifest_text.include?('dir2'), 'Not found dir2 in new collection manifest text'
+    assert_includes(manifest_text, "foo(1)")
+    assert_includes(manifest_text, "foo(2)")
+    assert_includes(manifest_text, "bar")
+    assert_includes(manifest_text, "baz")
+    assert_includes(manifest_text, "0:0:file1 0:0:file2 0:0:file3")
+    assert_includes(manifest_text, "dir1/subdir")
+    assert_includes(manifest_text, "dir2")
   end
 
   test "combine collections with repeated filenames in almost similar directories and expect files with proper suffixes" do
@@ -90,26 +87,26 @@ class ActionsControllerTest < ActionController::TestCase
     collection = Collection.select([:uuid, :manifest_text]).where(uuid: new_collection_uuid).first
     manifest_text = collection['manifest_text']
 
-    assert manifest_text.include?('foo'), 'Not found foo in new collection manifest text'
-    assert manifest_text.include?('foo(1)'), 'Not found foo(1) in new collection manifest text'
+    assert_includes(manifest_text, 'foo')
+    assert_includes(manifest_text, 'foo(1)')
 
     streams = manifest_text.split "\n"
     streams.each do |stream|
       if stream.start_with? './dir1'
         # dir1 stream
-        assert stream.include?(':alice(1)'), "Not found: alice(1) in dir1 in manifest text #{manifest_text}"
-        assert stream.include?(':alice.txt'), "Not found: alice.txt in dir1 in manifest text #{manifest_text}"
-        assert stream.include?(':alice(1).txt'), "Not found: alice(1).txt in dir1 in manifest text #{manifest_text}"
-        assert stream.include?(':bob.txt'), "Not found: bob.txt in dir1 in manifest text #{manifest_text}"
-        assert stream.include?(':carol.txt'), "Not found: carol.txt in dir1 in manifest text #{manifest_text}"
+        assert_includes(stream, ':alice(1)')
+        assert_includes(stream, ':alice.txt')
+        assert_includes(stream, ':alice(1).txt')
+        assert_includes(stream, ':bob.txt')
+        assert_includes(stream, ':carol.txt')
       elsif stream.start_with? './dir2'
         # dir2 stream
-        assert stream.include?(':alice.txt'), "Not found: alice.txt in dir2 in manifest text #{manifest_text}"
-        assert stream.include?(':alice(1).txt'), "Not found: alice(1).txt in dir2 in manifest text #{manifest_text}"
+        assert_includes(stream, ':alice.txt')
+        assert_includes(stream, ':alice(1).txt')
       elsif stream.start_with? '. '
         # . stream
-        assert stream.include?(':foo'), "Not found: foo in . in manifest text #{manifest_text}"
-        assert stream.include?(':foo(1)'), "Not found: foo(1) in . in manifest text #{manifest_text}"
+        assert_includes(stream, ':foo')
+        assert_includes(stream, ':foo(1)')
       end
     end
   end
@@ -123,7 +120,7 @@ class ActionsControllerTest < ActionController::TestCase
 
     assert_response 302   # collection created and redirected to new collection page
 
-    assert response.headers['Location'].include? '/collections/'
+    assert_includes(response.headers['Location'], '/collections/')
     new_collection_uuid = response.headers['Location'].split('/')[-1]
 
     use_token :active
@@ -134,12 +131,12 @@ class ActionsControllerTest < ActionController::TestCase
     assert_equal 2, streams.length
     streams.each do |stream|
       if stream.start_with? './dir1'
-        assert stream.include?('foo'), 'Not found: foo in dir1'
+        assert_includes(stream, 'foo')
       elsif stream.start_with? '. '
-        assert stream.include?('foo'), 'Not found: foo in .'
+        assert_includes(stream, 'foo')
       end
     end
-    assert !manifest_text.include?('foo(1)'), 'Found foo(1) in new collection manifest text'
+    refute_includes(manifest_text, 'foo(1)')
   end
 
   test "combine foo files from two different collection streams and expect proper filename suffixes" do
@@ -151,7 +148,7 @@ class ActionsControllerTest < ActionController::TestCase
 
     assert_response 302   # collection created and redirected to new collection page
 
-    assert response.headers['Location'].include? '/collections/'
+    assert_includes(response.headers['Location'], '/collections/')
     new_collection_uuid = response.headers['Location'].split('/')[-1]
 
     use_token :active
@@ -160,7 +157,7 @@ class ActionsControllerTest < ActionController::TestCase
 
     streams = manifest_text.split "\n"
     assert_equal 1, streams.length, "Incorrect number of streams in #{manifest_text}"
-    assert manifest_text.include?('foo'), "Not found foo in new collection manifest text #{manifest_text}"
-    assert manifest_text.include?('foo(1)'), "Not found foo(1) in new collection manifest text #{manifest_text}"
+    assert_includes(manifest_text, 'foo')
+    assert_includes(manifest_text, 'foo(1)')
   end
 end
index ec17e8e4222676e50c7b92266b31a6180e359763..119ed8c8f42c5d4e2129e4257b19509b592df3d5 100644 (file)
@@ -138,6 +138,33 @@ class ProjectsControllerTest < ActionController::TestCase
     assert_equal api_fixture('users', 'subproject_admin')['uuid'], new_specimen.owner_uuid
   end
 
+  # An object that lacks an expired_at field but has an xx_owner_uuid_name_unique constraint
+  # is renamed when removed, if another object with the same name already exists in the user's home project.
+  [
+    ['groups', 'subproject_in_asubproject_with_same_name_as_one_in_active_user_home'],
+    ['pipeline_templates', 'template_in_asubproject_with_same_name_as_one_in_active_user_home'],
+  ].each do |dm, fixture|
+    test "removing #{dm} from a subproject results in renaming it when there is another such object with same name in home project" do
+      object = api_fixture(dm, fixture)
+      delete(:remove_item,
+             { id: api_fixture('groups', 'asubproject')['uuid'],
+               item_uuid: object['uuid'],
+               format: 'js' },
+             session_for(:active))
+      assert_response :success
+      assert_match(/\b#{object['uuid']}\b/, @response.body,
+                   "removed object not named in response")
+      use_token :active
+      if dm.eql?('groups')
+        found = Group.find(object['uuid'])
+      else
+        found = PipelineTemplate.find(object['uuid'])
+      end
+      assert_equal api_fixture('users', 'active')['uuid'], found.owner_uuid
+      assert_includes(found.name, object['name'] + ' removed from ')
+    end
+  end
+
   test 'projects#show tab infinite scroll partial obeys limit' do
     get_contents_rows(limit: 1, filters: [['uuid','is_a',['arvados#job']]])
     assert_response :success
index f95bb7731fab4bd86888d18ded3a6af0e2a6eb6b..25bf55768529f58327e7f8861835516e0b0f5f06 100644 (file)
@@ -1,7 +1,9 @@
 require 'test_helper'
+require 'helpers/repository_stub_helper'
 require 'helpers/share_object_helper'
 
 class RepositoriesControllerTest < ActionController::TestCase
+  include RepositoryStubHelper
   include ShareObjectHelper
 
   [
@@ -62,4 +64,61 @@ class RepositoriesControllerTest < ActionController::TestCase
       end
     end
   end
+
+  ### Browse repository content
+
+  [:active, :spectator].each do |user|
+    test "show tree to #{user}" do
+      reset_api_fixtures_after_test false
+      sha1, _, _ = stub_repo_content
+      get :show_tree, {
+        id: api_fixture('repositories')['foo']['uuid'],
+        commit: sha1,
+      }, session_for(user)
+      assert_response :success
+      assert_select 'tr td a', 'COPYING'
+      assert_select 'tr td', '625 bytes'
+      assert_select 'tr td a', 'apps'
+      assert_select 'tr td a', 'workbench'
+      assert_select 'tr td a', 'Gemfile'
+      assert_select 'tr td', '33.7 KiB'
+    end
+
+    test "show commit to #{user}" do
+      reset_api_fixtures_after_test false
+      sha1, commit, _ = stub_repo_content
+      get :show_commit, {
+        id: api_fixture('repositories')['foo']['uuid'],
+        commit: sha1,
+      }, session_for(user)
+      assert_response :success
+      assert_select 'pre', h(commit)
+    end
+
+    test "show blob to #{user}" do
+      reset_api_fixtures_after_test false
+      sha1, _, filedata = stub_repo_content filename: 'COPYING'
+      get :show_blob, {
+        id: api_fixture('repositories')['foo']['uuid'],
+        commit: sha1,
+        path: 'COPYING',
+      }, session_for(user)
+      assert_response :success
+      assert_select 'pre', h(filedata)
+    end
+  end
+
+  ['', '/'].each do |path|
+    test "show tree with path '#{path}'" do
+      reset_api_fixtures_after_test false
+      sha1, _, _ = stub_repo_content filename: 'COPYING'
+      get :show_tree, {
+        id: api_fixture('repositories')['foo']['uuid'],
+        commit: sha1,
+        path: path,
+      }, session_for(:active)
+      assert_response :success
+      assert_select 'tr td', 'COPYING'
+    end
+  end
 end
index e7accad42335dfc16f4edfe3996516288f8b3a29..463dacc02226bf104eaa91bbe664cf2bc386ce6a 100644 (file)
@@ -1,6 +1,8 @@
 require 'test_helper'
 
 class CollectionsHelperTest < ActionView::TestCase
+  reset_api_fixtures :after_each_test, false
+
   [
     ["filename.csv", true],
     ["filename.fa", true],
diff --git a/apps/workbench/test/helpers/repository_stub_helper.rb b/apps/workbench/test/helpers/repository_stub_helper.rb
new file mode 100644 (file)
index 0000000..b7d0573
--- /dev/null
@@ -0,0 +1,33 @@
+module RepositoryStubHelper
+  # Supply some fake git content.
+  def stub_repo_content opts={}
+    fakesha1 = opts[:sha1] || 'abcdefabcdefabcdefabcdefabcdefabcdefabcd'
+    fakefilename = opts[:filename] || 'COPYING'
+    fakefilesrc = File.expand_path('../../../../../'+fakefilename, __FILE__)
+    fakefile = File.read fakefilesrc
+    fakecommit = <<-EOS
+      commit abcdefabcdefabcdefabcdefabcdefabcdefabcd
+      Author: Fake R <fake@example.com>
+      Date:   Wed Apr 1 11:59:59 2015 -0400
+
+          It's a fake commit.
+
+    EOS
+    Repository.any_instance.stubs(:ls_tree_lr).with(fakesha1).returns <<-EOS
+      100644 blob eec475862e6ec2a87554e0fca90697e87f441bf5     226    .gitignore
+      100644 blob acbd7523ed49f01217874965aa3180cccec89d61     625    COPYING
+      100644 blob d645695673349e3947e8e5ae42332d0ac3164cd7   11358    LICENSE-2.0.txt
+      100644 blob c7a36c355b4a2b94dfab45c9748330022a788c91     622    README
+      100644 blob dba13ed2ddf783ee8118c6a581dbf75305f816a3   34520    agpl-3.0.txt
+      100644 blob 9bef02bbfda670595750fd99a4461005ce5b8f12     695    apps/workbench/.gitignore
+      100644 blob b51f674d90f68bfb50d9304068f915e42b04aea4    2249    apps/workbench/Gemfile
+      100755 blob cdd5ebaff27781f93ab85e484410c0ce9e97770f    1012    crunch_scripts/hash
+    EOS
+    Repository.any_instance.
+      stubs(:cat_file).with(fakesha1, fakefilename).returns fakefile
+    Repository.any_instance.
+      stubs(:show).with(fakesha1).returns fakecommit
+    return fakesha1, fakecommit, fakefile
+  end
+end
index ba09acc810dcf0e908cb9de811e07948d4dddf32..9d8f8d03252d036cf9dca696b833a1097f17e46b 100644 (file)
@@ -19,6 +19,7 @@ module ShareObjectHelper
       # Otherwise, the not-included assertions might falsely pass because
       # the modal hasn't loaded yet.
       find(".selectable", text: name).click
+      assert_text "Only #{share_type} you are allowed to access are shown"
       assert(has_no_selector?(".modal-dialog-preview-pane"),
              "preview pane available in sharing dialog")
       if share_type == 'users' and obj and obj['email']
index e8917792ad6a5f579f0550dd18ea8cea1b535157..1e486d8fef662ea241998ae835c4a2360e16b7bf 100644 (file)
@@ -23,8 +23,8 @@ class AnonymousAccessTest < ActionDispatch::IntegrationTest
       end
       within('.navbar-fixed-top') do
         assert_selector 'a', text: Rails.configuration.site_name.downcase
-        assert_selector 'a', text: "#{user['email']}"
-        find('a', text: "#{user['email']}").click
+        assert(page.has_link?("notifications-menu"), 'no user menu')
+        page.find("#notifications-menu").click
         within('.dropdown-menu') do
           assert_selector 'a', text: 'Log out'
         end
@@ -62,8 +62,8 @@ class AnonymousAccessTest < ActionDispatch::IntegrationTest
     assert_selector 'a', text: 'Data collections'
     assert_selector 'a', text: 'Jobs and pipelines'
     assert_selector 'a', text: 'Pipeline templates'
+    assert_selector 'a', text: 'Subprojects'
     assert_selector 'a', text: 'Advanced'
-    assert_no_selector 'a', text: 'Subprojects'
     assert_no_selector 'a', text: 'Other objects'
     assert_no_selector 'button', text: 'Add data'
 
@@ -183,6 +183,19 @@ class AnonymousAccessTest < ActionDispatch::IntegrationTest
     assert_no_selector 'a', text: 'Run this pipeline'
   end
 
+  test "anonymous user accesses subprojects tab in shared project" do
+    visit PUBLIC_PROJECT + '#Subprojects'
+
+    assert_text 'Subproject in anonymous accessible project'
+
+    within first('tr[data-kind="arvados#group"]') do
+      click_link 'Show'
+    end
+
+    # in subproject
+    assert_text 'Description for subproject in anonymous accessible project'
+  end
+
   [
     ['pipeline_in_publicly_accessible_project', true],
     ['pipeline_in_publicly_accessible_project_but_other_objects_elsewhere', false],
@@ -198,9 +211,11 @@ class AnonymousAccessTest < ActionDispatch::IntegrationTest
       if pipeline_page
         object = api_fixture('pipeline_instances')[fixture]
         page = "/pipeline_instances/#{object['uuid']}"
+        expect_log_text = "Log for foo"
       else      # job
         object = api_fixture('jobs')[fixture]
         page = "/jobs/#{object['uuid']}"
+        expect_log_text = "stderr crunchstat"
       end
 
       if user
@@ -213,39 +228,42 @@ class AnonymousAccessTest < ActionDispatch::IntegrationTest
       click_link 'foo' if pipeline_page
 
       if objects_readable
+        assert_selector 'a[href="#Log"]', text: 'Log'
+        assert_no_selector 'a[data-toggle="disabled"]', text: 'Log'
+        assert_no_text 'Output data not available'
         if pipeline_page
           assert_text 'This pipeline was created from'
           assert_selector 'a', text: object['components']['foo']['job']['uuid']
+          # We'd like to test the Log tab on job pages too, but we can't right
+          # now because Poltergeist 1.x doesn't support JavaScript's
+          # Function.prototype.bind, which is used by job_log_graph.js.
+          click_link "Log"
+          assert_text expect_log_text
         end
-        assert_no_text 'Output data not available'
-        assert_selector 'a[href="#Log"]', text: 'Log'
-        assert_no_selector 'a[data-toggle="disabled"]', text: 'Log'
       else
+        assert_selector 'a[data-toggle="disabled"]', text: 'Log'
+        assert_text 'Output data not available'
+        assert_text object['job']
         if pipeline_page
           assert_no_text 'This pipeline was created from'  # template is not readable
           assert_no_selector 'a', text: object['components']['foo']['job']['uuid']
         end
+        click_link "Log"
         assert_text 'Output data not available'
-        assert_text object['job']
-        assert_selector 'a[data-toggle="disabled"]', text: 'Log'
-      end
-
-      click_link 'Log'
-      if objects_readable
-        assert_no_text 'foo'  # should be in Log tab
-        assert_text 'stderr crunchstat'   if pipeline_page
-      else
-        assert_text 'foo'     # Log tab disabled and hence still in first tab
-        assert_no_text 'stderr crunchstat'  # log line shouldn't be seen
+        assert_no_text expect_log_text
       end
     end
   end
 
   [
     ['new_pipeline_in_publicly_accessible_project', true],
+    ['new_pipeline_in_publicly_accessible_project', true, 'spectator'],
     ['new_pipeline_in_publicly_accessible_project_but_other_objects_elsewhere', false],
     ['new_pipeline_in_publicly_accessible_project_but_other_objects_elsewhere', false, 'spectator'],
     ['new_pipeline_in_publicly_accessible_project_but_other_objects_elsewhere', true, 'admin'],
+    ['new_pipeline_in_publicly_accessible_project_with_dataclass_file_and_other_objects_elsewhere', false],
+    ['new_pipeline_in_publicly_accessible_project_with_dataclass_file_and_other_objects_elsewhere', false, 'spectator'],
+    ['new_pipeline_in_publicly_accessible_project_with_dataclass_file_and_other_objects_elsewhere', true, 'admin'],
   ].each do |fixture, objects_readable, user=nil|
     test "access #{fixture} in public project with objects readable=#{objects_readable} with user #{user}" do
       object = api_fixture('pipeline_instances')[fixture]
@@ -264,13 +282,26 @@ class AnonymousAccessTest < ActionDispatch::IntegrationTest
         if user == 'admin'
           assert_text 'input'
           assert_selector 'a', text: 'Choose'
+          assert_selector 'a', text: 'Run'
+          assert_no_selector 'a.disabled', text: 'Run'
         else
           assert_selector 'a', text: object['components']['foo']['script_parameters']['input']['value']
+          user ? (assert_selector 'a', text: 'Run') : (assert_no_selector 'a', text: 'Run')
         end
       else
         assert_no_text 'This pipeline was created from'  # template is not readable
-        assert_text object['components']['foo']['script_parameters']['input']['value']
-        assert_no_selector 'a', text: object['components']['foo']['script_parameters']['input']['value']
+        input = object['components']['foo']['script_parameters']['input']['value']
+        assert_no_selector 'a', text: input
+        if user
+          input = input.gsub('/', '\\/')
+          assert_text "One or more inputs provided are not readable"
+          assert_selector "input[type=text][value=#{input}]"
+          assert_selector 'a.disabled', text: 'Run'
+        else
+          assert_no_text "One or more inputs provided are not readable"
+          assert_text input
+          assert_no_selector 'a', text: 'Run'
+        end
       end
     end
   end
index a5789acae33374099f89e18e3ade8690ea23159e..b6a501f924e690012d2b3f96a646ccb857c743a3 100644 (file)
@@ -40,8 +40,8 @@ class ApplicationLayoutTest < ActionDispatch::IntegrationTest
       else
         # my account menu
         assert_selector 'a', text: Rails.configuration.site_name.downcase
-        assert page.has_link?("#{user['email']}"), 'Not found link - email'
-        find('a', text: "#{user['email']}").click
+        assert(page.has_link?("notifications-menu"), 'no user menu')
+        page.find("#notifications-menu").click
         within('.dropdown-menu') do
           if user['is_active']
             assert page.has_no_link?('Not active'), 'Found link - Not active'
@@ -71,6 +71,8 @@ class ApplicationLayoutTest < ActionDispatch::IntegrationTest
     within('.navbar-fixed-top') do
       page.find("#arv-help").click
       within('.dropdown-menu') do
+        assert_selector 'a', text:'Getting Started ...'
+        assert_selector 'a', text:'Public Pipelines and Datasets'
         assert page.has_link?('Tutorials and User guide'), 'No link - Tutorials and User guide'
         assert page.has_link?('API Reference'), 'No link - API Reference'
         assert page.has_link?('SDK Reference'), 'No link - SDK Reference'
@@ -108,7 +110,8 @@ class ApplicationLayoutTest < ActionDispatch::IntegrationTest
     ['active', api_fixture('users')['active'], true, true],
     ['admin', api_fixture('users')['admin'], true, true],
     ['active_no_prefs', api_fixture('users')['active_no_prefs'], true, false],
-    ['active_no_prefs_profile', api_fixture('users')['active_no_prefs_profile'], true, false],
+    ['active_no_prefs_profile_no_getting_started_shown',
+        api_fixture('users')['active_no_prefs_profile_no_getting_started_shown'], true, false],
   ].each do |token, user, invited, has_profile|
 
     test "visit home page for user #{token}" do
@@ -141,4 +144,59 @@ class ApplicationLayoutTest < ActionDispatch::IntegrationTest
       verify_system_menu user
     end
   end
+
+  test "test getting started help menu item" do
+    visit page_with_token('active')
+    within '.navbar-fixed-top' do
+      find('.help-menu > a').click
+      find('.help-menu .dropdown-menu a', text: 'Getting Started ...').click
+    end
+
+    within '.modal-content' do
+      assert_text 'Getting Started'
+      assert_selector 'button:not([disabled])', text: 'Next'
+      assert_no_selector 'button:not([disabled])', text: 'Prev'
+
+      # Use Next button to enable Prev button
+      click_button 'Next'
+      assert_selector 'button:not([disabled])', text: 'Prev'  # Prev button is now enabled
+      click_button 'Prev'
+      assert_no_selector 'button:not([disabled])', text: 'Prev'  # Prev button is again disabled
+
+      # Click Next until last page is reached and verify that it is disabled
+      (0..20).each do |i|   # currently we only have 4 pages, and don't expect more than 20 in the future
+        click_button 'Next'
+        begin
+          find('button:not([disabled])', text: 'Next')
+        rescue => e
+          break
+        end
+      end
+      assert_no_selector 'button:not([disabled])', text: 'Next'  # Next button is disabled
+      assert_selector 'button:not([disabled])', text: 'Prev'     # Prev button is enabled
+      click_button 'Prev'
+      assert_selector 'button:not([disabled])', text: 'Next'     # Next button is now enabled
+
+      first('button', text: 'x').click
+    end
+    assert_text 'Active pipelines' # seeing dashboard now
+  end
+
+  test "test arvados_public_data_doc_url config unset" do
+    Rails.configuration.arvados_public_data_doc_url = false
+
+    visit page_with_token('active')
+    within '.navbar-fixed-top' do
+      find('.help-menu > a').click
+
+      assert_no_selector 'a', text:'Public Pipelines and Datasets'
+
+      assert_selector 'a', text:'Getting Started ...'
+      assert page.has_link?('Tutorials and User guide'), 'No link - Tutorials and User guide'
+      assert page.has_link?('API Reference'), 'No link - API Reference'
+      assert page.has_link?('SDK Reference'), 'No link - SDK Reference'
+      assert page.has_link?('Show version / debugging info ...'), 'No link - Show version / debugging info'
+      assert page.has_link?('Report a problem ...'), 'No link - Report a problem'
+    end
+  end
 end
index 4a7014c37db83639e34ae01dbe5e0a013d41041c..2eee6fe8dbade6c7272410cbc42443e9b8cb5b08 100644 (file)
@@ -1,20 +1,11 @@
 require 'integration_helper'
+require_relative 'integration_test_utils'
 
 class CollectionsTest < ActionDispatch::IntegrationTest
   setup do
     need_javascript
   end
 
-  # check_checkboxes_state asserts that the page holds at least one
-  # checkbox matching 'selector', and that all matching checkboxes
-  # are in state 'checkbox_status' (i.e. checked if true, unchecked otherwise)
-  def assert_checkboxes_state(selector, checkbox_status, msg=nil)
-    assert page.has_selector?(selector)
-    page.all(selector).each do |checkbox|
-      assert(checkbox.checked? == checkbox_status, msg)
-    end
-  end
-
   test "Can copy a collection to a project" do
     collection_uuid = api_fixture('collections')['foo_file']['uuid']
     collection_name = api_fixture('collections')['foo_file']['name']
@@ -212,7 +203,7 @@ class CollectionsTest < ActionDispatch::IntegrationTest
   end
 
   test "Collection portable data hash with multiple matches" do
-    pdh = api_fixture('collections')['baz_file']['portable_data_hash']
+    pdh = api_fixture('collections')['foo_file']['portable_data_hash']
     visit page_with_token('admin', "/collections/#{pdh}")
 
     matches = api_fixture('collections').select {|k,v| v["portable_data_hash"] == pdh}
@@ -221,8 +212,22 @@ class CollectionsTest < ActionDispatch::IntegrationTest
     matches.each do |k,v|
       assert page.has_link?(v["name"]), "Page /collections/#{pdh} should contain link '#{v['name']}'"
     end
-    assert page.has_no_text?("Activity")
-    assert page.has_no_text?("Sharing and permissions")
+    assert_text 'The following collections have this content:'
+    assert_no_text 'more results are not shown'
+    assert_no_text 'Activity'
+    assert_no_text 'Sharing and permissions'
+  end
+
+  test "Collection portable data hash with multiple matches with more than one page of results" do
+    pdh = api_fixture('collections')['baz_file']['portable_data_hash']
+    visit page_with_token('admin', "/collections/#{pdh}")
+
+    assert_selector 'a', text: 'Collection_1'
+
+    assert_text 'The following collections have this content:'
+    assert_text 'more results are not shown'
+    assert_no_text 'Activity'
+    assert_no_text 'Sharing and permissions'
   end
 
   test "Filtering collection files by regexp" do
index 32f16a68dfa9bb7da49f0adbce9881aa0480aeda..06e17d617f10d0ffafede1cad61a055e4ec42bbf 100644 (file)
@@ -9,7 +9,7 @@ class ErrorsTest < ActionDispatch::IntegrationTest
 
   test "error page renders user navigation" do
     visit(page_with_token("active", "/collections/#{BAD_UUID}"))
-    assert(page.has_text?(api_fixture("users")["active"]["email"]),
+    assert(page.has_link?("notifications-menu"),
            "User information missing from error page")
     assert(page.has_no_text?(/log ?in/i),
            "Logged in user prompted to log in on error page")
@@ -17,7 +17,7 @@ class ErrorsTest < ActionDispatch::IntegrationTest
 
   test "no user navigation with expired token" do
     visit(page_with_token("expired", "/collections/#{BAD_UUID}"))
-    assert(page.has_no_text?(api_fixture("users")["active"]["email"]),
+    assert(page.has_no_link?("notifications-menu"),
            "Page visited with expired token included user information")
     assert(page.has_selector?("a", text: /log ?in/i),
            "Login prompt missing on expired token error page")
@@ -79,50 +79,45 @@ class ErrorsTest < ActionDispatch::IntegrationTest
   end
 
   test "API error page has Report problem button" do
+    # point to a bad api server url to generate fiddlesticks error
     original_arvados_v1_base = Rails.configuration.arvados_v1_base
+    Rails.configuration.arvados_v1_base = "https://[::1]:1/"
 
-    begin
-      # point to a bad api server url to generate fiddlesticks error
-      Rails.configuration.arvados_v1_base = "https://[100::f]:1/"
+    visit page_with_token("active")
 
-      visit page_with_token("active")
+    assert_text 'fiddlesticks'
 
-      assert_text 'fiddlesticks'
+    # reset api server base config to let the popup rendering to work
+    Rails.configuration.arvados_v1_base = original_arvados_v1_base
 
-      # reset api server base config to let the popup rendering to work
-      Rails.configuration.arvados_v1_base = original_arvados_v1_base
+    click_link 'Report problem'
 
-      click_link 'Report problem'
+    within '.modal-content' do
+      assert_text 'Report a problem'
+      assert_no_text 'Version / debugging info'
+      assert_text 'Describe the problem'
+      assert_text 'Send problem report'
+      # "Send" button should be disabled until text is entered
+      assert_no_selector 'a,button:not([disabled])', text: 'Send problem report'
+      assert_selector 'a,button', text: 'Cancel'
 
-      within '.modal-content' do
-        assert_text 'Report a problem'
-        assert_no_text 'Version / debugging info'
-        assert_text 'Describe the problem'
-        assert_text 'Send problem report'
-        # "Send" button should be disabled until text is entered
-        assert_no_selector 'a,button:not([disabled])', text: 'Send problem report'
-        assert_selector 'a,button', text: 'Cancel'
+      report = mock
+      report.expects(:deliver).returns true
+      IssueReporter.expects(:send_report).returns report
 
-        report = mock
-        report.expects(:deliver).returns true
-        IssueReporter.expects(:send_report).returns report
+      # enter a report text and click on report
+      find_field('report_issue_text').set 'my test report text'
+      click_button 'Send problem report'
 
-        # enter a report text and click on report
-        find_field('report_issue_text').set 'my test report text'
-        click_button 'Send problem report'
-
-        # ajax success updated button texts and added footer message
-        assert_no_selector 'a,button', text: 'Send problem report'
-        assert_no_selector 'a,button', text: 'Cancel'
-        assert_text 'Report sent'
-        assert_text 'Thanks for reporting this issue'
-        click_button 'Close'
-      end
-
-      # out of the popup now and should be back in the error page
-      assert_text 'fiddlesticks'
-    ensure
-      Rails.configuration.arvados_v1_base = original_arvados_v1_base
+      # ajax success updated button texts and added footer message
+      assert_no_selector 'a,button', text: 'Send problem report'
+      assert_no_selector 'a,button', text: 'Cancel'
+      assert_text 'Report sent'
+      assert_text 'Thanks for reporting this issue'
+      click_button 'Close'
     end
+
+    # out of the popup now and should be back in the error page
+    assert_text 'fiddlesticks'
   end
 end
diff --git a/apps/workbench/test/integration/integration_test_utils.rb b/apps/workbench/test/integration/integration_test_utils.rb
new file mode 100644 (file)
index 0000000..3fe5800
--- /dev/null
@@ -0,0 +1,12 @@
+# This file defines methods reusable by two or more integration tests.
+
+# assert_checkboxes_state asserts that the page holds at least one
+# checkbox matching 'selector', and that all matching checkboxes
+# are in state 'checkbox_status' (i.e. checked if true, unchecked otherwise)
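+# Example (illustrative; this matches the call used in projects_test below):
+#   assert_checkboxes_state('input[type=checkbox]', true, '"select all" should check all checkboxes')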
+def assert_checkboxes_state(selector, checkbox_status, msg=nil)
+  assert page.has_selector?(selector)
+  page.all(selector).each do |checkbox|
+    assert(checkbox.checked? == checkbox_status, msg)
+  end
+end
index 29bccd9d76b20846cd0d6ce7519b2858a81bcd5a..2cae500027aeebd8f79270a4ba26f61e4496373f 100644 (file)
@@ -95,10 +95,10 @@ class JobsTest < ActionDispatch::IntegrationTest
 
       # Re-running jobs doesn't currently work because the test API
       # server has no git repository to check against.  For now, check
-      # that the correct script version is mentioned in the
-      # Fiddlesticks error message.
+      # that the error message says something appropriate for that
+      # situation.
       if expect_options && use_latest
-        assert_text "Script version #{job['supplied_script_version']} does not resolve to a commit"
+        assert_text "077ba2ad3ea24a929091a9e6ce545c93199b8e57"
       else
         assert_text "Script version #{job['script_version']} does not resolve to a commit"
       end
index f2916741cb28fbc0cdee25ed85093a47a0b45f9e..da8f439dfe2df0a310d6bc7837968033bae49f29 100644 (file)
@@ -541,4 +541,45 @@ class PipelineInstancesTest < ActionDispatch::IntegrationTest
     visit page_with_token 'active', '/pipeline_instances/' + pi['uuid']
     assert_text 'Queued for '
   end
+
+  test "job logs linked for running pipeline" do
+    pi = api_fixture("pipeline_instances", "running_pipeline_with_complete_job")
+    visit(page_with_token("active", "/pipeline_instances/#{pi['uuid']}"))
+    click_on "Log"
+    within "#Log" do
+      assert_text "Log for previous"
+      log_link = find("a", text: "Log for previous")
+      assert_includes(log_link[:href],
+                      pi["components"]["previous"]["job"]["log"])
+      assert_selector "#event_log_div"
+    end
+  end
+
+  test "job logs linked for complete pipeline" do
+    pi = api_fixture("pipeline_instances", "complete_pipeline_with_two_jobs")
+    visit(page_with_token("active", "/pipeline_instances/#{pi['uuid']}"))
+    click_on "Log"
+    within "#Log" do
+      assert_text "Log for previous"
+      pi["components"].each do |cname, cspec|
+        log_link = find("a", text: "Log for #{cname}")
+        assert_includes(log_link[:href], cspec["job"]["log"])
+      end
+      assert_no_selector "#event_log_div"
+    end
+  end
+
+  test "job logs linked for failed pipeline" do
+    pi = api_fixture("pipeline_instances", "failed_pipeline_with_two_jobs")
+    visit(page_with_token("active", "/pipeline_instances/#{pi['uuid']}"))
+    click_on "Log"
+    within "#Log" do
+      assert_text "Log for previous"
+      pi["components"].each do |cname, cspec|
+        log_link = find("a", text: "Log for #{cname}")
+        assert_includes(log_link[:href], cspec["job"]["log"])
+      end
+      assert_no_selector "#event_log_div"
+    end
+  end
 end
index 6c9bd6698efb118851da8a11056037652950f0b5..9c2842f1c2c6d5ba62d07413d28ebecb415cf054 100644 (file)
@@ -1,5 +1,6 @@
 require 'integration_helper'
 require 'helpers/share_object_helper'
+require_relative 'integration_test_utils'
 
 class ProjectsTest < ActionDispatch::IntegrationTest
   include ShareObjectHelper
@@ -676,7 +677,7 @@ class ProjectsTest < ActionDispatch::IntegrationTest
     visit page_with_token 'active', '/projects/' + api_fixture('groups')['aproject']['uuid']
 
     # Point to a bad api server url to generate error
-    Rails.configuration.arvados_v1_base = "https://[100::f]:1/"
+    Rails.configuration.arvados_v1_base = "https://[::1]:1/"
     click_link 'Other objects'
     within '#Other_objects' do
       # Error
@@ -722,4 +723,111 @@ class ProjectsTest < ActionDispatch::IntegrationTest
     find("#page-wrapper .nav-tabs :first-child a").click
     assert_text("Collection modified at")
   end
+
+  # "Select all" and "Unselect all" options
+  test "select all and unselect all actions" do
+    need_selenium 'to check and uncheck checkboxes'
+
+    visit page_with_token 'active', '/projects/' + api_fixture('groups')['aproject']['uuid']
+
+    # Go to "Data collections" tab and click on "Select all"
+    click_link 'Data collections'
+    wait_for_ajax
+
+    # Initially, all selection options for this tab should be disabled
+    click_button 'Selection'
+    within('.selection-action-container') do
+      assert_selector 'li.disabled', text: 'Create new collection with selected collections'
+      assert_selector 'li.disabled', text: 'Copy selected'
+    end
+
+    # Select all
+    click_button 'Select all'
+
+    assert_checkboxes_state('input[type=checkbox]', true, '"select all" should check all checkboxes')
+
+    # Now the selection options should be enabled
+    click_button 'Selection'
+    within('.selection-action-container') do
+      assert_selector 'li', text: 'Create new collection with selected collections'
+      assert_no_selector 'li.disabled', text: 'Copy selected'
+    end
+
+    # Go to Jobs and pipelines tab and assert none selected
+    click_link 'Jobs and pipelines'
+    wait_for_ajax
+
+    # Since this is the first visit to this tab, all selection options should be disabled
+    click_button 'Selection'
+    within('.selection-action-container') do
+      assert_selector 'li.disabled', text: 'Create new collection with selected collections'
+      assert_selector 'li.disabled', text: 'Copy selected'
+    end
+
+    assert_checkboxes_state('input[type=checkbox]', false, 'no checkboxes should be checked on first visit to this tab')
+
+    # Select all
+    click_button 'Select all'
+    assert_checkboxes_state('input[type=checkbox]', true, '"select all" should check all checkboxes')
+
+    # Applicable selection options should be enabled
+    click_button 'Selection'
+    within('.selection-action-container') do
+      assert_selector 'li.disabled', text: 'Create new collection with selected collections'
+      assert_selector 'li', text: 'Copy selected'
+      assert_no_selector 'li.disabled', text: 'Copy selected'
+    end
+
+    # Unselect all
+    click_button 'Unselect all'
+    assert_checkboxes_state('input[type=checkbox]', false, '"unselect all" should clear all checkboxes')
+
+    # All selection options should be disabled again
+    click_button 'Selection'
+    within('.selection-action-container') do
+      assert_selector 'li.disabled', text: 'Create new collection with selected collections'
+      assert_selector 'li.disabled', text: 'Copy selected'
+    end
+
+    # Go back to Data collections tab and verify all are still selected
+    click_link 'Data collections'
+    wait_for_ajax
+
+    # Selection options should be enabled because all collections in this tab are still selected
+    click_button 'Selection'
+    within('.selection-action-container') do
+      assert_selector 'li', text: 'Create new collection with selected collections'
+      assert_no_selector 'li.disabled', text: 'Copy selected'
+    end
+
+    assert_checkboxes_state('input[type=checkbox]', true, 'collections should still be selected after returning to this tab')
+
+    # Unselect all
+    find('button#unselect-all').click
+    assert_checkboxes_state('input[type=checkbox]', false, '"unselect all" should clear all checkboxes')
+
+    # Now all selection options should be disabled because none of the collections are checked
+    click_button 'Selection'
+    within('.selection-action-container') do
+      assert_selector 'li.disabled', text: 'Create new collection with selected collections'
+      assert_selector 'li.disabled', text: 'Copy selected'
+    end
+
+    # Verify checking just one checkbox still works as expected
+    within('tr', text: api_fixture('collections')['collection_to_move_around_in_aproject']['name']) do
+      find('input[type=checkbox]').click
+    end
+
+    click_button 'Selection'
+    within('.selection-action-container') do
+      assert_selector 'li', text: 'Create new collection with selected collections'
+      assert_no_selector 'li.disabled', text: 'Copy selected'
+    end
+  end
 end
index 4a15851636fba6f0502f741d181af42727a921af..4e1920a939de88d67ffb7cabee8f40ae3c482caa 100644 (file)
@@ -84,7 +84,8 @@ class ReportIssueTest < ActionDispatch::IntegrationTest
     ['active', api_fixture('users')['active']],
     ['admin', api_fixture('users')['admin']],
     ['active_no_prefs', api_fixture('users')['active_no_prefs']],
-    ['active_no_prefs_profile', api_fixture('users')['active_no_prefs_profile']],
+    ['active_no_prefs_profile_no_getting_started_shown',
+        api_fixture('users')['active_no_prefs_profile_no_getting_started_shown']],
   ].each do |token, user|
 
     test "check version info and report issue for user #{token}" do
diff --git a/apps/workbench/test/integration/repositories_browse_test.rb b/apps/workbench/test/integration/repositories_browse_test.rb
new file mode 100644 (file)
index 0000000..a6a85b5
--- /dev/null
@@ -0,0 +1,53 @@
+require 'integration_helper'
+require 'helpers/repository_stub_helper'
+require 'helpers/share_object_helper'
+
+class RepositoriesTest < ActionDispatch::IntegrationTest
+  include RepositoryStubHelper
+  include ShareObjectHelper
+
+  reset_api_fixtures :after_each_test, false
+
+  setup do
+    need_javascript
+  end
+
+  test "browse repository from jobs#show" do
+    sha1 = api_fixture('jobs')['running']['script_version']
+    _, fakecommit, fakefile =
+      stub_repo_content sha1: sha1, filename: 'crunch_scripts/hash'
+    show_object_using 'active', 'jobs', 'running', sha1
+    click_on api_fixture('jobs')['running']['script']
+    assert_text fakefile
+    click_on 'crunch_scripts'
+    assert_selector 'td a', text: 'hash'
+    click_on 'foo'
+    assert_selector 'td a', text: 'crunch_scripts'
+    click_on sha1
+    assert_text fakecommit
+
+    show_object_using 'active', 'jobs', 'running', sha1
+    click_on 'active/foo'
+    assert_selector 'td a', text: 'crunch_scripts'
+
+    show_object_using 'active', 'jobs', 'running', sha1
+    click_on sha1
+    assert_text fakecommit
+  end
+
+  test "browse using arv-git-http" do
+    repo = api_fixture('repositories')['foo']
+    portfile =
+      File.expand_path('../../../../../tmp/arv-git-httpd-ssl.port', __FILE__)
+    gitsslport = File.read(portfile)
+    Repository.any_instance.
+      stubs(:http_fetch_url).
+      returns "https://localhost:#{gitsslport}/#{repo['name']}.git"
+    commit_sha1 = '1de84a854e2b440dc53bf42f8548afa4c17da332'
+    visit page_with_token('active', "/repositories/#{repo['uuid']}/commit/#{commit_sha1}")
+    assert_text "Date:   Tue Mar 18 15:55:28 2014 -0400"
+    visit page_with_token('active', "/repositories/#{repo['uuid']}/tree/#{commit_sha1}")
+    assert_selector "tbody td a", "foo"
+    assert_text "12 bytes"
+  end
+end
index 6d680e2d67fe2926c6cbb9177970d5ef9bdeec86..e50907bf8b3d8a67c6f307faab011b19aa4210c7 100644 (file)
@@ -9,7 +9,7 @@ class UserManageAccountTest < ActionDispatch::IntegrationTest
   def verify_manage_account user
     if user['is_active']
       within('.navbar-fixed-top') do
-        find('a', text: "#{user['email']}").click
+        page.find("#notifications-menu").click
         within('.dropdown-menu') do
           find('a', text: 'Manage account').click
         end
@@ -24,7 +24,7 @@ class UserManageAccountTest < ActionDispatch::IntegrationTest
       add_and_verify_ssh_key
     else  # inactive user
       within('.navbar-fixed-top') do
-        find('a', text: "#{user['email']}").click
+        page.find("#notifications-menu").click
         within('.dropdown-menu') do
           assert page.has_no_link?('Manage profile'), 'Found link - Manage profile'
         end
@@ -162,7 +162,7 @@ class UserManageAccountTest < ActionDispatch::IntegrationTest
     # Revisit the page and verify the request sent message along with
     # the request button.
     within('.navbar-fixed-top') do
-      find('a', text: 'spectator').click
+      page.find("#notifications-menu").click
       within('.dropdown-menu') do
         find('a', text: 'Manage account').click
       end
index accc6a7963ef5a9a8dee8ad188f0c8552d9d070d..3aac6a0ea08375b7c152babf1d9086553f9af8c8 100644 (file)
@@ -24,20 +24,31 @@ class UserProfileTest < ActionDispatch::IntegrationTest
         assert page.has_no_text?('Save profile'), 'Found text - Save profile'
       end
     elsif invited
-      assert page.has_text?('Please check the box below to indicate that you have read and accepted the user agreement'), 'Not found text - Please check the box below . . .'
+      assert page.has_text?('Please check the box below to indicate that you have read and accepted the user agreement'),
+        'Not found text - Please check the box below . . .'
       assert page.has_no_text?('Save profile'), 'Found text - Save profile'
     else
       assert page.has_text?('Your account is inactive'), 'Not found text - Your account is inactive'
       assert page.has_no_text?('Save profile'), 'Found text - Save profile'
     end
 
+    # If the user has not already seen the Getting Started modal, it will be shown on this first visit.
+    if user and user['is_active'] and !user['prefs']['getting_started_shown']
+      within '.modal-content' do
+        assert_text 'Getting Started'
+        assert_selector 'button', text: 'Next'
+        assert_selector 'button', text: 'Prev'
+        first('button', text: 'x').click
+      end
+    end
+
     within('.navbar-fixed-top') do
       if !user
         assert page.has_link?('Log in'), 'Not found link - Log in'
       else
         # my account menu
-        assert page.has_link?("#{user['email']}"), 'Not found link - email'
-        find('a', text: "#{user['email']}").click
+        assert(page.has_link?("notifications-menu"), 'no user menu')
+        page.find("#notifications-menu").click
         within('.dropdown-menu') do
           if user['is_active']
             assert page.has_no_link?('Not active'), 'Found link - Not active'
@@ -98,7 +109,11 @@ class UserProfileTest < ActionDispatch::IntegrationTest
     click_button "Save profile"
     # profile saved and in profile page now with success
     assert page.has_text?('Thank you for filling in your profile'), 'No text - Thank you for filling'
-    click_link 'Back to work!'
+    if user['prefs']['getting_started_shown']
+      click_link 'Back to work!'
+    else
+      click_link 'Get started'
+    end
 
     # profile saved and in home page now
     assert page.has_text?('Active pipelines'), 'No text - Active pipelines'
@@ -111,7 +126,10 @@ class UserProfileTest < ActionDispatch::IntegrationTest
     ['active', api_fixture('users')['active'], true, true],
     ['admin', api_fixture('users')['admin'], true, true],
     ['active_no_prefs', api_fixture('users')['active_no_prefs'], true, false],
-    ['active_no_prefs_profile', api_fixture('users')['active_no_prefs_profile'], true, false],
+    ['active_no_prefs_profile_no_getting_started_shown',
+      api_fixture('users')['active_no_prefs_profile_no_getting_started_shown'], true, false],
+    ['active_no_prefs_profile_with_getting_started_shown',
+      api_fixture('users')['active_no_prefs_profile_with_getting_started_shown'], true, false],
   ].each do |token, user, invited, has_profile|
 
     test "visit home page when profile is configured for user #{token}" do
index fdde55d41db63aced4559dc8d03409a58cc7246c..f335722c07673613cc15a192b8b9d191b3a605cc 100644 (file)
@@ -44,7 +44,7 @@ class ActiveSupport::TestCase
     end
   end
 
-  setup do
+  teardown do
     Thread.current[:arvados_api_token] = nil
     Thread.current[:user] = nil
     Thread.current[:reader_tokens] = nil
@@ -137,7 +137,7 @@ class ApiServerForTests
   @main_process_pid = $$
   @@server_is_running = false
 
-  def check_call *args
+  def check_output *args
     output = nil
     Bundler.with_clean_env do
       output = IO.popen *args do |io|
@@ -153,7 +153,12 @@ class ApiServerForTests
   def run_test_server
     env_script = nil
     Dir.chdir PYTHON_TESTS_DIR do
-      env_script = check_call %w(python ./run_test_server.py start --auth admin)
+      # These are no-ops if we're running within run-tests.sh (except
+      # that we do get a useful env_script back from "start", even
+      # though it doesn't need to start up a new server).
+      env_script = check_output %w(python ./run_test_server.py start --auth admin)
+      check_output %w(python ./run_test_server.py start_arv-git-httpd)
+      check_output %w(python ./run_test_server.py start_nginx)
     end
     test_env = {}
     env_script.each_line do |line|
@@ -169,8 +174,10 @@ class ApiServerForTests
 
   def stop_test_server
     Dir.chdir PYTHON_TESTS_DIR do
-      # This is a no-op if we're running within run-tests.sh
-      check_call %w(python ./run_test_server.py stop)
+      # These are no-ops if we're running within run-tests.sh
+      check_output %w(python ./run_test_server.py stop_nginx)
+      check_output %w(python ./run_test_server.py stop_arv-git-httpd)
+      check_output %w(python ./run_test_server.py stop)
     end
     @@server_is_running = false
   end
@@ -196,7 +203,7 @@ class ApiServerForTests
 
   def run_rake_task task_name, arg_string
     Dir.chdir ARV_API_SERVER_DIR do
-      check_call ['bundle', 'exec', 'rake', "#{task_name}[#{arg_string}]"]
+      check_output ['bundle', 'exec', 'rake', "#{task_name}[#{arg_string}]"]
     end
   end
 end
@@ -270,12 +277,17 @@ class ActiveSupport::TestCase
   end
 
   def after_teardown
-    if self.class.want_reset_api_fixtures[:after_each_test]
+    if self.class.want_reset_api_fixtures[:after_each_test] and
+        @want_reset_api_fixtures != false
       self.class.reset_api_fixtures_now
     end
     super
   end
 
+  def reset_api_fixtures_after_test t=true
+    @want_reset_api_fixtures = t
+  end
+
   protected
   def self.reset_api_fixtures_now
     # Never try to reset fixtures when we're just using test
index 4cad6e64b604b06858267055a60f81a25d13c096..747cfc12e5f6b4b70d4e586cbb7bd17c824dd070 100644 (file)
@@ -1,9 +1,13 @@
 require 'test_helper'
 
 class PipelineInstanceTest < ActiveSupport::TestCase
-  def attribute_editable_for?(token_name, pi_name, attr_name, ever=nil)
+  def find_pi_with(token_name, pi_name)
     use_token token_name
-    find_fixture(PipelineInstance, pi_name).attribute_editable?(attr_name, ever)
+    find_fixture(PipelineInstance, pi_name)
+  end
+
+  def attribute_editable_for?(token_name, pi_name, attr_name, ever=nil)
+    find_pi_with(token_name, pi_name).attribute_editable?(attr_name, ever)
   end
 
   test "admin can edit name" do
@@ -46,4 +50,62 @@ class PipelineInstanceTest < ActiveSupport::TestCase
                                    "components"),
            "components not editable on new pipeline")
   end
+
+  test "job_logs for partially complete pipeline" do
+    log_uuid = api_fixture("collections", "real_log_collection", "uuid")
+    pi = find_pi_with(:active, "running_pipeline_with_complete_job")
+    assert_equal({previous: log_uuid, running: nil}, pi.job_log_ids)
+  end
+
+  test "job_logs for complete pipeline" do
+    log_uuid = api_fixture("collections", "real_log_collection", "uuid")
+    pi = find_pi_with(:active, "complete_pipeline_with_two_jobs")
+    assert_equal({ancient: log_uuid, previous: log_uuid}, pi.job_log_ids)
+  end
+
+  test "job_logs for malformed pipeline" do
+    pi = find_pi_with(:active, "components_is_jobspec")
+    assert_empty(pi.job_log_ids.select { |_, log| not log.nil? })
+  end
+
+  def check_stderr_logs(token_name, pi_name, log_name)
+    pi = find_pi_with(token_name, pi_name)
+    actual_logs = pi.stderr_log_lines
+    expected_text = api_fixture("logs", log_name, "properties", "text")
+    expected_text.each_line do |log_line|
+      assert_includes(actual_logs, log_line.chomp)
+    end
+  end
+
+  test "stderr_logs for running pipeline" do
+    check_stderr_logs(:active,
+                      "pipeline_in_publicly_accessible_project",
+                      "log_line_for_pipeline_in_publicly_accessible_project")
+  end
+
+  test "stderr_logs for job in complete pipeline" do
+    check_stderr_logs(:active,
+                      "failed_pipeline_with_two_jobs",
+                      "crunchstat_for_previous_job")
+  end
+
+  test "has_readable_logs? for unrun pipeline" do
+    pi = find_pi_with(:active, "new_pipeline")
+    refute(pi.has_readable_logs?)
+  end
+
+  test "has_readable_logs? for running pipeline" do
+    pi = find_pi_with(:active, "running_pipeline_with_complete_job")
+    assert(pi.has_readable_logs?)
+  end
+
+  test "has_readable_logs? for complete pipeline" do
+    pi = find_pi_with(:active, "pipeline_in_publicly_accessible_project_but_other_objects_elsewhere")
+    assert(pi.has_readable_logs?)
+  end
+
+  test "has_readable_logs? for complete pipeline when jobs unreadable" do
+    pi = find_pi_with(:anonymous, "pipeline_in_publicly_accessible_project_but_other_objects_elsewhere")
+    refute(pi.has_readable_logs?)
+  end
 end
index 3d54c9c2b32c1fc05c2ce536a5ece807df59d49b..ca4116d7cee9015cbeb03bfaaf3ea4ca7dadfe03 100644 (file)
@@ -1,7 +1,8 @@
 import arvados
 import os
-import robust_put
 import stat
+import arvados.commands.run
+import logging
 
 # Implements "Virtual Working Directory"
 # Provides a way of emulating a shared writable directory in Keep based
@@ -32,23 +33,65 @@ def checkout(source_collection, target_dir, keepmount=None):
         for f in files:
             os.symlink(os.path.join(root, f), os.path.join(target_dir, rel, f))
 
-# Delete all symlinks and check in any remaining normal files.
-# If merge == True, merge the manifest with source_collection and return a
-# CollectionReader for the combined collection.
-def checkin(source_collection, target_dir, merge=True):
-    # delete symlinks, commit directory, merge manifests and return combined
-    # collection.
+def checkin(target_dir):
+    """Write files in `target_dir` to Keep.
+
+    Regular files or symlinks to files outside the keep mount are written to
+    Keep as normal files (Keep does not support symlinks).
+
+    Symlinks to files in the keep mount will result in files in the new
+    collection which reference existing Keep blocks, with no data copying
+    necessary.
+
+    Returns a tuple (collection, last_error): a new Collection object with
+    data flushed but the collection record not saved to the API, and the
+    last IOError/OSError encountered while walking target_dir (None if
+    every file was written successfully).
+
+    """
+
+    outputcollection = arvados.collection.Collection(num_retries=5)
+
+    if target_dir[-1:] != '/':
+        target_dir += '/'
+
+    collections = {}
+
+    logger = logging.getLogger("arvados")
+
+    last_error = None
     for root, dirs, files in os.walk(target_dir):
         for f in files:
-            s = os.lstat(os.path.join(root, f))
-            if stat.S_ISLNK(s.st_mode):
-                os.unlink(os.path.join(root, f))
-
-    uuid = robust_put.upload(target_dir)
-    if merge:
-        cr1 = arvados.CollectionReader(source_collection)
-        cr2 = arvados.CollectionReader(uuid)
-        combined = arvados.CollectionReader(cr1.manifest_text() + cr2.manifest_text())
-        return combined
-    else:
-        return arvados.CollectionReader(uuid)
+            try:
+                s = os.lstat(os.path.join(root, f))
+
+                writeIt = False
+
+                if stat.S_ISREG(s.st_mode):
+                    writeIt = True
+                elif stat.S_ISLNK(s.st_mode):
+                    # 1. check if it is a link into a collection
+                    real = os.path.split(os.path.realpath(os.path.join(root, f)))
+                    (pdh, branch) = arvados.commands.run.is_in_collection(real[0], real[1])
+                    if pdh is not None:
+                        # 2. load collection
+                        if pdh not in collections:
+                            collections[pdh] = arvados.collection.CollectionReader(pdh,
+                                                                                   api_client=outputcollection._my_api(),
+                                                                                   keep_client=outputcollection._my_keep(),
+                                                                                   num_retries=5)
+                        # 3. copy arvfile to new collection
+                        outputcollection.copy(branch, os.path.join(root[len(target_dir):], f), source_collection=collections[pdh])
+                    else:
+                        writeIt = True
+
+                if writeIt:
+                    reldir = root[len(target_dir):]
+                    with outputcollection.open(os.path.join(reldir, f), "wb") as writer:
+                        with open(os.path.join(root, f), "rb") as reader:
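+                            # stream the file in 64 KiB chunks to bound
+                            # memory use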
+                            dat = reader.read(64*1024)
+                            while dat:
+                                writer.write(dat)
+                                dat = reader.read(64*1024)
+            except (IOError, OSError) as e:
+                logger.error(e)
+                last_error = e
+
+    return (outputcollection, last_error)
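+
+# A minimal usage sketch (hypothetical output directory path; run-command
+# below calls checkin(outdir) in the same way):
+#
+#   outcollection, err = checkin("/tmp/task-output")
+#   if err is None:
+#       print outcollection.manifest_text()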
index c07debd787eecfc6696c5d614d69c013f8028dd7..1ff63616ef638db376d49cfeecf8655f04dddbb1 100755 (executable)
@@ -57,12 +57,10 @@ else:
     jobp = json.loads(args.script_parameters)
     os.environ['JOB_UUID'] = 'zzzzz-8i9sb-1234567890abcde'
     os.environ['TASK_UUID'] = 'zzzzz-ot0gb-1234567890abcde'
-    os.environ['CRUNCH_SRC'] = '/tmp/crunche-src'
+    os.environ['CRUNCH_SRC'] = '/tmp/crunch-src'
     if 'TASK_KEEPMOUNT' not in os.environ:
         os.environ['TASK_KEEPMOUNT'] = '/keep'
 
-links = []
-
 def sub_tmpdir(v):
     return os.path.join(arvados.current_task().tmpdir, 'tmpdir')
 
@@ -415,31 +413,27 @@ signal.signal(signal.SIGINT, signal.SIG_DFL)
 signal.signal(signal.SIGTERM, signal.SIG_DFL)
 signal.signal(signal.SIGQUIT, signal.SIG_DFL)
 
-for l in links:
-    os.unlink(l)
-
 logger.info("the following output files will be saved to keep:")
 
-subprocess.call(["find", ".", "-type", "f", "-printf", "run-command: %12.12s %h/%f\\n"], stdout=sys.stderr)
+subprocess.call(["find", "-L", ".", "-type", "f", "-printf", "run-command: %12.12s %h/%f\\n"], stdout=sys.stderr, cwd=outdir)
 
 logger.info("start writing output to keep")
 
-if "task.vwd" in taskp:
-    if "task.foreach" in jobp:
-        # This is a subtask, so don't merge with the original collection, that will happen at the end
-        outcollection = vwd.checkin(subst.do_substitution(taskp, taskp["task.vwd"]), outdir, merge=False).manifest_text()
-    else:
-        # Just a single task, so do merge with the original collection
-        outcollection = vwd.checkin(subst.do_substitution(taskp, taskp["task.vwd"]), outdir, merge=True).manifest_text()
-else:
-    outcollection = robust_put.upload(outdir, logger)
+if "task.vwd" in taskp and "task.foreach" in jobp:
+    for root, dirs, files in os.walk(outdir):
+        for f in files:
+            s = os.lstat(os.path.join(root, f))
+            if stat.S_ISLNK(s.st_mode):
+                os.unlink(os.path.join(root, f))
+
+(outcollection, checkin_error) = vwd.checkin(outdir)
 
 # Success if we ran any subprocess, they all exited 0, and writing the output to Keep succeeded.
-success = rcode and all(status == 0 for status in rcode.itervalues())
+success = rcode and all(status == 0 for status in rcode.itervalues()) and not checkin_error
 
 api.job_tasks().update(uuid=arvados.current_task()['uuid'],
                                      body={
-                                         'output': outcollection,
+                                         'output': outcollection.manifest_text(),
                                          'success': success,
                                          'progress':1.0
                                      }).execute()
index 394b2237ec1b71510bebbd51e4cc5e4a2d7ce13d..e4dc782d3164411c63aa7dc7f8c2481a63de967b 100644 (file)
@@ -13,16 +13,16 @@ arvados_workbench_host: localhost
 exclude: ["Rakefile", "tmp", "vendor"]
 
 navbar:
-  start:
-    - Getting Started:
-      - start/index.html.textile.liquid
-    - Quickstart:
-      - start/getting_started/publicproject.html.textile.liquid
-      - start/getting_started/firstpipeline.html.textile.liquid
-    - Common Use Cases:
-      - start/getting_started/sharedata.html.textile.liquid
-    - Next Steps:
-      - start/getting_started/nextsteps.html.textile.liquid
+  #start:
+    #- Getting Started:
+      #- start/index.html.textile.liquid
+    #- Quickstart:
+      #- start/getting_started/publicproject.html.textile.liquid
+      #- start/getting_started/firstpipeline.html.textile.liquid
+    #- Common Use Cases:
+      #- start/getting_started/sharedata.html.textile.liquid
+    #- Next Steps:
+      #- start/getting_started/nextsteps.html.textile.liquid
 
   userguide:
     - Welcome:
index 3e23e6d4e84d543e331964c43263bc3dc966e2ea..6d88b2b4768c0479cf8e77b9442f4f52da16427a 100644 (file)
@@ -7,16 +7,16 @@
         <span class="icon-bar"></span>
         <span class="icon-bar"></span>
       </button>
-      <a class="navbar-brand" href="{{ site.baseurl }}/">Arvados</a>
+      <a class="navbar-brand" href="{{ site.baseurl }}/">Arvados Docs</a>
     </div>
     <div class="collapse navbar-collapse" id="bs-navbar-collapse">
       <ul class="nav navbar-nav">
-        <li {% if page.navsection == 'start' %} class="active" {% endif %}><a href="{{ site.baseurl }}/start/index.html">Getting&nbsp;Started</a></li>
+        <!--<li {% if page.navsection == 'start' %} class="active" {% endif %}><a href="{{ site.baseurl }}/start/index.html">Getting&nbsp;Started</a></li>-->
         <li {% if page.navsection == 'userguide' %} class="active" {% endif %}><a href="{{ site.baseurl }}/user/index.html">User&nbsp;Guide</a></li>
         <li {% if page.navsection == 'sdk' %} class="active" {% endif %}><a href="{{ site.baseurl }}/sdk/index.html">SDKs</a></li>
         <li {% if page.navsection == 'api' %} class="active" {% endif %}><a href="{{ site.baseurl }}/api/index.html">API</a></li>
         <li {% if page.navsection == 'installguide' %} class="active" {% endif %}><a href="{{ site.baseurl }}/install/index.html">Install</a></li>
-        <li><a href="https://arvados.org/projects/arvados/" style="padding-left: 2em">Developer Site&nbsp;&raquo;</a></li>
+        <li><a href="https://arvados.org/projects/arvados/" style="padding-left: 2em">arvados.org&nbsp;&raquo;</a></li>
       </ul>
 
       <div class="pull-right" style="padding-top: 6px">
index b0b210f0e7a6397167636f0465d477175441600a..1acb37ec974f054795f0161c9fd6e8c219febd86 100644 (file)
@@ -2,7 +2,7 @@
 <html>
   <head>
     <meta charset="utf-8">
-    <title>{% unless page.title == "Arvados" %} Arvados | Documentation | {% endunless %}{{ page.title }}</title>
+    <title>{% unless page.title == "Arvados | Documentation" %} Arvados | Documentation | {% endunless %}{{ page.title }}</title>
     <meta name="viewport" content="width=device-width, initial-scale=1.0">
     <meta name="description" content="">
     <meta name="author" content="">
index b57858e0ddc279d339de161fc041428e823fb96e..90a3c4c72216136909ca6fe1f5c074d5298b440f 100644 (file)
@@ -34,7 +34,7 @@ table(table table-bordered table-condensed).
 |minimum_script_version |string     |Git branch, tag, or commit hash specifying the minimum acceptable script version (earliest ancestor) to consider when deciding whether to re-use a past job.[1]|query|@"c3e86c9"@|
 |exclude_script_versions|array of strings|Git commit branches, tags, or hashes to exclude when deciding whether to re-use a past job.|query|@["8f03c71","8f03c71"]@
 @["badtag1","badtag2"]@|
-|filters|array|Conditions to find Jobs to reuse.|query||
+|filters|array of arrays|Conditions to find Jobs to reuse.|query||
 |find_or_create         |boolean    |Before creating, look for an existing job that has identical script, script_version, and script_parameters to those in the present job, has nondeterministic=false, and did not fail (it could be queued, running, or completed). If such a job exists, respond with the existing job instead of submitting a new one.|query|@false@|
 
 When a job is submitted to the queue using the **create** method, the @script_version@ attribute is updated to a full 40-character Git commit hash based on the current content of the specified repository. If @script_version@ cannot be resolved, the job submission is rejected.
@@ -60,8 +60,8 @@ Because Arvados records the exact version of the script, input parameters, and r
 notextile. <div class="spaced-out">
 
 # If @find_or_create@ is false or omitted, create a new job and skip the rest of these steps.
-# If @filters@ are specified, find jobs that match those filters.  Filters *must* be specified to limit the @repository@ and @script@ attributes.  An error is returned if they are missing.
-# If @filters@ are not specified, find jobs with the same @repository@ and @script@, with a @script_version@ between @minimum_script_version@ and @script_version@ (excluding @excluded_script_versions@), and a @docker_image_locator@ with the latest Collection that matches the submitted job's @docker_image@ constraint.  If the submitted job includes an @arvados_sdk_version@ constraint, jobs must have an @arvados_sdk_version@ between that refspec and HEAD to be found.
+# If @filters@ are specified, find jobs that match those filters. If any filters are given, there must be at least one filter on the @repository@ attribute and one on the @script@ attribute; otherwise an error is returned (see the sketch after this list).
+# If @filters@ are not specified, find jobs with the same @repository@ and @script@, with a @script_version@ between @minimum_script_version@ and @script_version@ inclusive (excluding @excluded_script_versions@), and a @docker_image_locator@ with the latest Collection that matches the submitted job's @docker_image@ constraint.  If the submitted job includes an @arvados_sdk_version@ constraint, jobs must have an @arvados_sdk_version@ between that refspec and HEAD to be found. *This form is deprecated: use filters instead.*
 # If the found jobs include a completed job, and all found completed jobs have consistent output, return one of them.  Which specific job is returned is undefined.
 # If the found jobs only include incomplete jobs, return one of them.  Which specific job is returned is undefined.
 # If no job has been returned so far, create and return a new job.
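+
+A hedged sketch of job reuse using the Python SDK (the repository/script names and input locator are placeholders; this assumes the SDK forwards @find_or_create@ and @filters@ to the *create* method's query parameters as documented above):
+
+<notextile>
+<pre><code>import arvados
+
+api = arvados.api("v1")
+job = api.jobs().create(
+    body={"job": {
+        "script": "hash",
+        "repository": "yourusername/yourrepo",
+        "script_version": "master",
+        "script_parameters": {"input": "placeholder-input-locator"},
+    }},
+    find_or_create=True,
+    # Filters must constrain both "repository" and "script".
+    filters=[["repository", "=", "yourusername/yourrepo"],
+             ["script", "=", "hash"]],
+).execute()
+print job["uuid"]
+</code></pre>
+</notextile>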
index 80f5de6f06ff9eae7b31947bdfc3147605027cae..fd635034b1e8db79b97a1893de07d07f87ed95ee 100644 (file)
@@ -23,7 +23,10 @@ table(table table-bordered table-condensed).
 |_. Attribute|_. Type|_. Description|_. Notes|
 |script|string|The filename of the job script.|This program will be invoked by Crunch for each job task. It is given as a path to an executable file, relative to the @/crunch_scripts@ directory in the Git tree specified by the _repository_ and _script_version_ attributes.|
 |script_parameters|hash|The input parameters for the job.|Conventionally, one of the parameters is called @"input"@. Typically, some parameter values are collection UUIDs. Ultimately, though, the significance of parameters is left entirely up to the script itself.|
-|repository|string|Git repository|Given as the name of a locally hosted Git repository.|
+|repository|string|Git repository name or URL.|Source of the repository where the given script_version is to be found. This can be given as the name of a locally hosted repository, or as a publicly accessible URL starting with @git://@, @http://@, or @https://@.
+Examples:
+@yourusername/yourrepo@
+@https://github.com/curoverse/arvados.git@|
 |script_version|string|Git commit|During a **create** transaction, this is the Git branch, tag, or hash supplied by the client. Before the job starts, Arvados updates it to the full 40-character SHA-1 hash of the commit used by the job.
 See "Specifying Git versions":#script_version below for more detail about acceptable ways to specify a commit.|
 |cancelled_by_client_uuid|string|API client ID|Is null if job has not been cancelled|
index 27cc711985f0a21f59b912bb9aa1cade80307b4b..0f9b25ec2ce911be6a0f5a822c82ec5e8bc1e5a8 100644 (file)
@@ -19,5 +19,7 @@ Each Repository has, in addition to the usual "attributes of Arvados resources":
 table(table table-bordered table-condensed).
 |_. Attribute|_. Type|_. Description|_. Example|
 |name|string|The name of the repository on disk.  Repository names must begin with a letter and contain only alphanumerics.  Unless the repository is owned by the system user, the name must begin with the owner's username, then be separated from the base repository name with @/@.  You may not create a repository that is owned by a user without a username.|@username/project1@|
-|fetch_url|string|The git remote's fetch URL for the repository.  Read-only.||
-|push_url|string|The git remote's push URL for the repository.  Read-only.||
+|clone_urls|array|URLs from which the repository can be cloned. Read-only.|@["git@git.zzzzz.arvadosapi.com:foo/bar.git",
+ "https://git.zzzzz.arvadosapi.com/foo/bar.git"]@|
+|fetch_url|string|URL suggested as a fetch-url in git config. Deprecated. Read-only.||
+|push_url|string|URL suggested as a push-url in git config. Deprecated. Read-only.||
index 53a798ffa6adaa58f2f5fa8b1ff3aaf562c19be1..4e951d6360de861029f7f62d94aa822801267cf4 100644 (file)
@@ -2,7 +2,7 @@
 layout: default
 no_nav_left: true
 navsection: top
-title: Arvados
+title: Arvados | Documentation
 ...
 
 <div class="jumbotron">
@@ -21,28 +21,53 @@ title: Arvados
 
 <div class="container-fluid">
   <div class="row">
-    <div class="col-sm-5">
+    <div class="col-sm-6">
+      <p><strong>What is Arvados?</strong></p>
       <p><a href="https://arvados.org/">Arvados</a> enables you to quickly begin using cloud computing resources in your data science work. It allows you to track your methods and datasets, share them securely, and easily re-run analyses.
       </p>
-      <p><strong>Quickstart</strong>: Check out our <a href="{{ site.baseurl }}/start/index.html">key features</a>, complete with screenshots, and then follow our tutorial to <a href="{{ site.baseurl }}/start/getting_started/firstpipeline.html">run your first pipeline</a> using our <a href="http://lp.curoverse.com/beta-signup/">public beta</a>.
+      <p><strong>News</strong></p>
+      <p>Read our <a href="https://arvados.org/projects/arvados/blogs">blog updates</a> or look through our <a href="https://arvados.org/projects/arvados/activity">recent developer activity</a>.
       </p>
-      <p><strong>News</strong>: Read our <a href="https://arvados.org/projects/arvados/blogs">blog updates</a> or look through our <a href="https://arvados.org/projects/arvados/activity">recent developer activity</a>.
+      <p><strong>Questions?</strong></p>
+      <p>Email <a href="http://lists.arvados.org/mailman/listinfo/arvados">the mailing list</a>, or chat with us on IRC: <a href="irc://irc.oftc.net:6667/#arvados">#arvados</a> @ OFTC (you can <a href="https://webchat.oftc.net/?channels=arvados">join in your browser</a>).
       </p>
-      <p><strong>Questions?</strong> Email <a href="http://lists.arvados.org/mailman/listinfo/arvados">the mailing list</a>, or chat with us on IRC: <a href="irc://irc.oftc.net:6667/#arvados">#arvados</a> @ OFTC (you can <a href="https://webchat.oftc.net/?channels=arvados">join in your browser</a>).
+      <p><strong>Want to contribute?</strong></p>
+      <p>Check out our <a href="https://arvados.org/projects/arvados">developer site</a>. We're open source; browse our code on <a href="https://github.com/curoverse/arvados">GitHub</a>.
       </p>
-      <p><strong>Want to contribute?</strong> Check out our <a href="https://arvados.org/projects/arvados">developer site</a>. We're open source, check out our code on <a href="https://github.com/curoverse/arvados">github</a>.
-      </p>
-      <p><strong>License</strong>: Arvados is under the copyleft <a href="{{ site.baseurl }}/user/copying/agpl-3.0.html">GNU AGPL v3</a>, with our SDKs under <a href="{{ site.baseurl }}/user/copying/LICENSE-2.0.html">Apache License 2.0</a> (so that you can incorporate proprietary toolchains into your pipelines).
+      <p><strong>License</strong></p>
+      <p>Arvados is under the copyleft <a href="{{ site.baseurl }}/user/copying/agpl-3.0.html">GNU AGPL v3</a>, with our SDKs under <a href="{{ site.baseurl }}/user/copying/LICENSE-2.0.html">Apache License 2.0</a> (so that you can incorporate proprietary toolchains into your pipelines).
       </p>
 
     </div>
-    <div class="col-sm-7" style="border-left: solid; border-width: 1px">
-      <p>Below you can also find more in-depth guides for using Arvados.
+    <div class="col-sm-6" style="border-left: solid; border-width: 1px">
+      <p><strong>Quickstart</strong></p>
+      <p>
+        Try any pipeline from the <a href="https://arvados.org/projects/arvados/wiki/Public_Pipelines_and_Datasets">list of public pipelines</a>. For instance, the <a href="http://curover.se/pathomap">Pathomap Pipeline</a> links to these <a href="https://arvados.org/projects/arvados/wiki/pathomap_tutorial/">step-by-step instructions</a> for trying Arvados out right in your browser using Curoverse's <a href="http://lp.curoverse.com/beta-signup/">public Arvados instance</a>.
       </p>
-      <br>
+        <!--<p>-->
+      <!--<ol>-->
+         <!--<li>-->
+           <!--Go to <a href="https://{{ site.arvados_workbench_host }}/" target="_blank">https://{{ site.arvados_workbench_host }}/</a>-->
+        <!--</li><li>-->
+          <!--Register with any Google account-->
+        <!--</li><li>-->
+        <!--Follow the Getting Started guide-->
+        <!--<br>-->
+        <!--<em>Tip: Don't see the guide? You can find it by clicking (in the upper-right corner) <span class="fa fa-lg fa-question-circle"></span> &gt; Getting Started)</em>-->
+        <!--</li>-->
+      <!--</ol>-->
+      <!--</p>-->
+      <p><strong>Pipeline Developer Quickstart</strong></p>
       <p>
-        <a href="{{ site.baseurl }}/start/index.html">Getting Started</a> &mdash; Start here if you're new to Arvados.
+      Want to port your pipeline to Arvados? Check out the step-by-step <a href="https://arvados.org/projects/arvados/wiki/Port_a_Pipeline">Port-a-Pipeline</a> guide on the Arvados wiki.
       </p>
+      <p><strong>More in-depth guides</strong></p>
+      <!--<p>-->
+        <!--<a href="{{ site.baseurl }}/start/index.html">Getting Started</a> &mdash; Start here if you're new to Arvados.-->
+      <!--</p>-->
       <p>
         <a href="{{ site.baseurl }}/user/index.html">User Guide</a> &mdash; How to manage data and do analysis with Arvados.
       </p>
index 4e105e82ec392d2a809381b736ee47228c082430..6901ea3d44585401e755ccd8b5d4dc4ed2e9c056 100644 (file)
@@ -13,7 +13,7 @@ This will be readable by the "All users" group, and therefore by every active us
 <pre><code>~$ <span class="userinput">prefix=`arv --format=uuid user current | cut -d- -f1`</span>
 ~$ <span class="userinput">echo "Site prefix is '$prefix'"</span>
 ~$ <span class="userinput">all_users_group_uuid="$prefix-j7d0g-fffffffffffffff"</span>
-~$ <span class="userinput">repo_uuid=`arv --format=uuid repository create --repository '{"name":"arvados"}'`</span>
+~$ <span class="userinput">repo_uuid=`arv --format=uuid repository create --repository '{"owner_uuid":"$prefix-tpzed-000000000000000", "name":"arvados"}'`</span>
 ~$ <span class="userinput">echo "Arvados repository uuid is '$repo_uuid'"</span>
 ~$ <span class="userinput">read -rd $'\000' newlink &lt;&lt;EOF; arv link create --link "$newlink"</span>
 <span class="userinput">{
index ef2e474f821bfae65ad280cdc1c1ab5f602bdb16..b65fe6975dfd9dbe4e2c8709e13a9c684fc7c2f7 100644 (file)
@@ -77,7 +77,7 @@ h3. secret_token
 Generate a new secret token for signing cookies:
 
 <notextile>
-<pre><code>~/arvados/services/api$ <span class="userinput">rake secret</span>
+<pre><code>~/arvados/services/api$ <span class="userinput">ruby -e 'puts rand(2**400).to_s(36)'</span>
 zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
 </code></pre></notextile>
 
index 7fb810d841913c34603126229baae98af7cb0ddf..eb537836f5be2a4e572cc267939df7b854d2cbea 100644 (file)
@@ -33,31 +33,36 @@ Verify that Keepstore is functional:
 
 <notextile>
 <pre><code>~$ <span class="userinput">keepstore -h</span>
-2014/10/29 14:23:38 Keep started: pid 6848
-Usage of keepstore:
+2015/05/08 13:41:16 keepstore starting, pid 2565
+Usage of ./keepstore:
+  -blob-signature-ttl=1209600: Lifetime of blob permission signatures. See services/api/config/application.default.yml.
+  -blob-signing-key-file="": File containing the secret key for generating and verifying blob permission signatures.
   -data-manager-token-file="": File with the API token used by the Data Manager. All DELETE requests or GET /index requests must carry this token.
   -enforce-permissions=false: Enforce permission signatures on requests.
-  -listen=":25107": Interface on which to listen for requests, in the format ipaddr:port. e.g. -listen=10.0.1.24:8000. Use -listen=:port to listen on all network interfaces.
+  -listen=":25107": Listening address, in the form "host:port". e.g., 10.0.1.24:8000. Omit the host part to listen on all interfaces.
+  -max-buffers=128: Maximum RAM to use for data buffers, given in multiples of block size (64 MiB). When this limit is reached, HTTP requests requiring buffers (like GET and PUT) will wait for buffer space to be released.
   -never-delete=false: If set, nothing will be deleted. HTTP 405 will be returned for valid DELETE requests.
-  -permission-key-file="": File containing the secret key for generating and verifying permission signatures.
-  -permission-ttl=1209600: Expiration time (in seconds) for newly generated permission signatures.
-  -pid="": Path to write pid file
-  -serialize=false: If set, all read and write operations on local Keep volumes will be serialized.
-  -volumes="": Comma-separated list of directories to use for Keep volumes, e.g. -volumes=/var/keep1,/var/keep2. If empty or not supplied, Keep will scan mounted filesystems for volumes with a /keep top-level directory.
+  -permission-key-file="": Synonym for -blob-signing-key-file.
+  -permission-ttl=0: Synonym for -blob-signature-ttl.
+  -pid="": Path to write pid file during startup. This file is kept open and locked with LOCK_EX until keepstore exits, so `fuser -k pidfile` is one way to shut down. Exit immediately if there is an error opening, locking, or writing the pid file.
+  -readonly=false: Do not write, delete, or touch anything on the following volumes.
+  -serialize=false: Serialize read and write operations on the following volumes.
+  -volume=[]: Local storage directory. Can be given more than once to add multiple directories. If none are supplied, the default is to use all directories named "keep" that exist in the top level directory of a mount point at startup time. Can be a comma-separated list, but this is deprecated: use multiple -volume arguments instead.
+  -volumes=[]: Deprecated synonym for -volume.
 </code></pre>
 </notextile>
 
-If you want access control on your Keepstore server(s), you should provide a permission key. The @-permission-key-file@ argument should contain the path to a file that contains a single line with a long random alphanumeric string. It should be the same as the @blob_signing_key@ that can be set in the "API server":install-api-server.html config/application.yml file.
+If you want access control on your Keepstore server(s), you must specify the @-enforce-permissions@ flag and provide a signing key. The @-blob-signing-key-file@ argument should be a file containing a long random alphanumeric string with no internal line breaks (it is also possible to use a socket or FIFO: keepstore reads it only once, at startup). This key must be the same as the @blob_signing_key@ configured in the "API server":install-api-server.html config/application.yml file.
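+
+For example, the same one-liner used elsewhere in this guide to generate Rails secrets will produce a suitable signing key:
+
+<notextile>
+<pre><code>~$ <span class="userinput">ruby -e 'puts rand(2**400).to_s(36)' &gt;blob-signing-key</span>
+</code></pre>
+</notextile>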
+
+The @-max-buffers@ argument can be used to restrict keepstore's memory use. By default, keepstore will allocate no more than 128 blocks (8 GiB) worth of data buffers at a time. Normally this should be set as high as possible without risking swapping.
 
 Prepare one or more volumes for Keepstore to use. Simply create a /keep directory on all the partitions you would like Keepstore to use, and then start Keepstore. For example, with a volume mounted at @/mnt/keep@:
 
 <notextile>
-<pre><code>~$ <span class="userinput">keepstore</span>
-2014/10/29 11:41:37 Keep started: pid 20736
-2014/10/29 11:41:37 adding Keep volume: /tmp/tmp.vwSCtUCyeH/keep
-2014/10/29 11:41:37 adding Keep volume: /tmp/tmp.Lsn4w8N3Xv/keep
-2014/10/29 11:41:37 Running without a PermissionSecret. Block locators returned by this server will not be signed, and will be rejected by a server that enforces permissions.
-2014/10/29 11:41:37 To fix this, run Keep with --permission-key-file=<path> to define the location of a file containing the permission key.
+<pre><code>~$ <span class="userinput">keepstore -blob-signing-key-file=./blob-signing-key</span>
+2015/05/08 13:44:26 keepstore starting, pid 2765
+2015/05/08 13:44:26 Using volume [UnixVolume /mnt/keep] (writable=true)
+2015/05/08 13:44:26 listening at :25107
 
 </code></pre>
 </notextile>
index 4f6a9771f1d08bfed9fccc1cee30e9121efeca61..b0ad8b4253820fe3bd33a1a30b3c5f47e4cc2297 100644 (file)
@@ -28,12 +28,12 @@ Create a secret:
 
 <notextile>
 <pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">cp -i config/initializers/secret_token.rb.example config/initializers/secret_token.rb</span>
-~/sso-devise-omniauth-provider$ <span class="userinput">rake secret</span>
+~/sso-devise-omniauth-provider$ <span class="userinput">ruby -e 'puts rand(2**400).to_s(36)'</span>
 zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
 </code></pre>
 </notextile>
 
-Edit @config/initializers/secret_token.rb@ to set @config.secret_token@ to the string produced by @rake secret@.
+Edit @config/initializers/secret_token.rb@ to set @config.secret_token@ to the string produced by the @ruby@ one-liner above.
 
 h3. Configure upstream authentication provider
 
@@ -51,7 +51,7 @@ h3(#client). Create arvados-server client
 Use @rails console@ to create a @Client@ record that will be used by the Arvados API server.  The values of @app_id@ and @app_secret@ correspond to the @APP_ID@ and @APP_SECRET@ that must be set in in "Setting up Omniauth in the API server.":install-api-server.html#omniauth
 
 <notextile>
-<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">rake secret</span>
+<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">ruby -e 'puts rand(2**400).to_s(36)'</span>
 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
 ~/sso-devise-omniauth-provider$ <span class="userinput">RAILS_ENV=production bundle exec rails console</span>
 :001 &gt; <span class="userinput">c = Client.new</span>
index 43e6f418737d5d715601d6bdf70789e3f089f723..4131849ba8f21d6687ba579eb09d12b24e6ee2ea 100644 (file)
@@ -12,7 +12,7 @@ h2. Install prerequisites
 <pre><code>~$ <span class="userinput">sudo apt-get install \
     bison build-essential gettext libcurl3 libcurl3-gnutls \
     libcurl4-openssl-dev libpcre3-dev libpq-dev libreadline-dev \
-    libssl-dev libxslt1.1 git wget zlib1g-dev graphviz libsqlite3-dev
+    libssl-dev libxslt1.1 git wget zlib1g-dev graphviz
 </span></code></pre></notextile>
 
 Also make sure you have "Ruby and bundler":install-manual-prerequisites-ruby.html installed.
@@ -83,7 +83,7 @@ h3. secret_token
 This application needs a secret token. Generate a new secret:
 
 <notextile>
-<pre><code>~/arvados/apps/workbench$ <span class="userinput">rake secret</span>
+<pre><code>~/arvados/apps/workbench$ <span class="userinput">ruby -e 'puts rand(2**400).to_s(36)'</span>
 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
 </code></pre>
 </notextile>
index b108ff707708a35d3c95c97c1e153c306b753055..996eb8a4c0d2bd351400ff7581e10f06057e83e7 100644 (file)
@@ -4,7 +4,7 @@ navsection: userguide
 title: Welcome to Arvados!
 ...
 
-_If you are new to Arvados, please read the "Getting Started":{{site.baseurl}}/start/index.html guide for a quick introduction to working with Arvados._
+_If you are new to Arvados, please try the Quickstart on <a href="http://doc.arvados.org">the documentation homepage</a> instead of this detailed User Guide._
 
 This guide provides a reference for using Arvados to solve big data bioinformatics problems, including:
 
@@ -13,8 +13,6 @@ This guide provides a reference for using Arvados to solve big data bioinformati
 * Storing and querying metadata about genome sequence files, such as human subjects and their phenotypic traits using the "Arvados Metadata Database.":{{site.baseurl}}/user/topics/tutorial-trait-search.html
 * Accessing, organizing, and sharing data, pipelines and results using the "Arvados Workbench":{{site.baseurl}}/user/getting_started/workbench.html web application.
 
-This User Guide goes into more depth than the "Getting Started guide":{{site.baseurl}}/start/index.html, covers how to develop your own pipelines in addition to using pre-existing pipelines, covers the Arvados commandline tools in addition to the Workbench graphical interface to Arvados, and can be referenced in any order.
-
 The examples in this guide use the Arvados instance located at <a href="https://{{ site.arvados_workbench_host }}/" target="_blank">https://{{ site.arvados_workbench_host }}</a>.  If you are using a different Arvados instance replace @{{ site.arvados_workbench_host }}@ with your private instance in all of the examples in this guide.
 
 Curoverse maintains a public Arvados instance located at <a href="https://workbench.qr1hi.arvadosapi.com/" target="_blank">https://workbench.qr1hi.arvadosapi.com/</a>.  You must have an account in order to use this service.  If you would like to request an account, please send an email to "arvados@curoverse.com":mailto:arvados@curoverse.com.
index f1d42adceb9aa9d69a8d0529f2d1013ca92b7ec5..5ffeb1836701dc83735e0377af0ce72ed228ff19 100644 (file)
@@ -73,6 +73,12 @@ table(table table-bordered table-condensed).
 |$(dir ...)        | Takes a reference to an Arvados collection or directory within an Arvados collection and evaluates to a directory path on the local file system where that directory can be accessed by your command.  The path may include a file name, in which case it will evaluate to the parent directory of the file.  Uses Python's os.path.dirname(), so "/foo/bar" will evaluate to "/foo" but "/foo/bar/" will evaluate to "/foo/bar".  Will raise an error if the directory is not accessible. |
 |$(basename&nbsp;...)   | Strip leading directory and trailing file extension from the path provided.  For example, $(basename /foo/bar.baz.txt) will evaluate to "bar.baz".|
 |$(glob ...)       | Take a Unix shell path pattern (supports @*@ @?@ and @[]@) and search the local filesystem, returning the first match found.  Use together with $(dir ...) to get a local filesystem path for Arvados collections.  For example: $(glob $(dir $(mycollection)/*.bam)) will find the first .bam file in the collection specified by the user parameter "mycollection".  If there is more than one match, which one is returned is undefined.  Will raise an error if no matches are found.|
+|$(task.tmpdir)|Designated temporary directory.  This directory will be discarded when the job completes.|
+|$(task.outdir)|Designated output directory.  The contents of this directory will be saved to Keep when the job completes.  A symlink to a file in the keep mount will reference existing Keep blocks in your job output collection, with no data copying or duplication.|
+|$(job.srcdir)|Path to the git working directory ($CRUNCH_SRC).|
+|$(node.cores)|Number of CPU cores on the node.|
+|$(job.uuid)|Current job uuid ($JOB_UUID).|
+|$(task.uuid)|Current task uuid ($TASK_UUID).|
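+
+For example, a command can combine several of these parameters (an illustrative sketch; @my-tool@ is a placeholder program name):
+
+<notextile>
+<pre><code>"command": ["my-tool", "--threads", "$(node.cores)", "--scratch", "$(task.tmpdir)", "--output", "$(task.outdir)/result.txt"]
+</code></pre>
+</notextile>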
 
 h3. Escape sequences
 
@@ -235,7 +241,7 @@ h3. task.vwd
 
 Background: because Keep collections are read-only, they do not play well with tools that expect to be able to write their outputs alongside their inputs (such as tools that generate indexes closely associated with the original file).  run-command's solution to this is the "virtual working directory".
 
-@task.vwd@ specifies a Keep collection with the starting contents of the directory.  @run-command@ will then populate @task.outdir@ with directories and symlinks to mirror the contents of the @task.vwd@ collection.  Your command will then be able to both access its input files and write its output files in @task.outdir@.  When the command completes, the output collection will merge the output of your command with the contents of the starting collection.  Note that files in the starting collection remain read-only and cannot be altered or deleted.
+@task.vwd@ specifies a Keep collection with the starting contents of the output directory.  @run-command@ will populate @task.outdir@ with directories and symlinks to mirror the contents of the @task.vwd@ collection.  Your command will then be able to both access its input files and write its output files from within @task.outdir@.  When the command completes, run-command will save the contents of the output directory to Keep, including the output of your command as well as symlinks to files from the starting collection.  Note that files from the starting collection remain read-only and cannot be altered, but may be deleted or renamed.
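+
+For example, a component that indexes a FASTA file already stored in Keep might look like this (a minimal sketch: @reference@ is a hypothetical user parameter naming the starting collection, and the command is assumed to run with @task.outdir@ as its working directory):
+
+<pre>
+"script_parameters": {
+  "command": ["samtools", "faidx", "genome.fa"],
+  "task.vwd": "$(reference)"
+}
+</pre>
+
+Here @genome.fa@ appears in @task.outdir@ as a symlink into the starting collection, and the new index @genome.fa.fai@ is written next to it and saved with the output.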
 
 h3. task.foreach
 
index 3cfb5a99434aae37c2d84960490c08cc1f137eb3..627e775ed077f4ae5262ffb77cb4d58113870923 100644 (file)
@@ -20,7 +20,11 @@ development:
 
 production:
   host: api.dev.arvados
-  git_host: api.dev.arvados
+
+  git_repo_ssh_base: "git@api.dev.arvados:"
+
+  # Docker setup doesn't include arv-git-httpd yet.
+  git_repo_https_base: false
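+
+  # Clone URLs are assumed to be formed by appending the repository
+  # name to these bases, e.g. "git@api.dev.arvados:foo.git" for a
+  # repository named "foo".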
 
   # At minimum, you need a nice long randomly generated secret_token here.
   # Use a long string of alphanumeric characters (at least 36).
index a0b4cb0d2cb57101f9b287c83fffb2b9e34a7a20..385f0e6c04cb28ce05ef2f17df174dbc6d52cdce 100755 (executable)
@@ -8,4 +8,4 @@ else
     permission_args=""
 fi
 
-exec keepstore $permission_args -listen=":25107" -volumes="/keep-data"
+exec keepstore $permission_args -listen=":25107" -volume="/keep-data"
index b4010ef4d69e5603d417763bed611fae1f0b4832..319104ac003a742d74d70c8198c69d97f25d074b 100755 (executable)
@@ -118,7 +118,7 @@ fi
 # will be filled in later, if [ -z "$skipDetection" ]
 lsbDist=''
 
-target="/tmp/docker-rootfs-debootstrap-$suite-$$-$RANDOM"
+target="${TMPDIR:-/tmp}/docker-rootfs-debootstrap-$suite-$$-$RANDOM"
 
 cd "$(dirname "$(readlink -f "$BASH_SOURCE")")"
 returnTo="$(pwd -P)"
index 99e3f4e41dfd8fbd925edecc4e78b966416506e3..4817bab4da4d91da66e497279bed4ac1654a1c3f 100644 (file)
@@ -9,6 +9,7 @@ RUN git clone git://github.com/curoverse/sso-devise-omniauth-provider.git /usr/s
 # Install generated config files
 ADD generated/secret_token.rb /usr/src/sso-provider/config/initializers/secret_token.rb
 ADD generated/seeds.rb /usr/src/sso-provider/db/seeds.rb
+ADD generated/application.yml /usr/src/sso-provider/config/application.yml
 ADD generated/apache2_vhost /etc/apache2/sites-available/sso-provider
 
@@ -18,9 +19,10 @@ RUN a2dissite default && \
     a2enmod rewrite && \
     a2enmod ssl && \
     cd /usr/src/sso-provider && \
+    cp config/environments/production.rb.example config/environments/production.rb && \
     RAILS_ENV=production /usr/local/rvm/bin/rvm-exec default bundle exec rake db:setup && \
     /usr/local/rvm/bin/rvm-exec default bundle exec rake assets:precompile && \
-    chown www-data:www-data tmp_omniauth log config.ru -R && \
+    chown www-data:www-data log config.ru -R && \
     chown www-data:www-data db db/production.sqlite3 && \
     /bin/mkdir /var/run/apache2
 
diff --git a/docker/sso/application.yml.in b/docker/sso/application.yml.in
new file mode 100644 (file)
index 0000000..712ba2c
--- /dev/null
@@ -0,0 +1,26 @@
+#
+# Consult application.default.yml for the full list of configuration
+# settings.
+#
+# The order of precedence is:
+# 1. config/environments/{RAILS_ENV}.rb (deprecated)
+# 2. Section in application.yml corresponding to RAILS_ENV (e.g., development)
+# 3. Section in application.yml called "common"
+# 4. Section in application.default.yml corresponding to RAILS_ENV
+# 5. Section in application.default.yml called "common"
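+#
+# For example, a setting in the "production" section of this file
+# overrides the same setting in "common" below and any value from
+# application.default.yml.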
+
+production:
+  allow_account_registration: true
+
+  secret_token: @@SSO_SECRET@@
+  uuid_prefix: 'zzzzz'
+
+development:
+  # No development settings 
+
+test:
+  # No test settings 
+
+common:
+  # No common settings 
+
index a6ccebcd91dd4d5f46221a575d1a1d4e0cb4da6e..6ae04812325994678dad9e29d736b5556731f4f4 100755 (executable)
@@ -643,12 +643,44 @@ my $thisround_failed_multiple = 0;
 @jobstep_todo = sort { $jobstep[$a]->{level} <=> $jobstep[$b]->{level}
                       or $a <=> $b } @jobstep_todo;
 my $level = $jobstep[$jobstep_todo[0]]->{level};
-Log (undef, "start level $level");
 
+my $initial_tasks_this_level = 0;
+foreach my $id (@jobstep_todo) {
+  $initial_tasks_this_level++ if ($jobstep[$id]->{level} == $level);
+}
 
+# If the number of tasks scheduled at this level #T is smaller than the number
+# of slots available #S, only use the first #T slots, or the first slot on
+# each node, whichever number is greater.
+#
+# When we dispatch tasks later, we'll allocate whole-node resources like RAM
+# based on these numbers.  Using fewer slots makes more resources available
+# to each individual task, which is normally the better strategy when
+# fewer tasks are running with less parallelism.
+#
+# Note that this calculation is not redone if the initial tasks at
+# this level queue more tasks at the same level.  This may harm
+# overall task throughput for that level.
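+#
+# Example with hypothetical numbers: on 4 nodes with 8 slots each (32
+# slots total), 2 initial tasks get 4 slots (the first slot on each
+# node), 6 initial tasks get the first 6 slots, and 40 initial tasks
+# get all 32 slots.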
+my @freeslot;
+if ($initial_tasks_this_level < @node) {
+  @freeslot = (0..$#node);
+} elsif ($initial_tasks_this_level < @slot) {
+  @freeslot = (0..$initial_tasks_this_level - 1);
+} else {
+  @freeslot = (0..$#slot);
+}
+my $round_num_freeslots = scalar(@freeslot);
 
+my %round_max_slots = ();
+for (my $ii = $#freeslot; $ii >= 0; $ii--) {
+  my $this_slot = $slot[$freeslot[$ii]];
+  my $node_name = $this_slot->{node}->{name};
+  $round_max_slots{$node_name} ||= $this_slot->{cpu};
+  last if (scalar(keys(%round_max_slots)) >= @node);
+}
+
+Log(undef, "start level $level with $round_num_freeslots slots");
 my %proc;
-my @freeslot = (0..$#slot);
 my @holdslot;
 my %reader;
 my $progress_is_dirty = 1;
@@ -657,12 +689,7 @@ my $progress_stats_updated = 0;
 update_progress_stats();
 
 
-
 THISROUND:
-my $tasks_this_level = 0;
-foreach my $id (@jobstep_todo) {
-  $tasks_this_level++ if ($jobstep[$id]->{level} == $level);
-}
 for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
 {
   my $id = $jobstep_todo[$todo_ptr];
@@ -715,16 +742,11 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
     $ENV{"HOME"} = $ENV{"TASK_WORK"};
     $ENV{"TASK_KEEPMOUNT"} = $ENV{"TASK_WORK"}.".keep";
     $ENV{"TASK_TMPDIR"} = $ENV{"TASK_WORK"}; # deprecated
-    $ENV{"CRUNCH_NODE_SLOTS"} = $slot[$childslot]->{node}->{ncpus};
+    $ENV{"CRUNCH_NODE_SLOTS"} = $round_max_slots{$ENV{TASK_SLOT_NODE}};
     $ENV{"PATH"} = $ENV{"CRUNCH_INSTALL"} . "/bin:" . $ENV{"PATH"};
 
     $ENV{"GZIP"} = "-n";
 
-    my $max_node_concurrent_tasks = $ENV{CRUNCH_NODE_SLOTS};
-    if ($tasks_this_level < $max_node_concurrent_tasks) {
-      $max_node_concurrent_tasks = $tasks_this_level;
-    }
-
     my @srunargs = (
       "srun",
       "--nodelist=".$childnode->{name},
@@ -739,7 +761,7 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
         # $command.  No tool is expected to read these values directly.
         .q{&& MEM=$(awk '($1 == "MemTotal:"){print $2}' </proc/meminfo) }
         .q{&& SWAP=$(awk '($1 == "SwapTotal:"){print $2}' </proc/meminfo) }
-        ."&& MEMLIMIT=\$(( (\$MEM * 95) / ($max_node_concurrent_tasks * 100) )) "
+        ."&& MEMLIMIT=\$(( (\$MEM * 95) / ($ENV{CRUNCH_NODE_SLOTS} * 100) )) "
         ."&& let SWAPLIMIT=\$MEMLIMIT+\$SWAP ";
     $command .= "&& exec arv-mount --by-id --allow-other $ENV{TASK_KEEPMOUNT} --exec ";
     if ($docker_hash)
@@ -859,7 +881,7 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
 
   while (!@freeslot
         ||
-        (@slot > @freeslot && $todo_ptr+1 > $#jobstep_todo))
+        ($round_num_freeslots > @freeslot && $todo_ptr+1 > $#jobstep_todo))
   {
     last THISROUND if $main::please_freeze || defined($main::success);
     if ($main::please_info)
index 4c16398397fa8881ffe8060f45b2e910007d506e..ae40a89a2b6bc9e033828e06fdf1364b0719d490 100644 (file)
@@ -20,13 +20,33 @@ import (
 var MissingArvadosApiHost = errors.New("Missing required environment variable ARVADOS_API_HOST")
 var MissingArvadosApiToken = errors.New("Missing required environment variable ARVADOS_API_TOKEN")
 
-type ArvadosApiError struct {
-       error
-       HttpStatusCode int
-       HttpStatus string
+// APIServerError describes an error returned by the API server.
+type APIServerError struct {
+       // Address of server returning error, of the form "host:port".
+       ServerAddress string
+
+       // Components of server response.
+       HttpStatusCode    int
+       HttpStatusMessage string
+
+       // Additional error details from response body.
+       ErrorDetails []string
 }
 
-func (e ArvadosApiError) Error() string { return e.error.Error() }
+func (e APIServerError) Error() string {
+       if len(e.ErrorDetails) > 0 {
+               return fmt.Sprintf("arvados API server error: %s (%d: %s) returned by %s",
+                       strings.Join(e.ErrorDetails, "; "),
+                       e.HttpStatusCode,
+                       e.HttpStatusMessage,
+                       e.ServerAddress)
+       } else {
+               return fmt.Sprintf("arvados API server error: %d: %s returned by %s",
+                       e.HttpStatusCode,
+                       e.HttpStatusMessage,
+                       e.ServerAddress)
+       }
+}
 
 // Helper type so we don't have to write out 'map[string]interface{}' every time.
 type Dict map[string]interface{}
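
Callers can now branch on the structured fields by type-asserting the returned error. A minimal sketch (the resource name and parameters are placeholders, and the usual ARVADOS_API_* environment is assumed to be configured):

	package main

	import (
		"log"

		"git.curoverse.com/arvados.git/sdk/go/arvadosclient"
	)

	func main() {
		arv, err := arvadosclient.MakeArvadosClient()
		if err != nil {
			log.Fatal(err)
		}
		var result arvadosclient.Dict
		err = arv.Create("collections", arvadosclient.Dict{}, &result)
		if apiErr, ok := err.(arvadosclient.APIServerError); ok {
			// Every response detail is a field; no need to parse the
			// flattened message produced by Error().
			log.Printf("%s returned HTTP %d: %v",
				apiErr.ServerAddress, apiErr.HttpStatusCode, apiErr.ErrorDetails)
		}
	}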
@@ -50,15 +70,15 @@ type ArvadosClient struct {
        External bool
 }
 
-// Create a new KeepClient, initialized with standard Arvados environment
+// MakeArvadosClient creates a new ArvadosClient, initialized with standard Arvados environment
 // variables ARVADOS_API_HOST, ARVADOS_API_TOKEN, and (optionally)
 // ARVADOS_API_HOST_INSECURE.
-func MakeArvadosClient() (kc ArvadosClient, err error) {
+func MakeArvadosClient() (ac ArvadosClient, err error) {
        var matchTrue = regexp.MustCompile("^(?i:1|yes|true)$")
        insecure := matchTrue.MatchString(os.Getenv("ARVADOS_API_HOST_INSECURE"))
        external := matchTrue.MatchString(os.Getenv("ARVADOS_EXTERNAL_CLIENT"))
 
-       kc = ArvadosClient{
+       ac = ArvadosClient{
                ApiServer:   os.Getenv("ARVADOS_API_HOST"),
                ApiToken:    os.Getenv("ARVADOS_API_TOKEN"),
                ApiInsecure: insecure,
@@ -66,14 +86,14 @@ func MakeArvadosClient() (kc ArvadosClient, err error) {
                        TLSClientConfig: &tls.Config{InsecureSkipVerify: insecure}}},
                External: external}
 
-       if kc.ApiServer == "" {
-               return kc, MissingArvadosApiHost
+       if ac.ApiServer == "" {
+               return ac, MissingArvadosApiHost
        }
-       if kc.ApiToken == "" {
-               return kc, MissingArvadosApiToken
+       if ac.ApiToken == "" {
+               return ac, MissingArvadosApiToken
        }
 
-       return kc, err
+       return ac, err
 }
 
 // Low-level access to a resource.
@@ -149,30 +169,36 @@ func (this ArvadosClient) CallRaw(method string, resource string, uuid string, a
        }
 
        defer resp.Body.Close()
-       errorText := fmt.Sprintf("API response: %s", resp.Status)
+       return nil, newAPIServerError(this.ApiServer, resp)
+}
+
+func newAPIServerError(ServerAddress string, resp *http.Response) APIServerError {
+
+       ase := APIServerError{
+               ServerAddress:     ServerAddress,
+               HttpStatusCode:    resp.StatusCode,
+               HttpStatusMessage: resp.Status}
 
        // If the response body has {"errors":["reason1","reason2"]}
        // then return those reasons.
        var errInfo = Dict{}
        if err := json.NewDecoder(resp.Body).Decode(&errInfo); err == nil {
                if errorList, ok := errInfo["errors"]; ok {
-                       var errorStrings []string
                        if errArray, ok := errorList.([]interface{}); ok {
                                for _, errItem := range errArray {
                                        // We expect an array of strings here.
                                        // Non-strings will be passed along
                                        // JSON-encoded.
                                        if s, ok := errItem.(string); ok {
-                                               errorStrings = append(errorStrings, s)
+                                               ase.ErrorDetails = append(ase.ErrorDetails, s)
                                        } else if j, err := json.Marshal(errItem); err == nil {
-                                               errorStrings = append(errorStrings, string(j))
+                                               ase.ErrorDetails = append(ase.ErrorDetails, string(j))
                                        }
                                }
-                               errorText = strings.Join(errorStrings, "; ")
                        }
                }
        }
-       return nil, ArvadosApiError{errors.New(errorText), resp.StatusCode, resp.Status}
+       return ase
 }
 
 // Access to a resource.
index 1af964d0a045ad2b4bb0a6dd9610fcf11d8027d3..21eff20355d21ce3befabfdff4087a9b7357ee89 100644 (file)
@@ -1,8 +1,8 @@
 package arvadosclient
 
 import (
-       . "gopkg.in/check.v1"
        "git.curoverse.com/arvados.git/sdk/go/arvadostest"
+       . "gopkg.in/check.v1"
        "net/http"
        "os"
        "testing"
@@ -86,17 +86,19 @@ func (s *ServerRequiredSuite) TestErrorResponse(c *C) {
                err := arv.Create("logs",
                        Dict{"log": Dict{"bogus_attr": "foo"}},
                        &getback)
+               c.Assert(err, ErrorMatches, "arvados API server error: .*")
                c.Assert(err, ErrorMatches, ".*unknown attribute: bogus_attr.*")
-               c.Assert(err, FitsTypeOf, ArvadosApiError{})
-               c.Assert(err.(ArvadosApiError).HttpStatusCode, Equals, 422)
+               c.Assert(err, FitsTypeOf, APIServerError{})
+               c.Assert(err.(APIServerError).HttpStatusCode, Equals, 422)
        }
 
        {
                err := arv.Create("bogus",
                        Dict{"bogus": Dict{}},
                        &getback)
-               c.Assert(err, ErrorMatches, "Path not found")
-               c.Assert(err, FitsTypeOf, ArvadosApiError{})
-               c.Assert(err.(ArvadosApiError).HttpStatusCode, Equals, 404)
+               c.Assert(err, ErrorMatches, "arvados API server error: .*")
+               c.Assert(err, ErrorMatches, ".*Path not found.*")
+               c.Assert(err, FitsTypeOf, APIServerError{})
+               c.Assert(err.(APIServerError).HttpStatusCode, Equals, 404)
        }
 }
index 5d791948dcb808f3373555d183d61f7df5a22100..31cfb572e48729902da95ca7d5c2616f5911d0dd 100644 (file)
@@ -14,11 +14,9 @@ import (
        "net/http"
        "os"
        "regexp"
+       "strconv"
        "strings"
        "sync"
-       "sync/atomic"
-       "time"
-       "unsafe"
 )
 
 // A Keep "block" is 64MB.
@@ -26,9 +24,10 @@ const BLOCKSIZE = 64 * 1024 * 1024
 
 var BlockNotFound = errors.New("Block not found")
 var InsufficientReplicasError = errors.New("Could not write sufficient replicas")
-var OversizeBlockError = errors.New("Block too big")
+var OversizeBlockError = errors.New("Exceeded maximum block size (" + strconv.Itoa(BLOCKSIZE) + ")")
 var MissingArvadosApiHost = errors.New("Missing required environment variable ARVADOS_API_HOST")
 var MissingArvadosApiToken = errors.New("Missing required environment variable ARVADOS_API_TOKEN")
+var InvalidLocatorError = errors.New("Invalid locator")
 
 const X_Keep_Desired_Replicas = "X-Keep-Desired-Replicas"
 const X_Keep_Replicas_Stored = "X-Keep-Replicas-Stored"
@@ -38,42 +37,43 @@ type KeepClient struct {
        Arvados       *arvadosclient.ArvadosClient
        Want_replicas int
        Using_proxy   bool
-       service_roots *map[string]string
-       lock          sync.Mutex
+       localRoots    *map[string]string
+       gatewayRoots  *map[string]string
+       lock          sync.RWMutex
        Client        *http.Client
 }
 
 // Create a new KeepClient.  This will contact the API server to discover Keep
 // servers.
-func MakeKeepClient(arv *arvadosclient.ArvadosClient) (kc KeepClient, err error) {
+func MakeKeepClient(arv *arvadosclient.ArvadosClient) (*KeepClient, error) {
        var matchTrue = regexp.MustCompile("^(?i:1|yes|true)$")
        insecure := matchTrue.MatchString(os.Getenv("ARVADOS_API_HOST_INSECURE"))
-       kc = KeepClient{
+       kc := &KeepClient{
                Arvados:       arv,
                Want_replicas: 2,
                Using_proxy:   false,
                Client: &http.Client{Transport: &http.Transport{
                        TLSClientConfig: &tls.Config{InsecureSkipVerify: insecure}}},
        }
-       _, err = (&kc).DiscoverKeepServers()
-
-       return kc, err
+       return kc, kc.DiscoverKeepServers()
 }
 
-// Put a block given the block hash, a reader with the block data, and the
-// expected length of that data.  The desired number of replicas is given in
-// KeepClient.Want_replicas.  Returns the number of replicas that were written
-// and if there was an error.  Note this will return InsufficientReplias
-// whenever 0 <= replicas < this.Wants_replicas.
-func (this KeepClient) PutHR(hash string, r io.Reader, expectedLength int64) (locator string, replicas int, err error) {
-
+// PutHR writes a block to Keep, given the block hash, a reader, and
+// the number of bytes to read from that reader (which must be between
+// 0 and BLOCKSIZE).
+//
+// Returns the locator for the written block, the number of replicas
+// written, and an error.
+//
+// Returns InsufficientReplicasError if 0 <= replicas <
+// kc.Want_replicas.
+func (kc *KeepClient) PutHR(hash string, r io.Reader, dataBytes int64) (string, int, error) {
        // Buffer for reads from 'r'
        var bufsize int
-       if expectedLength > 0 {
-               if expectedLength > BLOCKSIZE {
+       if dataBytes > 0 {
+               if dataBytes > BLOCKSIZE {
                        return "", 0, OversizeBlockError
                }
-               bufsize = int(expectedLength)
+               bufsize = int(dataBytes)
        } else {
                bufsize = BLOCKSIZE
        }
@@ -81,215 +81,200 @@ func (this KeepClient) PutHR(hash string, r io.Reader, expectedLength int64) (lo
        t := streamer.AsyncStreamFromReader(bufsize, HashCheckingReader{r, md5.New(), hash})
        defer t.Close()
 
-       return this.putReplicas(hash, t, expectedLength)
+       return kc.putReplicas(hash, t, dataBytes)
 }
 
-// Put a block given the block hash and a byte buffer.  The desired number of
-// replicas is given in KeepClient.Want_replicas.  Returns the number of
-// replicas that were written and if there was an error.  Note this will return
-// InsufficientReplias whenever 0 <= replicas < this.Wants_replicas.
-func (this KeepClient) PutHB(hash string, buf []byte) (locator string, replicas int, err error) {
+// PutHB writes a block to Keep. The hash of the bytes is given in
+// hash, and the data is given in buf.
+//
+// Return values are the same as for PutHR.
+func (kc *KeepClient) PutHB(hash string, buf []byte) (string, int, error) {
        t := streamer.AsyncStreamFromSlice(buf)
        defer t.Close()
-
-       return this.putReplicas(hash, t, int64(len(buf)))
+       return kc.putReplicas(hash, t, int64(len(buf)))
 }
 
-// Put a block given a buffer.  The hash will be computed.  The desired number
-// of replicas is given in KeepClient.Want_replicas.  Returns the number of
-// replicas that were written and if there was an error.  Note this will return
-// InsufficientReplias whenever 0 <= replicas < this.Wants_replicas.
-func (this KeepClient) PutB(buffer []byte) (locator string, replicas int, err error) {
+// PutB writes a block to Keep. It computes the hash itself.
+//
+// Return values are the same as for PutHR.
+func (kc *KeepClient) PutB(buffer []byte) (string, int, error) {
        hash := fmt.Sprintf("%x", md5.Sum(buffer))
-       return this.PutHB(hash, buffer)
+       return kc.PutHB(hash, buffer)
 }
 
-// Put a block, given a Reader.  This will read the entire reader into a buffer
-// to compute the hash.  The desired number of replicas is given in
-// KeepClient.Want_replicas.  Returns the number of replicas that were written
-// and if there was an error.  Note this will return InsufficientReplias
-// whenever 0 <= replicas < this.Wants_replicas.  Also nhote that if the block
-// hash and data size are available, PutHR() is more efficient.
-func (this KeepClient) PutR(r io.Reader) (locator string, replicas int, err error) {
+// PutR writes a block to Keep. It first reads all data from r into a buffer
+// in order to compute the hash.
+//
+// Return values are the same as for PutHR.
+//
+// If the block hash and data size are known, PutHR is more efficient.
+func (kc *KeepClient) PutR(r io.Reader) (locator string, replicas int, err error) {
        if buffer, err := ioutil.ReadAll(r); err != nil {
                return "", 0, err
        } else {
-               return this.PutB(buffer)
+               return kc.PutB(buffer)
        }
 }
 
-// Get a block given a hash.  Return a reader, the expected data length, the
-// URL the block was fetched from, and if there was an error.  If the block
-// checksum does not match, the final Read() on the reader returned by this
-// method will return a BadChecksum error instead of EOF.
-func (this KeepClient) Get(hash string) (reader io.ReadCloser,
-       contentLength int64, url string, err error) {
-       return this.AuthorizedGet(hash, "", "")
-}
-
-// Get a block given a hash, with additional authorization provided by
-// signature and timestamp.  Return a reader, the expected data length, the URL
-// the block was fetched from, and if there was an error.  If the block
-// checksum does not match, the final Read() on the reader returned by this
-// method will return a BadChecksum error instead of EOF.
-func (this KeepClient) AuthorizedGet(hash string,
-       signature string,
-       timestamp string) (reader io.ReadCloser,
-       contentLength int64, url string, err error) {
-
-       // Take the hash of locator and timestamp in order to identify this
-       // specific transaction in log statements.
-       requestId := fmt.Sprintf("%x", md5.Sum([]byte(hash+time.Now().String())))[0:8]
-
-       // Calculate the ordering for asking servers
-       sv := NewRootSorter(this.ServiceRoots(), hash).GetSortedRoots()
-
-       for _, host := range sv {
-               var req *http.Request
-               var err error
-               var url string
-               if signature != "" {
-                       url = fmt.Sprintf("%s/%s+A%s@%s", host, hash,
-                               signature, timestamp)
-               } else {
-                       url = fmt.Sprintf("%s/%s", host, hash)
-               }
-               if req, err = http.NewRequest("GET", url, nil); err != nil {
+// Get() retrieves a block, given a locator. Returns a reader, the
+// expected data length, the URL the block is being fetched from, and
+// an error.
+//
+// If the block checksum does not match, the final Read() on the
+// reader returned by this method will return a BadChecksum error
+// instead of EOF.
+func (kc *KeepClient) Get(locator string) (io.ReadCloser, int64, string, error) {
+       var errs []string
+       for _, host := range kc.getSortedRoots(locator) {
+               url := host + "/" + locator
+               req, err := http.NewRequest("GET", url, nil)
+               if err != nil {
                        continue
                }
-
-               req.Header.Add("Authorization", fmt.Sprintf("OAuth2 %s", this.Arvados.ApiToken))
-
-               log.Printf("[%v] Begin download %s", requestId, url)
-
-               var resp *http.Response
-               if resp, err = this.Client.Do(req); err != nil || resp.StatusCode != http.StatusOK {
-                       statusCode := -1
-                       var respbody []byte
+               req.Header.Add("Authorization", fmt.Sprintf("OAuth2 %s", kc.Arvados.ApiToken))
+               resp, err := kc.Client.Do(req)
+               if err != nil || resp.StatusCode != http.StatusOK {
                        if resp != nil {
-                               statusCode = resp.StatusCode
+                               var respbody []byte
                                if resp.Body != nil {
                                        respbody, _ = ioutil.ReadAll(&io.LimitedReader{resp.Body, 4096})
                                }
+                               errs = append(errs, fmt.Sprintf("%s: %d %s",
+                                       url, resp.StatusCode, strings.TrimSpace(string(respbody))))
+                       } else {
+                               errs = append(errs, fmt.Sprintf("%s: %v", url, err))
                        }
-                       response := strings.TrimSpace(string(respbody))
-                       log.Printf("[%v] Download %v status code: %v error: \"%v\" response: \"%v\"",
-                               requestId, url, statusCode, err, response)
                        continue
                }
-
-               if resp.StatusCode == http.StatusOK {
-                       log.Printf("[%v] Download %v status code: %v", requestId, url, resp.StatusCode)
-                       return HashCheckingReader{resp.Body, md5.New(), hash}, resp.ContentLength, url, nil
-               }
+               return HashCheckingReader{
+                       Reader: resp.Body,
+                       Hash:   md5.New(),
+                       Check:  locator[0:32],
+               }, resp.ContentLength, url, nil
        }
-
+       log.Printf("DEBUG: GET %s failed: %v", locator, errs)
        return nil, 0, "", BlockNotFound
 }
 
-// Determine if a block with the given hash is available and readable, but does
-// not return the block contents.
-func (this KeepClient) Ask(hash string) (contentLength int64, url string, err error) {
-       return this.AuthorizedAsk(hash, "", "")
-}
-
-// Determine if a block with the given hash is available and readable with the
-// given signature and timestamp, but does not return the block contents.
-func (this KeepClient) AuthorizedAsk(hash string, signature string,
-       timestamp string) (contentLength int64, url string, err error) {
-       // Calculate the ordering for asking servers
-       sv := NewRootSorter(this.ServiceRoots(), hash).GetSortedRoots()
-
-       for _, host := range sv {
-               var req *http.Request
-               var err error
-               if signature != "" {
-                       url = fmt.Sprintf("%s/%s+A%s@%s", host, hash,
-                               signature, timestamp)
-               } else {
-                       url = fmt.Sprintf("%s/%s", host, hash)
-               }
-
-               if req, err = http.NewRequest("HEAD", url, nil); err != nil {
-                       continue
-               }
-
-               req.Header.Add("Authorization", fmt.Sprintf("OAuth2 %s", this.Arvados.ApiToken))
-
-               var resp *http.Response
-               if resp, err = this.Client.Do(req); err != nil {
+// Ask() verifies that a block with the given hash is available and
+// readable, according to at least one Keep service. Unlike Get, it
+// does not retrieve the data or verify that the data content matches
+// the hash specified by the locator.
+//
+// Returns the data size (content length) reported by the Keep service
+// and the URI of the service that reported it.
+func (kc *KeepClient) Ask(locator string) (int64, string, error) {
+       for _, host := range kc.getSortedRoots(locator) {
+               url := host + "/" + locator
+               req, err := http.NewRequest("HEAD", url, nil)
+               if err != nil {
                        continue
                }
-
-               if resp.StatusCode == http.StatusOK {
+               req.Header.Add("Authorization", fmt.Sprintf("OAuth2 %s", kc.Arvados.ApiToken))
+               if resp, err := kc.Client.Do(req); err == nil && resp.StatusCode == http.StatusOK {
                        return resp.ContentLength, url, nil
                }
        }
-
        return 0, "", BlockNotFound
-
 }
 
-// Atomically read the service_roots field.
-func (this *KeepClient) ServiceRoots() map[string]string {
-       r := (*map[string]string)(atomic.LoadPointer((*unsafe.Pointer)(unsafe.Pointer(&this.service_roots))))
-       return *r
+// LocalRoots() returns the map of local (i.e., disk and proxy) Keep
+// services: uuid -> baseURI.
+func (kc *KeepClient) LocalRoots() map[string]string {
+       kc.lock.RLock()
+       defer kc.lock.RUnlock()
+       return *kc.localRoots
 }
 
-// Atomically update the service_roots field.  Enables you to update
-// service_roots without disrupting any GET or PUT operations that might
-// already be in progress.
-func (this *KeepClient) SetServiceRoots(new_roots map[string]string) {
-       roots := make(map[string]string)
-       for uuid, root := range new_roots {
-               roots[uuid] = root
-       }
-       atomic.StorePointer((*unsafe.Pointer)(unsafe.Pointer(&this.service_roots)),
-               unsafe.Pointer(&roots))
+// GatewayRoots() returns the map of Keep remote gateway services:
+// uuid -> baseURI.
+func (kc *KeepClient) GatewayRoots() map[string]string {
+       kc.lock.RLock()
+       defer kc.lock.RUnlock()
+       return *kc.gatewayRoots
 }
 
-type Locator struct {
-       Hash      string
-       Size      int
-       Signature string
-       Timestamp string
+// SetServiceRoots updates the localRoots and gatewayRoots maps,
+// without risk of disrupting operations that are already in progress.
+//
+// The KeepClient makes its own copy of the supplied maps, so the
+// caller can reuse/modify them after SetServiceRoots returns, but
+// they should not be modified by any other goroutine while
+// SetServiceRoots is running.
+func (kc *KeepClient) SetServiceRoots(newLocals, newGateways map[string]string) {
+       locals := make(map[string]string)
+       for uuid, root := range newLocals {
+               locals[uuid] = root
+       }
+       gateways := make(map[string]string)
+       for uuid, root := range newGateways {
+               gateways[uuid] = root
+       }
+       kc.lock.Lock()
+       defer kc.lock.Unlock()
+       kc.localRoots = &locals
+       kc.gatewayRoots = &gateways
 }
 
-func MakeLocator2(hash string, hints string) (locator Locator) {
-       locator.Hash = hash
-       if hints != "" {
-               signature_pat, _ := regexp.Compile("^A([[:xdigit:]]+)@([[:xdigit:]]{8})$")
-               for _, hint := range strings.Split(hints, "+") {
-                       if hint != "" {
-                               if match, _ := regexp.MatchString("^[[:digit:]]+$", hint); match {
-                                       fmt.Sscanf(hint, "%d", &locator.Size)
-                               } else if m := signature_pat.FindStringSubmatch(hint); m != nil {
-                                       locator.Signature = m[1]
-                                       locator.Timestamp = m[2]
-                               } else if match, _ := regexp.MatchString("^[:upper:]", hint); match {
-                                       // Any unknown hint that starts with an uppercase letter is
-                                       // presumed to be valid and ignored, to permit forward compatibility.
-                               } else {
-                                       // Unknown format; not a valid locator.
-                                       return Locator{"", 0, "", ""}
-                               }
+// getSortedRoots returns a list of base URIs of Keep services, in the
+// order they should be attempted in order to retrieve content for the
+// given locator.
+func (kc *KeepClient) getSortedRoots(locator string) []string {
+       var found []string
+       for _, hint := range strings.Split(locator, "+") {
+               if len(hint) < 7 || hint[0:2] != "K@" {
+                       // Not a service hint.
+                       continue
+               }
+               if len(hint) == 7 {
+                       // +K@abcde means fetch from proxy at
+                       // keep.abcde.arvadosapi.com
+                       found = append(found, "https://keep."+hint[2:]+".arvadosapi.com")
+               } else if len(hint) == 29 {
+                       // +K@abcde-abcde-abcdeabcdeabcde means fetch
+                       // from gateway with given uuid
+                       if gwURI, ok := kc.GatewayRoots()[hint[2:]]; ok {
+                               found = append(found, gwURI)
                        }
+                       // else this hint is no use to us; carry on.
                }
        }
-       return locator
+       // After trying all usable service hints, fall back to local roots.
+       found = append(found, NewRootSorter(kc.LocalRoots(), locator[0:32]).GetSortedRoots()...)
+       return found
+}
+
+type Locator struct {
+       Hash  string
+       Size  int      // -1 if data size is not known
+       Hints []string // Including the size hint, if any
 }
 
-func MakeLocator(path string) Locator {
-       pathpattern, err := regexp.Compile("^([0-9a-f]{32})([+].*)?$")
-       if err != nil {
-               log.Print("Don't like regexp", err)
+func (loc *Locator) String() string {
+       s := loc.Hash
+       if len(loc.Hints) > 0 {
+               s = s + "+" + strings.Join(loc.Hints, "+")
        }
+       return s
+}
+
+var locatorMatcher = regexp.MustCompile("^([0-9a-f]{32})([+](.*))?$")
 
-       sm := pathpattern.FindStringSubmatch(path)
+func MakeLocator(path string) (*Locator, error) {
+       sm := locatorMatcher.FindStringSubmatch(path)
        if sm == nil {
-               log.Print("Failed match ", path)
-               return Locator{"", 0, "", ""}
+               return nil, InvalidLocatorError
        }
-
-       return MakeLocator2(sm[1], sm[2])
+       loc := Locator{Hash: sm[1], Size: -1}
+       if sm[2] != "" {
+               loc.Hints = strings.Split(sm[3], "+")
+       } else {
+               loc.Hints = []string{}
+       }
+       if len(loc.Hints) > 0 {
+               if size, err := strconv.Atoi(loc.Hints[0]); err == nil {
+                       loc.Size = size
+               }
+       }
+       return &loc, nil
 }
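
For example, the new MakeLocator API parses the size hint and any service hints out of a locator string. A minimal sketch (the hash is md5("foo"), the gateway UUID is a placeholder, and the keepclient import path is assumed to sit alongside arvadosclient):

	package main

	import (
		"fmt"
		"log"

		"git.curoverse.com/arvados.git/sdk/go/keepclient"
	)

	func main() {
		loc, err := keepclient.MakeLocator(
			"acbd18db4cc2f85cedef654fccc4a4d8+3+K@zzzzz-bi6l4-000000000000000")
		if err != nil {
			// A malformed locator yields InvalidLocatorError.
			log.Fatal(err)
		}
		fmt.Println(loc.Hash)     // acbd18db4cc2f85cedef654fccc4a4d8
		fmt.Println(loc.Size)     // 3
		fmt.Println(loc.Hints)    // [3 K@zzzzz-bi6l4-000000000000000]
		fmt.Println(loc.String()) // reassembles the original locator string
	}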
index cbd27d72e7c7e9310de1ed027e47912b7a187baa..236d8dbfdab7e3940945d2bd8262db70d878b88e 100644 (file)
@@ -63,8 +63,8 @@ func (s *ServerRequiredSuite) TestMakeKeepClient(c *C) {
        kc, err := MakeKeepClient(&arv)
 
        c.Assert(err, Equals, nil)
-       c.Check(len(kc.ServiceRoots()), Equals, 2)
-       for _, root := range kc.ServiceRoots() {
+       c.Check(len(kc.LocalRoots()), Equals, 2)
+       for _, root := range kc.LocalRoots() {
                c.Check(root, Matches, "http://localhost:\\d+")
        }
 }
@@ -77,14 +77,14 @@ type StubPutHandler struct {
        handled        chan string
 }
 
-func (this StubPutHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
-       this.c.Check(req.URL.Path, Equals, "/"+this.expectPath)
-       this.c.Check(req.Header.Get("Authorization"), Equals, fmt.Sprintf("OAuth2 %s", this.expectApiToken))
+func (sph StubPutHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
+       sph.c.Check(req.URL.Path, Equals, "/"+sph.expectPath)
+       sph.c.Check(req.Header.Get("Authorization"), Equals, fmt.Sprintf("OAuth2 %s", sph.expectApiToken))
        body, err := ioutil.ReadAll(req.Body)
-       this.c.Check(err, Equals, nil)
-       this.c.Check(body, DeepEquals, []byte(this.expectBody))
+       sph.c.Check(err, Equals, nil)
+       sph.c.Check(body, DeepEquals, []byte(sph.expectBody))
        resp.WriteHeader(200)
-       this.handled <- fmt.Sprintf("http://%s", req.Host)
+       sph.handled <- fmt.Sprintf("http://%s", req.Host)
 }
 
 func RunFakeKeepServer(st http.Handler) (ks KeepServer) {
@@ -98,7 +98,7 @@ func RunFakeKeepServer(st http.Handler) (ks KeepServer) {
        return
 }
 
-func UploadToStubHelper(c *C, st http.Handler, f func(KeepClient, string,
+func UploadToStubHelper(c *C, st http.Handler, f func(*KeepClient, string,
        io.ReadCloser, io.WriteCloser, chan uploadStatus)) {
 
        ks := RunFakeKeepServer(st)
@@ -126,7 +126,7 @@ func (s *StandaloneSuite) TestUploadToStubKeepServer(c *C) {
                make(chan string)}
 
        UploadToStubHelper(c, st,
-               func(kc KeepClient, url string, reader io.ReadCloser,
+               func(kc *KeepClient, url string, reader io.ReadCloser,
                        writer io.WriteCloser, upload_status chan uploadStatus) {
 
                        go kc.uploadToKeepServer(url, st.expectPath, reader, upload_status, int64(len("foo")), "TestUploadToStubKeepServer")
@@ -153,7 +153,7 @@ func (s *StandaloneSuite) TestUploadToStubKeepServerBufferReader(c *C) {
                make(chan string)}
 
        UploadToStubHelper(c, st,
-               func(kc KeepClient, url string, reader io.ReadCloser,
+               func(kc *KeepClient, url string, reader io.ReadCloser,
                        writer io.WriteCloser, upload_status chan uploadStatus) {
 
                        tr := streamer.AsyncStreamFromReader(512, reader)
@@ -179,9 +179,9 @@ type FailHandler struct {
        handled chan string
 }
 
-func (this FailHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
+func (fh FailHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
        resp.WriteHeader(500)
-       this.handled <- fmt.Sprintf("http://%s", req.Host)
+       fh.handled <- fmt.Sprintf("http://%s", req.Host)
 }
 
 func (s *StandaloneSuite) TestFailedUploadToStubKeepServer(c *C) {
@@ -193,7 +193,7 @@ func (s *StandaloneSuite) TestFailedUploadToStubKeepServer(c *C) {
        hash := "acbd18db4cc2f85cedef654fccc4a4d8"
 
        UploadToStubHelper(c, st,
-               func(kc KeepClient, url string, reader io.ReadCloser,
+               func(kc *KeepClient, url string, reader io.ReadCloser,
                        writer io.WriteCloser, upload_status chan uploadStatus) {
 
                        go kc.uploadToKeepServer(url, hash, reader, upload_status, 3, "TestFailedUploadToStubKeepServer")
@@ -242,21 +242,21 @@ func (s *StandaloneSuite) TestPutB(c *C) {
 
        kc.Want_replicas = 2
        arv.ApiToken = "abc123"
-       service_roots := make(map[string]string)
+       localRoots := make(map[string]string)
 
        ks := RunSomeFakeKeepServers(st, 5)
 
        for i, k := range ks {
-               service_roots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
+               localRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
                defer k.listener.Close()
        }
 
-       kc.SetServiceRoots(service_roots)
+       kc.SetServiceRoots(localRoots, nil)
 
        kc.PutB([]byte("foo"))
 
        shuff := NewRootSorter(
-               kc.ServiceRoots(), Md5String("foo")).GetSortedRoots()
+               kc.LocalRoots(), Md5String("foo")).GetSortedRoots()
 
        s1 := <-st.handled
        s2 := <-st.handled
@@ -285,16 +285,16 @@ func (s *StandaloneSuite) TestPutHR(c *C) {
 
        kc.Want_replicas = 2
        arv.ApiToken = "abc123"
-       service_roots := make(map[string]string)
+       localRoots := make(map[string]string)
 
        ks := RunSomeFakeKeepServers(st, 5)
 
        for i, k := range ks {
-               service_roots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
+               localRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
                defer k.listener.Close()
        }
 
-       kc.SetServiceRoots(service_roots)
+       kc.SetServiceRoots(localRoots, nil)
 
        reader, writer := io.Pipe()
 
@@ -305,7 +305,7 @@ func (s *StandaloneSuite) TestPutHR(c *C) {
 
        kc.PutHR(hash, reader, 3)
 
-       shuff := NewRootSorter(kc.ServiceRoots(), hash).GetSortedRoots()
+       shuff := NewRootSorter(kc.LocalRoots(), hash).GetSortedRoots()
        log.Print(shuff)
 
        s1 := <-st.handled
@@ -339,24 +339,24 @@ func (s *StandaloneSuite) TestPutWithFail(c *C) {
 
        kc.Want_replicas = 2
        arv.ApiToken = "abc123"
-       service_roots := make(map[string]string)
+       localRoots := make(map[string]string)
 
        ks1 := RunSomeFakeKeepServers(st, 4)
        ks2 := RunSomeFakeKeepServers(fh, 1)
 
        for i, k := range ks1 {
-               service_roots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
+               localRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
                defer k.listener.Close()
        }
        for i, k := range ks2 {
-               service_roots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i+len(ks1))] = k.url
+               localRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i+len(ks1))] = k.url
                defer k.listener.Close()
        }
 
-       kc.SetServiceRoots(service_roots)
+       kc.SetServiceRoots(localRoots, nil)
 
        shuff := NewRootSorter(
-               kc.ServiceRoots(), Md5String("foo")).GetSortedRoots()
+               kc.LocalRoots(), Md5String("foo")).GetSortedRoots()
 
        phash, replicas, err := kc.PutB([]byte("foo"))
 
@@ -395,21 +395,21 @@ func (s *StandaloneSuite) TestPutWithTooManyFail(c *C) {
 
        kc.Want_replicas = 2
        arv.ApiToken = "abc123"
-       service_roots := make(map[string]string)
+       localRoots := make(map[string]string)
 
        ks1 := RunSomeFakeKeepServers(st, 1)
        ks2 := RunSomeFakeKeepServers(fh, 4)
 
        for i, k := range ks1 {
-               service_roots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
+               localRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
                defer k.listener.Close()
        }
        for i, k := range ks2 {
-               service_roots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i+len(ks1))] = k.url
+               localRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i+len(ks1))] = k.url
                defer k.listener.Close()
        }
 
-       kc.SetServiceRoots(service_roots)
+       kc.SetServiceRoots(localRoots, nil)
 
        _, replicas, err := kc.PutB([]byte("foo"))
 
@@ -424,14 +424,16 @@ type StubGetHandler struct {
        c              *C
        expectPath     string
        expectApiToken string
-       returnBody     []byte
+       httpStatus     int
+       body           []byte
 }
 
-func (this StubGetHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
-       this.c.Check(req.URL.Path, Equals, "/"+this.expectPath)
-       this.c.Check(req.Header.Get("Authorization"), Equals, fmt.Sprintf("OAuth2 %s", this.expectApiToken))
-       resp.Header().Set("Content-Length", fmt.Sprintf("%d", len(this.returnBody)))
-       resp.Write(this.returnBody)
+func (sgh StubGetHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
+       sgh.c.Check(req.URL.Path, Equals, "/"+sgh.expectPath)
+       sgh.c.Check(req.Header.Get("Authorization"), Equals, fmt.Sprintf("OAuth2 %s", sgh.expectApiToken))
+       // Headers must be set before WriteHeader; they are ignored afterwards.
+       resp.Header().Set("Content-Length", fmt.Sprintf("%d", len(sgh.body)))
+       resp.WriteHeader(sgh.httpStatus)
+       resp.Write(sgh.body)
 }
 
 func (s *StandaloneSuite) TestGet(c *C) {
@@ -443,6 +445,7 @@ func (s *StandaloneSuite) TestGet(c *C) {
                c,
                hash,
                "abc123",
+               http.StatusOK,
                []byte("foo")}
 
        ks := RunFakeKeepServer(st)
@@ -451,7 +454,7 @@ func (s *StandaloneSuite) TestGet(c *C) {
        arv, err := arvadosclient.MakeArvadosClient()
        kc, _ := MakeKeepClient(&arv)
        arv.ApiToken = "abc123"
-       kc.SetServiceRoots(map[string]string{"x": ks.url})
+       kc.SetServiceRoots(map[string]string{"x": ks.url}, nil)
 
        r, n, url2, err := kc.Get(hash)
        defer r.Close()
@@ -477,7 +480,7 @@ func (s *StandaloneSuite) TestGetFail(c *C) {
        arv, err := arvadosclient.MakeArvadosClient()
        kc, _ := MakeKeepClient(&arv)
        arv.ApiToken = "abc123"
-       kc.SetServiceRoots(map[string]string{"x": ks.url})
+       kc.SetServiceRoots(map[string]string{"x": ks.url}, nil)
 
        r, n, url2, err := kc.Get(hash)
        c.Check(err, Equals, BlockNotFound)
@@ -486,6 +489,133 @@ func (s *StandaloneSuite) TestGetFail(c *C) {
        c.Check(r, Equals, nil)
 }
 
+func (s *StandaloneSuite) TestGetWithServiceHint(c *C) {
+       uuid := "zzzzz-bi6l4-123451234512345"
+       hash := fmt.Sprintf("%x", md5.Sum([]byte("foo")))
+
+       // This one shouldn't be used:
+       ks0 := RunFakeKeepServer(StubGetHandler{
+               c,
+               "error if used",
+               "abc123",
+               http.StatusOK,
+               []byte("foo")})
+       defer ks0.listener.Close()
+       // This one should be used:
+       ks := RunFakeKeepServer(StubGetHandler{
+               c,
+               hash + "+K@" + uuid,
+               "abc123",
+               http.StatusOK,
+               []byte("foo")})
+       defer ks.listener.Close()
+
+       arv, err := arvadosclient.MakeArvadosClient()
+       kc, _ := MakeKeepClient(&arv)
+       arv.ApiToken = "abc123"
+       kc.SetServiceRoots(
+               map[string]string{"x": ks0.url},
+               map[string]string{uuid: ks.url})
+
+       r, n, uri, err := kc.Get(hash + "+K@" + uuid)
+       c.Assert(err, Equals, nil)
+       defer r.Close()
+       c.Check(n, Equals, int64(3))
+       c.Check(uri, Equals, fmt.Sprintf("%s/%s", ks.url, hash+"+K@"+uuid))
+
+       content, err := ioutil.ReadAll(r)
+       c.Check(err, Equals, nil)
+       c.Check(content, DeepEquals, []byte("foo"))
+}
+
+// Use a service hint to fetch from a local disk service, overriding
+// rendezvous probe order.
+func (s *StandaloneSuite) TestGetWithLocalServiceHint(c *C) {
+       uuid := "zzzzz-bi6l4-zzzzzzzzzzzzzzz"
+       hash := fmt.Sprintf("%x", md5.Sum([]byte("foo")))
+
+       // This one shouldn't be used, although it appears first in
+       // rendezvous probe order:
+       ks0 := RunFakeKeepServer(StubGetHandler{
+               c,
+               "error if used",
+               "abc123",
+               http.StatusOK,
+               []byte("foo")})
+       defer ks0.listener.Close()
+       // This one should be used:
+       ks := RunFakeKeepServer(StubGetHandler{
+               c,
+               hash + "+K@" + uuid,
+               "abc123",
+               http.StatusOK,
+               []byte("foo")})
+       defer ks.listener.Close()
+
+       arv, err := arvadosclient.MakeArvadosClient()
+       kc, _ := MakeKeepClient(&arv)
+       arv.ApiToken = "abc123"
+       kc.SetServiceRoots(
+               map[string]string{
+                       "zzzzz-bi6l4-yyyyyyyyyyyyyyy": ks0.url,
+                       "zzzzz-bi6l4-xxxxxxxxxxxxxxx": ks0.url,
+                       "zzzzz-bi6l4-wwwwwwwwwwwwwww": ks0.url,
+                       uuid: ks.url},
+               map[string]string{
+                       "zzzzz-bi6l4-yyyyyyyyyyyyyyy": ks0.url,
+                       "zzzzz-bi6l4-xxxxxxxxxxxxxxx": ks0.url,
+                       "zzzzz-bi6l4-wwwwwwwwwwwwwww": ks0.url,
+                       uuid: ks.url},
+       )
+
+       r, n, uri, err := kc.Get(hash + "+K@" + uuid)
+       c.Assert(err, Equals, nil)
+       defer r.Close()
+       c.Check(n, Equals, int64(3))
+       c.Check(uri, Equals, fmt.Sprintf("%s/%s", ks.url, hash+"+K@"+uuid))
+
+       content, err := ioutil.ReadAll(r)
+       c.Check(err, Equals, nil)
+       c.Check(content, DeepEquals, []byte("foo"))
+}
+
+func (s *StandaloneSuite) TestGetWithServiceHintFailoverToLocals(c *C) {
+       uuid := "zzzzz-bi6l4-123451234512345"
+       hash := fmt.Sprintf("%x", md5.Sum([]byte("foo")))
+
+       ksLocal := RunFakeKeepServer(StubGetHandler{
+               c,
+               hash + "+K@" + uuid,
+               "abc123",
+               http.StatusOK,
+               []byte("foo")})
+       defer ksLocal.listener.Close()
+       ksGateway := RunFakeKeepServer(StubGetHandler{
+               c,
+               hash + "+K@" + uuid,
+               "abc123",
+               http.StatusInternalServerError,
+               []byte("Error")})
+       defer ksGateway.listener.Close()
+
+       arv, err := arvadosclient.MakeArvadosClient()
+       kc, _ := MakeKeepClient(&arv)
+       arv.ApiToken = "abc123"
+       kc.SetServiceRoots(
+               map[string]string{"zzzzz-bi6l4-keepdisk0000000": ksLocal.url},
+               map[string]string{uuid: ksGateway.url})
+
+       r, n, uri, err := kc.Get(hash + "+K@" + uuid)
+       c.Assert(err, Equals, nil)
+       defer r.Close()
+       c.Check(n, Equals, int64(3))
+       c.Check(uri, Equals, fmt.Sprintf("%s/%s", ksLocal.url, hash+"+K@"+uuid))
+
+       content, err := ioutil.ReadAll(r)
+       c.Check(err, Equals, nil)
+       c.Check(content, DeepEquals, []byte("foo"))
+}
+
 type BarHandler struct {
        handled chan string
 }
@@ -507,7 +637,7 @@ func (s *StandaloneSuite) TestChecksum(c *C) {
        arv, err := arvadosclient.MakeArvadosClient()
        kc, _ := MakeKeepClient(&arv)
        arv.ApiToken = "abc123"
-       kc.SetServiceRoots(map[string]string{"x": ks.url})
+       kc.SetServiceRoots(map[string]string{"x": ks.url}, nil)
 
        r, n, _, err := kc.Get(barhash)
        _, err = ioutil.ReadAll(r)
@@ -535,26 +665,27 @@ func (s *StandaloneSuite) TestGetWithFailures(c *C) {
                c,
                hash,
                "abc123",
+               http.StatusOK,
                content}
 
        arv, err := arvadosclient.MakeArvadosClient()
        kc, _ := MakeKeepClient(&arv)
        arv.ApiToken = "abc123"
-       service_roots := make(map[string]string)
+       localRoots := make(map[string]string)
 
        ks1 := RunSomeFakeKeepServers(st, 1)
        ks2 := RunSomeFakeKeepServers(fh, 4)
 
        for i, k := range ks1 {
-               service_roots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
+               localRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
                defer k.listener.Close()
        }
        for i, k := range ks2 {
-               service_roots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i+len(ks1))] = k.url
+               localRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i+len(ks1))] = k.url
                defer k.listener.Close()
        }
 
-       kc.SetServiceRoots(service_roots)
+       kc.SetServiceRoots(localRoots, nil)
 
        // This test works only if one of the failing services is
        // attempted before the succeeding service. Otherwise,
@@ -562,7 +693,7 @@ func (s *StandaloneSuite) TestGetWithFailures(c *C) {
        // the choice of block content "waz" and the UUIDs of the fake
        // servers, so we just tried different strings until we found
        // an example that passes this Assert.)
-       c.Assert(NewRootSorter(service_roots, hash).GetSortedRoots()[0], Not(Equals), ks1[0].url)
+       c.Assert(NewRootSorter(localRoots, hash).GetSortedRoots()[0], Not(Equals), ks1[0].url)
 
        r, n, url2, err := kc.Get(hash)
 
@@ -634,16 +765,16 @@ func (s *StandaloneSuite) TestPutProxy(c *C) {
        kc.Want_replicas = 2
        kc.Using_proxy = true
        arv.ApiToken = "abc123"
-       service_roots := make(map[string]string)
+       localRoots := make(map[string]string)
 
        ks1 := RunSomeFakeKeepServers(st, 1)
 
        for i, k := range ks1 {
-               service_roots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
+               localRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
                defer k.listener.Close()
        }
 
-       kc.SetServiceRoots(service_roots)
+       kc.SetServiceRoots(localRoots, nil)
 
        _, replicas, err := kc.PutB([]byte("foo"))
        <-st.handled
@@ -665,15 +796,15 @@ func (s *StandaloneSuite) TestPutProxyInsufficientReplicas(c *C) {
        kc.Want_replicas = 3
        kc.Using_proxy = true
        arv.ApiToken = "abc123"
-       service_roots := make(map[string]string)
+       localRoots := make(map[string]string)
 
        ks1 := RunSomeFakeKeepServers(st, 1)
 
        for i, k := range ks1 {
-               service_roots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
+               localRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
                defer k.listener.Close()
        }
-       kc.SetServiceRoots(service_roots)
+       kc.SetServiceRoots(localRoots, nil)
 
        _, replicas, err := kc.PutB([]byte("foo"))
        <-st.handled
@@ -685,10 +816,40 @@ func (s *StandaloneSuite) TestPutProxyInsufficientReplicas(c *C) {
 }
 
 func (s *StandaloneSuite) TestMakeLocator(c *C) {
-       l := MakeLocator("91f372a266fe2bf2823cb8ec7fda31ce+3+Aabcde@12345678")
+       l, err := MakeLocator("91f372a266fe2bf2823cb8ec7fda31ce+3+Aabcde@12345678")
+       c.Check(err, Equals, nil)
+       c.Check(l.Hash, Equals, "91f372a266fe2bf2823cb8ec7fda31ce")
+       c.Check(l.Size, Equals, 3)
+       c.Check(l.Hints, DeepEquals, []string{"3", "Aabcde@12345678"})
+}
+
+func (s *StandaloneSuite) TestMakeLocatorNoHints(c *C) {
+       l, err := MakeLocator("91f372a266fe2bf2823cb8ec7fda31ce")
+       c.Check(err, Equals, nil)
+       c.Check(l.Hash, Equals, "91f372a266fe2bf2823cb8ec7fda31ce")
+       c.Check(l.Size, Equals, -1)
+       c.Check(l.Hints, DeepEquals, []string{})
+}
 
+func (s *StandaloneSuite) TestMakeLocatorNoSizeHint(c *C) {
+       l, err := MakeLocator("91f372a266fe2bf2823cb8ec7fda31ce+Aabcde@12345678")
+       c.Check(err, Equals, nil)
+       c.Check(l.Hash, Equals, "91f372a266fe2bf2823cb8ec7fda31ce")
+       c.Check(l.Size, Equals, -1)
+       c.Check(l.Hints, DeepEquals, []string{"Aabcde@12345678"})
+}
+
+func (s *StandaloneSuite) TestMakeLocatorPreservesUnrecognizedHints(c *C) {
+       str := "91f372a266fe2bf2823cb8ec7fda31ce+3+Unknown+Kzzzzz+Afoobar"
+       l, err := MakeLocator(str)
+       c.Check(err, Equals, nil)
        c.Check(l.Hash, Equals, "91f372a266fe2bf2823cb8ec7fda31ce")
        c.Check(l.Size, Equals, 3)
-       c.Check(l.Signature, Equals, "abcde")
-       c.Check(l.Timestamp, Equals, "12345678")
+       c.Check(l.Hints, DeepEquals, []string{"3", "Unknown", "Kzzzzz", "Afoobar"})
+       c.Check(l.String(), Equals, str)
+}
+
+func (s *StandaloneSuite) TestMakeLocatorInvalidInput(c *C) {
+       _, err := MakeLocator("91f372a266fe2bf2823cb8ec7fda31c")
+       c.Check(err, Equals, InvalidLocatorError)
 }
index c2780bc716171e7e1b96efd76ec2ed1fa2af26d5..a8afea096109531ad0e27bf4c31e5b26010c8538 100644 (file)
@@ -5,12 +5,12 @@ import (
 )
 
 type RootSorter struct {
-       root         []string
-       weight       []string
-       order        []int
+       root   []string
+       weight []string
+       order  []int
 }
 
-func NewRootSorter(serviceRoots map[string]string, hash string) (*RootSorter) {
+func NewRootSorter(serviceRoots map[string]string, hash string) *RootSorter {
        rs := new(RootSorter)
        rs.root = make([]string, len(serviceRoots))
        rs.weight = make([]string, len(serviceRoots))
@@ -26,7 +26,7 @@ func NewRootSorter(serviceRoots map[string]string, hash string) (*RootSorter) {
        return rs
 }
 
-func (rs RootSorter) getWeight(hash string, uuid string) (string) {
+func (rs RootSorter) getWeight(hash string, uuid string) string {
        if len(uuid) == 27 {
                return Md5String(hash + uuid[12:])
        } else {
@@ -35,7 +35,7 @@ func (rs RootSorter) getWeight(hash string, uuid string) (string) {
        }
 }
 
-func (rs RootSorter) GetSortedRoots() ([]string) {
+func (rs RootSorter) GetSortedRoots() []string {
        sorted := make([]string, len(rs.order))
        for i := range rs.order {
                sorted[i] = rs.root[rs.order[i]]
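
RootSorter is rendezvous (highest-random-weight) hashing: each service is
weighted by md5(blockhash + the last 15 characters of its UUID), and roots are
probed heaviest-first, so every client derives the same probe order for a given
block. A rough Python equivalent, assuming 27-character service UUIDs as in
getWeight above:

    import hashlib

    def sorted_roots(service_roots, block_hash):
        # service_roots maps 27-character service UUIDs to base URLs.
        def weight(uuid):
            return hashlib.md5(block_hash + uuid[12:]).hexdigest()
        return [service_roots[u]
                for u in sorted(service_roots, key=weight, reverse=True)]
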
index 455715d427ef8ac06f478adef20818058331a45a..23e8cc93f292a9a2196b09f4ac19558bb97e0dd4 100644 (file)
@@ -8,19 +8,20 @@ import (
 )
 
 type RootSorterSuite struct{}
+
 var _ = Suite(&RootSorterSuite{})
 
-func FakeSvcRoot(i uint64) (string) {
+func FakeSvcRoot(i uint64) string {
        return fmt.Sprintf("https://%x.svc/", i)
 }
 
-func FakeSvcUuid(i uint64) (string) {
+func FakeSvcUuid(i uint64) string {
        return fmt.Sprintf("zzzzz-bi6l4-%015x", i)
 }
 
-func FakeServiceRoots(n uint64) (map[string]string) {
+func FakeServiceRoots(n uint64) map[string]string {
        sr := map[string]string{}
-       for i := uint64(0); i < n; i ++ {
+       for i := uint64(0); i < n; i++ {
                sr[FakeSvcUuid(i)] = FakeSvcRoot(i)
        }
        return sr
index 940a110081dbaa46920cb472ea09d9e0635fd219..6393503e965b92b7ff27e8320e46e28eed69857d 100644 (file)
@@ -76,7 +76,7 @@ func (this *KeepClient) setClientSettingsStore() {
        }
 }
 
-func (this *KeepClient) DiscoverKeepServers() (map[string]string, error) {
+func (this *KeepClient) DiscoverKeepServers() error {
        type svcList struct {
                Items []keepDisk `json:"items"`
        }
@@ -86,31 +86,40 @@ func (this *KeepClient) DiscoverKeepServers() (map[string]string, error) {
 
        if err != nil {
                if err := this.Arvados.List("keep_disks", nil, &m); err != nil {
-                       return nil, err
+                       return err
                }
        }
 
        listed := make(map[string]bool)
-       service_roots := make(map[string]string)
+       localRoots := make(map[string]string)
+       gatewayRoots := make(map[string]string)
 
-       for _, element := range m.Items {
-               n := ""
-
-               if element.SSL {
-                       n = "s"
+       for _, service := range m.Items {
+               scheme := "http"
+               if service.SSL {
+                       scheme = "https"
                }
-
-               // Construct server URL
-               url := fmt.Sprintf("http%s://%s:%d", n, element.Hostname, element.Port)
+               url := fmt.Sprintf("%s://%s:%d", scheme, service.Hostname, service.Port)
 
                // Skip duplicates
-               if !listed[url] {
-                       listed[url] = true
-                       service_roots[element.Uuid] = url
+               if listed[url] {
+                       continue
                }
-               if element.SvcType == "proxy" {
+               listed[url] = true
+
+               switch service.SvcType {
+               case "disk":
+                       localRoots[service.Uuid] = url
+               case "proxy":
+                       localRoots[service.Uuid] = url
                        this.Using_proxy = true
                }
+               // Gateway services are only used when specified by
+               // UUID, so there's nothing to gain by filtering them
+               // by service type. Including all accessible services
+               // (gateway and otherwise) merely accommodates more
+               // service configurations.
+               gatewayRoots[service.Uuid] = url
        }
 
        if this.Using_proxy {
@@ -119,9 +128,8 @@ func (this *KeepClient) DiscoverKeepServers() (map[string]string, error) {
                this.setClientSettingsStore()
        }
 
-       this.SetServiceRoots(service_roots)
-
-       return service_roots, nil
+       this.SetServiceRoots(localRoots, gatewayRoots)
+       return nil
 }
 
 type uploadStatus struct {
@@ -204,7 +212,7 @@ func (this KeepClient) putReplicas(
        requestId := fmt.Sprintf("%x", md5.Sum([]byte(locator+time.Now().String())))[0:8]
 
        // Calculate the ordering for uploading to servers
-       sv := NewRootSorter(this.ServiceRoots(), hash).GetSortedRoots()
+       sv := NewRootSorter(this.LocalRoots(), hash).GetSortedRoots()
 
        // The next server to try contacting
        next_server := 0
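
The same split now exists in both SDKs: disk and proxy services become the local
roots used for weighted reads and writes, while every accessible service also
lands in a UUID-indexed gateway map for +K@uuid hints. A condensed sketch of the
classification (field names as in the Python build_services_list further below):

    def classify(services):
        # Only disk and proxy services participate in weighted probing.
        local_roots = {s['uuid']: s['_service_root']
                       for s in services
                       if s['service_type'] in ('disk', 'proxy')}
        # Gateways are only ever addressed by UUID, so no filtering by type.
        gateway_roots = {s['uuid']: s['_service_root'] for s in services}
        return local_roots, gateway_roots
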
index ce342b5a41a887ec90cf6a4a979af06cd669e5e7..2e0eaa8ce0f645ca305826851c5eb7ddccb434e2 100644 (file)
@@ -305,7 +305,7 @@ class _BufferBlock(object):
                 self.buffer_view = None
                 self.buffer_block = None
         else:
-            raise AssertionError("Invalid state change from %s to %s" % (self.state, state))
+            raise AssertionError("Invalid state change from %s to %s" % (self.state, nextstate))
 
     @synchronized
     def state(self):
@@ -484,9 +484,10 @@ class _BlockManager(object):
                     thread.daemon = True
                     thread.start()
 
-        # Mark the block as PENDING so to disallow any more appends.
-        block.set_state(_BufferBlock.PENDING)
-        self._put_queue.put(block)
+        if block.state() == _BufferBlock.WRITABLE:
+            # Mark the block as PENDING to disallow any further appends.
+            block.set_state(_BufferBlock.PENDING)
+            self._put_queue.put(block)
 
     @synchronized
     def get_bufferblock(self, locator):
@@ -523,8 +524,7 @@ class _BlockManager(object):
             items = self._bufferblocks.items()
 
         for k,v in items:
-            if v.state() == _BufferBlock.WRITABLE:
-                self.commit_bufferblock(v)
+            v.owner.flush()
 
         with self.lock:
             if self._put_queue is not None:
@@ -715,8 +715,15 @@ class ArvadosFile(object):
         elif size > self.size():
             raise IOError("truncate() does not support extending the file size")
 
-    def readfrom(self, offset, size, num_retries):
-        """Read upto `size` bytes from the file starting at `offset`."""
+
+    def readfrom(self, offset, size, num_retries, exact=False):
+        """Read up to `size` bytes from the file starting at `offset`.
+
+        :exact:
+         If False (default), return less data than requested if the read
+         crosses a block boundary and the next block isn't cached.  If True,
+         only return less data than requested when hitting EOF.
+        """
 
         with self.lock:
             if size == 0 or offset >= self.size():
@@ -729,14 +736,14 @@ class ArvadosFile(object):
 
         data = []
         for lr in readsegs:
-            block = self.parent._my_block_manager().get_block_contents(lr.locator, num_retries=num_retries, cache_only=bool(data))
+            block = self.parent._my_block_manager().get_block_contents(lr.locator, num_retries=num_retries, cache_only=(bool(data) and not exact))
             if block:
                 data.append(block[lr.segment_offset:lr.segment_offset+lr.segment_size])
             else:
                 break
         return ''.join(data)
 
-    def _repack_writes(self):
+    def _repack_writes(self, num_retries):
         """Test if the buffer block has more data than actual segments.
 
         This happens when a buffered write over-writes a file range written in
@@ -754,9 +761,10 @@ class ArvadosFile(object):
         if write_total < self._current_bblock.size():
             # There is more data in the buffer block than is actually accounted for by segments, so
             # re-pack into a new buffer by copying over to a new buffer block.
+            contents = self.parent._my_block_manager().get_block_contents(self._current_bblock.blockid, num_retries)
             new_bb = self.parent._my_block_manager().alloc_bufferblock(self._current_bblock.blockid, starting_capacity=write_total, owner=self)
             for t in bufferblock_segs:
-                new_bb.append(self._current_bblock.buffer_view[t.segment_offset:t.segment_offset+t.range_size].tobytes())
+                new_bb.append(contents[t.segment_offset:t.segment_offset+t.range_size])
                 t.segment_offset = new_bb.size() - t.range_size
 
             self._current_bblock = new_bb
@@ -785,7 +793,7 @@ class ArvadosFile(object):
             self._current_bblock = self.parent._my_block_manager().alloc_bufferblock(owner=self)
 
         if (self._current_bblock.size() + len(data)) > config.KEEP_BLOCK_SIZE:
-            self._repack_writes()
+            self._repack_writes(num_retries)
             if (self._current_bblock.size() + len(data)) > config.KEEP_BLOCK_SIZE:
                 self.parent._my_block_manager().commit_bufferblock(self._current_bblock)
                 self._current_bblock = self.parent._my_block_manager().alloc_bufferblock(owner=self)
@@ -795,9 +803,9 @@ class ArvadosFile(object):
         replace_range(self._segments, offset, len(data), self._current_bblock.blockid, self._current_bblock.write_pointer - len(data))
 
     @synchronized
-    def flush(self):
+    def flush(self, num_retries=0):
         if self._current_bblock:
-            self._repack_writes()
+            self._repack_writes(num_retries)
             self.parent._my_block_manager().commit_bufferblock(self._current_bblock)
 
     @must_be_writable
@@ -867,7 +875,7 @@ class ArvadosFileReader(ArvadosFileReaderBase):
     @retry_method
     def read(self, size, num_retries=None):
         """Read up to `size` bytes from the stream, starting at the current file position."""
-        data = self.arvadosfile.readfrom(self._filepos, size, num_retries)
+        data = self.arvadosfile.readfrom(self._filepos, size, num_retries, exact=True)
         self._filepos += len(data)
         return data
 
index f03deedb18aece57b374a2e94c0e29d26b42319c..30828732d8d4908ca0922bc780c11d2a6943578e 100644 (file)
@@ -822,12 +822,48 @@ class RichCollectionBase(CollectionBase):
 
         target_dir.add(source_obj, target_name, overwrite)
 
-    @synchronized
+    def portable_manifest_text(self, stream_name="."):
+        """Get the manifest text for this collection, sub collections and files.
+
+        This method does not flush outstanding blocks to Keep.  It will return
+        a normalized manifest with access tokens stripped.
+
+        :stream_name:
+          Name to use for this stream (directory)
+
+        """
+        return self._get_manifest_text(stream_name, True, True)
+
     def manifest_text(self, stream_name=".", strip=False, normalize=False):
         """Get the manifest text for this collection, sub collections and files.
 
+        This method will flush outstanding blocks to Keep.  By default, it will
+        not normalize an unmodified manifest or strip access tokens.
+
+        :stream_name:
+          Name to use for this stream (directory)
+
+        :strip:
+          If True, remove signing tokens from block locators if present.
+          If False (default), block locators are left unchanged.
+
+        :normalize:
+          If True, always export the manifest text in normalized form
+          even if the Collection is not modified.  If False (default) and the collection
+          is not modified, return the original manifest text even if it is not
+          in normalized form.
+
+        """
+
+        self._my_block_manager().commit_all()
+        return self._get_manifest_text(stream_name, strip, normalize)
+
+    @synchronized
+    def _get_manifest_text(self, stream_name, strip, normalize):
+        """Get the manifest text for this collection, sub collections and files.
+
         :stream_name:
-          Name of the stream (directory)
+          Name to use for this stream (directory)
 
         :strip:
           If True, remove signing tokens from block locators if present.
@@ -861,7 +897,7 @@ class RichCollectionBase(CollectionBase):
             if stream:
                 buf.append(" ".join(normalize_stream(stream_name, stream)) + "\n")
             for dirname in [s for s in sorted_keys if isinstance(self[s], RichCollectionBase)]:
-                buf.append(self[dirname].manifest_text(stream_name=os.path.join(stream_name, dirname), strip=strip))
+                buf.append(self[dirname].manifest_text(stream_name=os.path.join(stream_name, dirname), strip=strip, normalize=True))
             return "".join(buf)
         else:
             if strip:
@@ -941,7 +977,7 @@ class RichCollectionBase(CollectionBase):
 
     def portable_data_hash(self):
         """Get the portable data hash for this collection's manifest."""
-        stripped = self.manifest_text(strip=True)
+        stripped = self.portable_manifest_text()
         return hashlib.md5(stripped).hexdigest() + '+' + str(len(stripped))
 
     @synchronized
@@ -1249,8 +1285,8 @@ class Collection(RichCollectionBase):
         """Save collection to an existing collection record.
 
         Commit pending buffer blocks to Keep, merge with remote record (if
-        merge=True, the default), write the manifest to Keep, and update the
-        collection record.
+        merge=True, the default), and update the collection record.  Returns
+        the current manifest text.
 
         Will raise AssertionError if not associated with a collection record on
         the API server.  If you want to save a manifest to Keep only, see
@@ -1267,10 +1303,11 @@ class Collection(RichCollectionBase):
         if self.modified():
             if not self._has_collection_uuid():
                 raise AssertionError("Collection manifest_locator must be a collection uuid.  Use save_new() for new collections.")
+
             self._my_block_manager().commit_all()
+
             if merge:
                 self.update()
-            self._my_keep().put(self.manifest_text(strip=True), num_retries=num_retries)
 
             text = self.manifest_text(strip=False)
             self._api_response = self._my_api().collections().update(
@@ -1281,23 +1318,30 @@ class Collection(RichCollectionBase):
             self._manifest_text = self._api_response["manifest_text"]
             self.set_unmodified()
 
+        return self._manifest_text
+
 
     @must_be_writable
     @synchronized
     @retry_method
-    def save_new(self, name=None, create_collection_record=True, owner_uuid=None, ensure_unique_name=False, num_retries=None):
+    def save_new(self, name=None,
+                 create_collection_record=True,
+                 owner_uuid=None,
+                 ensure_unique_name=False,
+                 num_retries=None):
         """Save collection to a new collection record.
 
-        Commit pending buffer blocks to Keep, write the manifest to Keep, and
-        create a new collection record (if create_collection_record True).
-        After creating a new collection record, this Collection object will be
-        associated with the new record used by `save()`.
+        Commit pending buffer blocks to Keep and, when create_collection_record
+        is True (default), create a new collection record.  After creating a
+        new collection record, this Collection object will be associated with
+        the new record, which subsequent `save()` calls will use.  Returns the
+        current manifest text.
 
         :name:
           The collection name.
 
         :create_collection_record:
-          If True, create a collection record.  If False, only save the manifest to keep.
+          If True, create a collection record on the API server.
+          If False, only commit blocks to Keep and return the manifest text.
 
         :owner_uuid:
           the user, or project uuid that will own this collection.
@@ -1313,7 +1357,6 @@ class Collection(RichCollectionBase):
 
         """
         self._my_block_manager().commit_all()
-        self._my_keep().put(self.manifest_text(strip=True), num_retries=num_retries)
         text = self.manifest_text(strip=False)
 
         if create_collection_record:
@@ -1330,8 +1373,10 @@ class Collection(RichCollectionBase):
 
             self._manifest_locator = self._api_response["uuid"]
 
-        self._manifest_text = text
-        self.set_unmodified()
+            self._manifest_text = text
+            self.set_unmodified()
+
+        return text
 
     @synchronized
     def subscribe(self, callback):
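
portable_data_hash() is now computed from portable_manifest_text(), i.e. the
normalized manifest with access tokens stripped and no blocks flushed, so it is
stable regardless of signing tokens. The hash itself is simply the md5 of that
text plus its length:

    import hashlib

    def portable_data_hash(stripped_manifest):
        # Mirrors RichCollectionBase.portable_data_hash() above.
        return '%s+%d' % (hashlib.md5(stripped_manifest).hexdigest(),
                          len(stripped_manifest))
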
index 54d9798e401049c8273f90b597a8bb80d94ed1b0..c64b645a381cd3c8f5e47a175bee6b1a7f0b0024 100755 (executable)
@@ -410,12 +410,10 @@ def create_collection_from(c, src, dst, args):
     for link_class in ("docker_image_repo+tag", "docker_image_hash"):
         docker_links = src.links().list(filters=[["head_uuid", "=", collection_uuid], ["link_class", "=", link_class]]).execute(num_retries=args.retries)['items']
 
-        for d in docker_links:
-            body={
-                'head_uuid': dst_collection['uuid'],
-                'link_class': link_class,
-                'name': d['name'],
-            }
+        for src_link in docker_links:
+            body = {key: src_link[key]
+                    for key in ['link_class', 'name', 'properties']}
+            body['head_uuid'] = dst_collection['uuid']
             body['owner_uuid'] = args.project_uuid
 
             lk = dst.links().create(body=body).execute(num_retries=args.retries)
index 3629520a4d5f7216a86a5b085585e4f0756b3581..bfd471ba52bee712a1e1768c91327ce28a9c6603 100644 (file)
@@ -1,7 +1,6 @@
 # errors.py - Arvados-specific exceptions.
 
 import json
-import requests
 
 from apiclient import errors as apiclient_errors
 from collections import OrderedDict
@@ -46,7 +45,7 @@ class KeepRequestError(Exception):
         self.message = message
 
     def _format_error(self, key, error):
-        if isinstance(error, requests.Response):
+        if isinstance(error, HttpError):
             err_fmt = "{} {} responded with {e.status_code} {e.reason}"
         else:
             err_fmt = "{} {} raised {e.__class__.__name__} ({e})"
@@ -61,6 +60,12 @@ class KeepRequestError(Exception):
         return self._request_errors
 
 
+class HttpError(Exception):
+    def __init__(self, status_code, reason):
+        self.status_code = status_code
+        self.reason = reason
+
+
 class ArgumentError(Exception):
     pass
 class SyntaxError(Exception):
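
HttpError is a minimal stand-in for the requests.Response objects that
KeepRequestError previously carried: just a status_code and a reason, which
_format_error() renders as "<label> <key> responded with <code> <reason>". A
hedged sketch, with a hypothetical endpoint, shaped like the (key, error) pairs
KeepClient passes in:

    from arvados.errors import HttpError, KeepReadError

    errors = [('http://keep0.example:25107/',
               HttpError(503, 'Service Unavailable'))]
    raise KeepReadError('failed to read block', errors, label='service')
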
index 6196b502021a6036aa96c33d61c9bc6fb5d4f4f1..48d115c7a43bb050b81860a51dfa8ed672d24407 100644 (file)
@@ -1,25 +1,28 @@
+import bz2
+import datetime
+import fcntl
+import functools
 import gflags
+import hashlib
+import json
 import logging
 import os
 import pprint
-import sys
-import types
-import subprocess
-import json
-import UserDict
+import pycurl
+import Queue
 import re
-import hashlib
+import socket
+import ssl
 import string
-import bz2
-import zlib
-import fcntl
-import time
+import cStringIO
+import subprocess
+import sys
 import threading
+import time
 import timer
-import datetime
-import ssl
-import socket
-import requests
+import types
+import UserDict
+import zlib
 
 import arvados
 import arvados.config as config
@@ -27,25 +30,10 @@ import arvados.errors
 import arvados.retry as retry
 import arvados.util
 
-try:
-    # Workaround for urllib3 bug.
-    # The 'requests' library enables urllib3's SNI support by default, which uses pyopenssl.
-    # However, urllib3 prior to version 1.10 has a major bug in this feature
-    # (OpenSSL WantWriteError, https://github.com/shazow/urllib3/issues/412)
-    # Unfortunately Debian 8 is stabilizing on urllib3 1.9.1 which means the
-    # following workaround is necessary to be able to use
-    # the arvados python sdk with the distribution-provided packages.
-    import urllib3
-    from pkg_resources import parse_version
-    if parse_version(urllib3.__version__) < parse_version('1.10'):
-        from urllib3.contrib import pyopenssl
-        pyopenssl.extract_from_urllib3()
-except ImportError:
-    pass
-
 _logger = logging.getLogger('arvados.keep')
 global_client_object = None
 
+
 class KeepLocator(object):
     EPOCH_DATETIME = datetime.datetime.utcfromtimestamp(0)
     HINT_RE = re.compile(r'^[A-Z][A-Za-z0-9@_-]+$')
@@ -62,7 +50,7 @@ class KeepLocator(object):
             self.size = None
         for hint in pieces:
             if self.HINT_RE.match(hint) is None:
-                raise ValueError("unrecognized hint data {}".format(hint))
+                raise ValueError("invalid hint format: {}".format(hint))
             elif hint.startswith('A'):
                 self.parse_permission_hint(hint)
             else:
@@ -301,75 +289,216 @@ class KeepClient(object):
 
 
     class KeepService(object):
-        # Make requests to a single Keep service, and track results.
-        HTTP_ERRORS = (requests.exceptions.RequestException,
-                       socket.error, ssl.SSLError)
+        """Make requests to a single Keep service, and track results.
+
+        A KeepService is intended to last long enough to perform one
+        transaction (GET or PUT) against one Keep service. This can
+        involve calling either get() or put() multiple times in order
+        to retry after transient failures. However, calling both get()
+        and put() on a single instance -- or using the same instance
+        to access two different Keep services -- will not produce
+        sensible behavior.
+        """
+
+        HTTP_ERRORS = (
+            socket.error,
+            ssl.SSLError,
+            arvados.errors.HttpError,
+        )
 
-        def __init__(self, root, session, **headers):
+        def __init__(self, root, user_agent_pool=Queue.LifoQueue(), **headers):
             self.root = root
-            self.last_result = None
-            self.success_flag = None
-            self.session = session
+            self._user_agent_pool = user_agent_pool
+            self._result = {'error': None}
+            self._usable = True
+            self._session = None
             self.get_headers = {'Accept': 'application/octet-stream'}
             self.get_headers.update(headers)
             self.put_headers = headers
 
         def usable(self):
-            return self.success_flag is not False
+            """Is it worth attempting a request?"""
+            return self._usable
 
         def finished(self):
-            return self.success_flag is not None
+            """Did the request succeed or encounter permanent failure?"""
+            return self._result['error'] == False or not self._usable
+
+        def last_result(self):
+            return self._result
 
-        def last_status(self):
+        def _get_user_agent(self):
             try:
-                return self.last_result.status_code
-            except AttributeError:
-                return None
+                return self._user_agent_pool.get(False)
+            except Queue.Empty:
+                return pycurl.Curl()
+
+        def _put_user_agent(self, ua):
+            try:
+                ua.reset()
+                self._user_agent_pool.put(ua, False)
+            except:
+                ua.close()
+
+        @staticmethod
+        def _socket_open(family, socktype, protocol, address=None):
+            """Because pycurl doesn't have CURLOPT_TCP_KEEPALIVE"""
+            s = socket.socket(family, socktype, protocol)
+            s.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1)
+            s.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPIDLE, 75)
+            s.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPINTVL, 75)
+            return s
 
         def get(self, locator, timeout=None):
             # locator is a KeepLocator object.
             url = self.root + str(locator)
             _logger.debug("Request: GET %s", url)
+            curl = self._get_user_agent()
             try:
                 with timer.Timer() as t:
-                    result = self.session.get(url.encode('utf-8'),
-                                          headers=self.get_headers,
-                                          timeout=timeout)
+                    self._headers = {}
+                    response_body = cStringIO.StringIO()
+                    curl.setopt(pycurl.NOSIGNAL, 1)
+                    curl.setopt(pycurl.OPENSOCKETFUNCTION, self._socket_open)
+                    curl.setopt(pycurl.URL, url.encode('utf-8'))
+                    curl.setopt(pycurl.HTTPHEADER, [
+                        '{}: {}'.format(k,v) for k,v in self.get_headers.iteritems()])
+                    curl.setopt(pycurl.WRITEFUNCTION, response_body.write)
+                    curl.setopt(pycurl.HEADERFUNCTION, self._headerfunction)
+                    self._setcurltimeouts(curl, timeout)
+                    try:
+                        curl.perform()
+                    except Exception as e:
+                        raise arvados.errors.HttpError(0, str(e))
+                    self._result = {
+                        'status_code': curl.getinfo(pycurl.RESPONSE_CODE),
+                        'body': response_body.getvalue(),
+                        'headers': self._headers,
+                        'error': False,
+                    }
+                ok = retry.check_http_response_success(self._result['status_code'])
+                if not ok:
+                    self._result['error'] = arvados.errors.HttpError(
+                        self._result['status_code'],
+                        self._headers.get('x-status-line', 'Error'))
             except self.HTTP_ERRORS as e:
-                _logger.debug("Request fail: GET %s => %s: %s",
-                              url, type(e), str(e))
-                self.last_result = e
+                self._result = {
+                    'error': e,
+                }
+                ok = False
+            self._usable = ok != False # still usable if ok is True or None
+            if self._result.get('status_code', None):
+                # The client worked well enough to get an HTTP status
+                # code, so presumably any problems are just on the
+                # server side and it's OK to reuse the client.
+                self._put_user_agent(curl)
             else:
-                self.last_result = result
-                self.success_flag = retry.check_http_response_success(result)
-                content = result.content
-                _logger.info("%s response: %s bytes in %s msec (%.3f MiB/sec)",
-                             self.last_status(), len(content), t.msecs,
-                             (len(content)/(1024.0*1024))/t.secs if t.secs > 0 else 0)
-                if self.success_flag:
-                    resp_md5 = hashlib.md5(content).hexdigest()
-                    if resp_md5 == locator.md5sum:
-                        return content
-                    _logger.warning("Checksum fail: md5(%s) = %s",
-                                    url, resp_md5)
-            return None
+                # Don't return this client to the pool, in case it's
+                # broken.
+                curl.close()
+            if not ok:
+                _logger.debug("Request fail: GET %s => %s: %s",
+                              url, type(self._result['error']), str(self._result['error']))
+                return None
+            _logger.info("%s response: %s bytes in %s msec (%.3f MiB/sec)",
+                         self._result['status_code'],
+                         len(self._result['body']),
+                         t.msecs,
+                         (len(self._result['body'])/(1024.0*1024))/t.secs if t.secs > 0 else 0)
+            resp_md5 = hashlib.md5(self._result['body']).hexdigest()
+            if resp_md5 != locator.md5sum:
+                _logger.warning("Checksum fail: md5(%s) = %s",
+                                url, resp_md5)
+                self._result['error'] = arvados.errors.HttpError(
+                    0, 'Checksum fail')
+                return None
+            return self._result['body']
 
         def put(self, hash_s, body, timeout=None):
             url = self.root + hash_s
             _logger.debug("Request: PUT %s", url)
+            curl = self._get_user_agent()
             try:
-                result = self.session.put(url.encode('utf-8'),
-                                      data=body,
-                                      headers=self.put_headers,
-                                      timeout=timeout)
+                self._headers = {}
+                body_reader = cStringIO.StringIO(body)
+                response_body = cStringIO.StringIO()
+                curl.setopt(pycurl.NOSIGNAL, 1)
+                curl.setopt(pycurl.OPENSOCKETFUNCTION, self._socket_open)
+                curl.setopt(pycurl.URL, url.encode('utf-8'))
+                # Using UPLOAD tells cURL to wait for a "go ahead" from the
+                # Keep server (in the form of an HTTP/1.1 "100 Continue"
+                # response) instead of sending the request body immediately.
+                # This allows the server to reject the request if it is
+                # invalid or the server is read-only, without waiting for
+                # the client to send the entire block.
+                curl.setopt(pycurl.UPLOAD, True)
+                curl.setopt(pycurl.INFILESIZE, len(body))
+                curl.setopt(pycurl.READFUNCTION, body_reader.read)
+                curl.setopt(pycurl.HTTPHEADER, [
+                    '{}: {}'.format(k,v) for k,v in self.put_headers.iteritems()])
+                curl.setopt(pycurl.WRITEFUNCTION, response_body.write)
+                curl.setopt(pycurl.HEADERFUNCTION, self._headerfunction)
+                self._setcurltimeouts(curl, timeout)
+                try:
+                    curl.perform()
+                except Exception as e:
+                    raise arvados.errors.HttpError(0, str(e))
+                self._result = {
+                    'status_code': curl.getinfo(pycurl.RESPONSE_CODE),
+                    'body': response_body.getvalue(),
+                    'headers': self._headers,
+                    'error': False,
+                }
+                ok = retry.check_http_response_success(self._result['status_code'])
+                if not ok:
+                    self._result['error'] = arvados.errors.HttpError(
+                        self._result['status_code'],
+                        self._headers.get('x-status-line', 'Error'))
             except self.HTTP_ERRORS as e:
+                self._result = {
+                    'error': e,
+                }
+                ok = False
+            self._usable = ok != False # still usable if ok is True or None
+            if self._result.get('status_code', None):
+                # Client is functional. See comment in get().
+                self._put_user_agent(curl)
+            else:
+                curl.close()
+            if not ok:
                 _logger.debug("Request fail: PUT %s => %s: %s",
-                              url, type(e), str(e))
-                self.last_result = e
+                              url, type(self._result['error']), str(self._result['error']))
+                return False
+            return True
+
+        def _setcurltimeouts(self, curl, timeouts):
+            if not timeouts:
+                return
+            elif isinstance(timeouts, tuple):
+                conn_t, xfer_t = timeouts
+            else:
+                conn_t, xfer_t = (timeouts, timeouts)
+            curl.setopt(pycurl.CONNECTTIMEOUT_MS, int(conn_t*1000))
+            curl.setopt(pycurl.TIMEOUT_MS, int(xfer_t*1000))
+
+        def _headerfunction(self, header_line):
+            header_line = header_line.decode('iso-8859-1')
+            if ':' in header_line:
+                name, value = header_line.split(':', 1)
+                name = name.strip().lower()
+                value = value.strip()
+            elif self._headers:
+                name = self._lastheadername
+                value = self._headers[name] + ' ' + header_line.strip()
+            elif header_line.startswith('HTTP/'):
+                name = 'x-status-line'
+                value = header_line
             else:
-                self.last_result = result
-                self.success_flag = retry.check_http_response_success(result)
-            return self.success_flag
+                _logger.error("Unexpected header line: %s", header_line)
+                return
+            self._lastheadername = name
+            self._headers[name] = value
+            # Returning None implies all bytes were written
 
 
     class KeepWriterThread(threading.Thread):
@@ -407,9 +536,8 @@ class KeepClient(object):
                 self.args['data_hash'],
                 self.args['data'],
                 timeout=self.args.get('timeout', None)))
-            status = self.service.last_status()
+            result = self.service.last_result()
             if self._success:
-                result = self.service.last_result
                 _logger.debug("KeepWriterThread %s succeeded %s+%i %s",
                               str(threading.current_thread()),
                               self.args['data_hash'],
@@ -420,14 +548,15 @@ class KeepClient(object):
                 # we're talking to a proxy or other backend that
                 # stores to multiple copies for us.
                 try:
-                    replicas_stored = int(result.headers['x-keep-replicas-stored'])
+                    replicas_stored = int(result['headers']['x-keep-replicas-stored'])
                 except (KeyError, ValueError):
                     replicas_stored = 1
-                limiter.save_response(result.content.strip(), replicas_stored)
-            elif status is not None:
+                limiter.save_response(result['body'].strip(), replicas_stored)
+            elif result.get('status_code', None):
                 _logger.debug("Request fail: PUT %s => %s %s",
-                              self.args['data_hash'], status,
-                              self.service.last_result.content)
+                              self.args['data_hash'],
+                              result['status_code'],
+                              result['body'])
 
 
     def __init__(self, api_client=None, proxy=None,
@@ -484,10 +613,6 @@ class KeepClient(object):
           The default number of times to retry failed requests.
           This will be used as the default num_retries value when get() and
           put() are called.  Default 0.
-
-        :session:
-          The requests.Session object to use for get() and put() requests.
-          Will create one if not specified.
         """
         self.lock = threading.Lock()
         if proxy is None:
@@ -506,6 +631,7 @@ class KeepClient(object):
         self.block_cache = block_cache if block_cache else KeepBlockCache()
         self.timeout = timeout
         self.proxy_timeout = proxy_timeout
+        self._user_agent_pool = Queue.LifoQueue()
 
         if local_store:
             self.local_store = local_store
@@ -513,11 +639,11 @@ class KeepClient(object):
             self.put = self.local_store_put
         else:
             self.num_retries = num_retries
-            self.session = session if session is not None else requests.Session()
             if proxy:
                 if not proxy.endswith('/'):
                     proxy += '/'
                 self.api_token = api_token
+                self._gateway_services = {}
                 self._keep_services = [{
                     'uuid': 'proxy',
                     '_service_root': proxy,
@@ -531,6 +657,7 @@ class KeepClient(object):
                     api_client = arvados.api('v1')
                 self.api_client = api_client
                 self.api_token = api_client.api_token
+                self._gateway_services = {}
                 self._keep_services = None
                 self.using_proxy = None
                 self._static_services_list = False
@@ -560,21 +687,35 @@ class KeepClient(object):
             except Exception:  # API server predates Keep services.
                 keep_services = self.api_client.keep_disks().list()
 
-            self._keep_services = keep_services.execute().get('items')
-            if not self._keep_services:
+            accessible = keep_services.execute().get('items')
+            if not accessible:
                 raise arvados.errors.NoKeepServersError()
 
-            self.using_proxy = any(ks.get('service_type') == 'proxy'
-                                   for ks in self._keep_services)
-
             # Precompute the base URI for each service.
-            for r in self._keep_services:
-                r['_service_root'] = "{}://[{}]:{:d}/".format(
+            for r in accessible:
+                host = r['service_host']
+                if not host.startswith('[') and host.find(':') >= 0:
+                    # IPv6 URIs must be formatted like http://[::1]:80/...
+                    host = '[' + host + ']'
+                r['_service_root'] = "{}://{}:{:d}/".format(
                     'https' if r['service_ssl_flag'] else 'http',
-                    r['service_host'],
+                    host,
                     r['service_port'])
+
+            # Gateway services are only used when specified by UUID,
+            # so there's nothing to gain by filtering them by
+            # service_type.
+            self._gateway_services = {ks.get('uuid'): ks for ks in accessible}
+            _logger.debug(str(self._gateway_services))
+
+            self._keep_services = [
+                ks for ks in accessible
+                if ks.get('service_type') in ['disk', 'proxy']]
             _logger.debug(str(self._keep_services))
 
+            self.using_proxy = any(ks.get('service_type') == 'proxy'
+                                   for ks in self._keep_services)
+
     def _service_weight(self, data_hash, service_uuid):
         """Compute the weight of a Keep service endpoint for a data
         block with a known hash.
@@ -584,34 +725,50 @@ class KeepClient(object):
         """
         return hashlib.md5(data_hash + service_uuid[-15:]).hexdigest()
 
-    def weighted_service_roots(self, data_hash, force_rebuild=False):
+    def weighted_service_roots(self, locator, force_rebuild=False):
         """Return an array of Keep service endpoints, in the order in
         which they should be probed when reading or writing data with
-        the given hash.
+        the given hash+hints.
         """
         self.build_services_list(force_rebuild)
 
-        # Sort the available services by weight (heaviest first) for
-        # this data_hash, and return their service_roots (base URIs)
+        sorted_roots = []
+
+        # Use the services indicated by the given +K@... remote
+        # service hints, if any are present and can be resolved to a
+        # URI.
+        for hint in locator.hints:
+            if hint.startswith('K@'):
+                if len(hint) == 7:
+                    sorted_roots.append(
+                        "https://keep.{}.arvadosapi.com/".format(hint[2:]))
+                elif len(hint) == 29:
+                    svc = self._gateway_services.get(hint[2:])
+                    if svc:
+                        sorted_roots.append(svc['_service_root'])
+
+        # Sort the available local services by weight (heaviest first)
+        # for this locator, and return their service_roots (base URIs)
         # in that order.
-        sorted_roots = [
+        sorted_roots.extend([
             svc['_service_root'] for svc in sorted(
                 self._keep_services,
                 reverse=True,
-                key=lambda svc: self._service_weight(data_hash, svc['uuid']))]
-        _logger.debug(data_hash + ': ' + str(sorted_roots))
+                key=lambda svc: self._service_weight(locator.md5sum, svc['uuid']))])
+        _logger.debug("{}: {}".format(locator, sorted_roots))
         return sorted_roots
 
-    def map_new_services(self, roots_map, md5_s, force_rebuild, **headers):
+    def map_new_services(self, roots_map, locator, force_rebuild, **headers):
         # roots_map is a dictionary, mapping Keep service root strings
         # to KeepService objects.  Poll for Keep services, and add any
         # new ones to roots_map.  Return the current list of local
         # root strings.
         headers.setdefault('Authorization', "OAuth2 %s" % (self.api_token,))
-        local_roots = self.weighted_service_roots(md5_s, force_rebuild)
+        local_roots = self.weighted_service_roots(locator, force_rebuild)
         for root in local_roots:
             if root not in roots_map:
-                roots_map[root] = self.KeepService(root, self.session, **headers)
+                roots_map[root] = self.KeepService(
+                    root, self._user_agent_pool, **headers)
         return local_roots
 
     @staticmethod
@@ -635,7 +792,7 @@ class KeepClient(object):
     def get_from_cache(self, loc):
         """Fetch a block only if is in the cache, otherwise return None."""
         slot = self.block_cache.get(loc)
-        if slot.ready.is_set():
+        if slot is not None and slot.ready.is_set():
             return slot.get()
         else:
             return None
@@ -664,28 +821,43 @@ class KeepClient(object):
         if ',' in loc_s:
             return ''.join(self.get(x) for x in loc_s.split(','))
         locator = KeepLocator(loc_s)
-        expect_hash = locator.md5sum
-        slot, first = self.block_cache.reserve_cache(expect_hash)
+        slot, first = self.block_cache.reserve_cache(locator.md5sum)
         if not first:
             v = slot.get()
             return v
 
+        # If the locator has hints specifying a prefix (indicating a
+        # remote keepproxy) or the UUID of a local gateway service,
+        # read data from the indicated service(s) instead of the usual
+        # list of local disk services.
+        hint_roots = ['http://keep.{}.arvadosapi.com/'.format(hint[2:])
+                      for hint in locator.hints if hint.startswith('K@') and len(hint) == 7]
+        hint_roots.extend([self._gateway_services[hint[2:]]['_service_root']
+                           for hint in locator.hints if (
+                                   hint.startswith('K@') and
+                                   len(hint) == 29 and
+                                   self._gateway_services.get(hint[2:])
+                                   )])
+        # Map root URLs to their KeepService objects.
+        roots_map = {
+            root: self.KeepService(root, self._user_agent_pool)
+            for root in hint_roots
+        }
+
         # See #3147 for a discussion of the loop implementation.  Highlights:
         # * Refresh the list of Keep services after each failure, in case
         #   it's being updated.
         # * Retry until we succeed, we're out of retries, or every available
         #   service has returned permanent failure.
-        hint_roots = ['http://keep.{}.arvadosapi.com/'.format(hint[2:])
-                      for hint in locator.hints if hint.startswith('K@')]
-        # Map root URLs their KeepService objects.
-        roots_map = {root: self.KeepService(root, self.session) for root in hint_roots}
+        sorted_roots = []
+        roots_map = {}
         blob = None
         loop = retry.RetryLoop(num_retries, self._check_loop_result,
                                backoff_start=2)
         for tries_left in loop:
             try:
-                local_roots = self.map_new_services(
-                    roots_map, expect_hash,
+                sorted_roots = self.map_new_services(
+                    roots_map, locator,
                     force_rebuild=(tries_left < num_retries))
             except Exception as error:
                 loop.save_result(error)
@@ -694,7 +866,7 @@ class KeepClient(object):
             # Query KeepService objects that haven't returned
             # permanent failure, in our specified shuffle order.
             services_to_try = [roots_map[root]
-                               for root in (local_roots + hint_roots)
+                               for root in sorted_roots
                                if roots_map[root].usable()]
             for keep_service in services_to_try:
                 blob = keep_service.get(locator, timeout=self.current_timeout(num_retries-tries_left))
@@ -708,22 +880,17 @@ class KeepClient(object):
         if loop.success():
             return blob
 
-        try:
-            all_roots = local_roots + hint_roots
-        except NameError:
-            # We never successfully fetched local_roots.
-            all_roots = hint_roots
         # Q: Including 403 is necessary for the Keep tests to continue
         # passing, but maybe they should expect KeepReadError instead?
-        not_founds = sum(1 for key in all_roots
-                         if roots_map[key].last_status() in {403, 404, 410})
-        service_errors = ((key, roots_map[key].last_result)
-                          for key in all_roots)
+        not_founds = sum(1 for key in sorted_roots
+                         if roots_map[key].last_result().get('status_code', None) in {403, 404, 410})
+        service_errors = ((key, roots_map[key].last_result()['error'])
+                          for key in sorted_roots)
         if not roots_map:
             raise arvados.errors.KeepReadError(
                 "failed to read {}: no Keep services available ({})".format(
                     loc_s, loop.last_result()))
-        elif not_founds == len(all_roots):
+        elif not_founds == len(sorted_roots):
             raise arvados.errors.NotFoundError(
                 "{} not found".format(loc_s), service_errors)
         else:
@@ -758,6 +925,7 @@ class KeepClient(object):
         data_hash = hashlib.md5(data).hexdigest()
         if copies < 1:
             return data_hash
+        locator = KeepLocator(data_hash + '+' + str(len(data)))
 
         headers = {}
         if self.using_proxy:
@@ -770,7 +938,7 @@ class KeepClient(object):
         for tries_left in loop:
             try:
                 local_roots = self.map_new_services(
-                    roots_map, data_hash,
+                    roots_map, locator,
                     force_rebuild=(tries_left < num_retries), **headers)
             except Exception as error:
                 loop.save_result(error)
@@ -800,9 +968,9 @@ class KeepClient(object):
                 "failed to write {}: no Keep services available ({})".format(
                     data_hash, loop.last_result()))
         else:
-            service_errors = ((key, roots_map[key].last_result)
+            service_errors = ((key, roots_map[key].last_result()['error'])
                               for key in local_roots
-                              if not roots_map[key].success_flag)
+                              if roots_map[key].last_result()['error'])
             raise arvados.errors.KeepWriteError(
                 "failed to write {} (wanted {} copies but wrote {})".format(
                     data_hash, copies, thread_limiter.done()), service_errors, label="service")
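
All HTTP traffic above now drives a pycurl.Curl handle directly, with a
(connection, transfer) timeout pair mapped onto CONNECTTIMEOUT_MS and
TIMEOUT_MS by _setcurltimeouts(). Stripped of agent pooling, retries, and
checksum verification, the core GET pattern is roughly:

    import cStringIO
    import pycurl

    def simple_get(url, timeouts=(2, 30)):
        # timeouts: (connection, transfer) in seconds.
        body = cStringIO.StringIO()
        curl = pycurl.Curl()
        try:
            curl.setopt(pycurl.NOSIGNAL, 1)
            curl.setopt(pycurl.URL, url)
            curl.setopt(pycurl.WRITEFUNCTION, body.write)
            curl.setopt(pycurl.CONNECTTIMEOUT_MS, int(timeouts[0] * 1000))
            curl.setopt(pycurl.TIMEOUT_MS, int(timeouts[1] * 1000))
            curl.perform()
            return curl.getinfo(pycurl.RESPONSE_CODE), body.getvalue()
        finally:
            curl.close()
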
index 52a68faa6f6b511bf78378cc944aa6e4c5914c33..e4ad6440a7130e0ff0b4891d3b516cc4a7531c9d 100644 (file)
@@ -2,6 +2,7 @@
 
 import functools
 import inspect
+import pycurl
 import time
 
 from collections import deque
@@ -109,11 +110,11 @@ class RetryLoop(object):
                 "queried loop results before any were recorded")
 
 
-def check_http_response_success(result):
-    """Convert a 'requests' response to a loop control flag.
+def check_http_response_success(status_code):
+    """Convert an HTTP status code to a loop control flag.
 
-    Pass this method a requests.Response object.  It returns True if
-    the response indicates success, None if it indicates temporary
+    Pass this method a numeric HTTP status code.  It returns True if
+    the code indicates success, None if it indicates temporary
     failure, and False otherwise.  You can use this as the
     success_check for a RetryLoop.
 
@@ -128,15 +129,11 @@ def check_http_response_success(result):
       3xx status codes.  They don't indicate success, and you can't
       retry those requests verbatim.
     """
-    try:
-        status = result.status_code
-    except Exception:
-        return None
-    if status in _HTTP_SUCCESSES:
+    if status_code in _HTTP_SUCCESSES:
         return True
-    elif status in _HTTP_CAN_RETRY:
+    elif status_code in _HTTP_CAN_RETRY:
         return None
-    elif 100 <= status < 600:
+    elif 100 <= status_code < 600:
         return False
     else:
         return None  # Get well soon, server.
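
The tri-state return is what RetryLoop consumes: True ends the loop in success,
None schedules another attempt (with backoff), and False is a permanent
failure. A sketch of the wiring, simplified from KeepClient.get() (do_request
is hypothetical and stands for the actual HTTP call, returning a numeric
status code):

    from arvados import retry

    loop = retry.RetryLoop(3, retry.check_http_response_success,
                           backoff_start=2)
    for tries_left in loop:
        loop.save_result(do_request())
    if not loop.success():
        raise RuntimeError('giving up: %r' % (loop.last_result(),))
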
index ca28025fea64ae6b388af4388337814494a8c0c8..571faf169dd054eb5b131bcc8dc6ada2af763ae9 100644 (file)
@@ -26,23 +26,24 @@ setup(name='arvados-python-client',
       license='Apache 2.0',
       packages=find_packages(),
       scripts=[
-        'bin/arv-copy',
-        'bin/arv-get',
-        'bin/arv-keepdocker',
-        'bin/arv-ls',
-        'bin/arv-normalize',
-        'bin/arv-put',
-        'bin/arv-run',
-        'bin/arv-ws'
-        ],
+          'bin/arv-copy',
+          'bin/arv-get',
+          'bin/arv-keepdocker',
+          'bin/arv-ls',
+          'bin/arv-normalize',
+          'bin/arv-put',
+          'bin/arv-run',
+          'bin/arv-ws'
+      ],
       install_requires=[
-        'python-gflags',
-        'google-api-python-client',
-        'httplib2',
-        'requests>=2.4',
-        'urllib3',
-        'ws4py'
-        ],
+          'google-api-python-client',
+          'httplib2',
+          'pycurl>=7.19',
+          'python-gflags',
+          'requests>=2.4',
+          'urllib3',
+          'ws4py'
+      ],
       test_suite='tests',
       tests_require=['mock>=1.0', 'PyYAML'],
       zip_safe=False,
index 644dfffbaca0657a934a43cf0742e03cc227f62b..d138e5964a44eb6c26eafafb3f4272e9b9b1de9c 100644 (file)
@@ -8,8 +8,8 @@ import httplib2
 import io
 import mock
 import os
+import pycurl
 import Queue
-import requests
 import shutil
 import tempfile
 import unittest
@@ -43,44 +43,80 @@ def mock_responses(body, *codes, **headers):
     return mock.patch('httplib2.Http.request', side_effect=queue_with((
         (fake_httplib2_response(code, **headers), body) for code in codes)))
 
-# fake_requests_response, mock_get_responses and mock_put_responses
-# mock calls to requests.get() and requests.put()
-def fake_requests_response(code, body, **headers):
-    r = requests.Response()
-    r.status_code = code
-    r.reason = httplib.responses.get(code, "Unknown Response")
-    r.headers = headers
-    r.raw = io.BytesIO(body)
-    return r
-
-# The following methods patch requests.Session(), where return_value is a mock
-# Session object.  The put/get attributes are set on mock Session, and the
-# desired put/get behavior is set on the put/get mocks.
-
-def mock_put_responses(body, *codes, **headers):
-    m = mock.MagicMock()
+
+class FakeCurl:
+    @classmethod
+    def make(cls, code, body='', headers={}):
+        return mock.Mock(spec=cls, wraps=cls(code, body, headers))
+
+    def __init__(self, code=200, body='', headers={}):
+        self._opt = {}
+        self._got_url = None
+        self._writer = None
+        self._headerfunction = None
+        self._resp_code = code
+        self._resp_body = body
+        self._resp_headers = headers
+
+    def getopt(self, opt):
+        return self._opt.get(str(opt), None)
+
+    def setopt(self, opt, val):
+        self._opt[str(opt)] = val
+        if opt == pycurl.WRITEFUNCTION:
+            self._writer = val
+        elif opt == pycurl.HEADERFUNCTION:
+            self._headerfunction = val
+
+    def perform(self):
+        if not isinstance(self._resp_code, int):
+            raise self._resp_code
+        if self.getopt(pycurl.URL) is None:
+            raise ValueError
+        if self._writer is None:
+            raise ValueError
+        if self._headerfunction:
+            self._headerfunction("HTTP/1.1 {} Status".format(self._resp_code))
+            for k, v in self._resp_headers.iteritems():
+                self._headerfunction(k + ': ' + str(v))
+        self._writer(self._resp_body)
+
+    def close(self):
+        pass
+
+    def reset(self):
+        """Prevent fake UAs from going back into the user agent pool."""
+        raise Exception
+
+    def getinfo(self, opt):
+        if opt == pycurl.RESPONSE_CODE:
+            return self._resp_code
+        raise Exception
+
+def mock_keep_responses(body, *codes, **headers):
+    """Patch pycurl to return fake responses and raise exceptions.
+
+    body can be a string to return as the response body; an exception
+    to raise when perform() is called; or an iterable that returns a
+    sequence of such values.
+    """
+    cm = mock.MagicMock()
     if isinstance(body, tuple):
         codes = list(codes)
         codes.insert(0, body)
-        m.return_value.put.side_effect = queue_with((fake_requests_response(code, b, **headers) for b, code in codes))
+        responses = [
+            FakeCurl.make(code=code, body=b, headers=headers)
+            for b, code in codes
+        ]
     else:
-        m.return_value.put.side_effect = queue_with((fake_requests_response(code, body, **headers) for code in codes))
-    return mock.patch('requests.Session', m)
-
-def mock_get_responses(body, *codes, **headers):
-    m = mock.MagicMock()
-    m.return_value.get.side_effect = queue_with((fake_requests_response(code, body, **headers) for code in codes))
-    return mock.patch('requests.Session', m)
-
-def mock_get(side_effect):
-    m = mock.MagicMock()
-    m.return_value.get.side_effect = side_effect
-    return mock.patch('requests.Session', m)
+        responses = [
+            FakeCurl.make(code=code, body=body, headers=headers)
+            for code in codes
+        ]
+    cm.side_effect = queue_with(responses)
+    cm.responses = responses
+    return mock.patch('pycurl.Curl', cm)
 
-def mock_put(side_effect):
-    m = mock.MagicMock()
-    m.return_value.put.side_effect = side_effect
-    return mock.patch('requests.Session', m)
 
 class MockStreamReader(object):
     def __init__(self, name='.', *data):
@@ -104,7 +140,8 @@ class ApiClientMock(object):
                            service_type='disk',
                            service_host=None,
                            service_port=None,
-                           service_ssl_flag=False):
+                           service_ssl_flag=False,
+                           additional_services=[]):
         if api_mock is None:
             api_mock = self.api_client_mock()
         body = {
@@ -116,7 +153,7 @@ class ApiClientMock(object):
                 'service_port': service_port or 65535-i,
                 'service_ssl_flag': service_ssl_flag,
                 'service_type': service_type,
-            } for i in range(0, count)]
+            } for i in range(0, count)] + additional_services
         }
         self._mock_api_call(api_mock.keep_services().accessible, status, body)
         return api_mock
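
mock_keep_responses() replaces the old requests.Session patches: it patches
pycurl.Curl with FakeCurl instances that replay the given body and status codes
in order (an exception supplied in place of a code is raised from perform()). A
hypothetical test usage, with keep_client and foo_locator assumed to be set up
by the test:

    with mock_keep_responses('foo', 500, 200):
        # First attempt sees a 500; the retry sees a 200 with body 'foo'.
        self.assertEqual('foo',
                         keep_client.get(foo_locator, num_retries=1))
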
diff --git a/sdk/python/tests/keepstub.py b/sdk/python/tests/keepstub.py
new file mode 100644 (file)
index 0000000..ef724ed
--- /dev/null
@@ -0,0 +1,104 @@
+import BaseHTTPServer
+import hashlib
+import os
+import re
+import SocketServer
+import time
+
+class Server(SocketServer.ThreadingMixIn, BaseHTTPServer.HTTPServer, object):
+
+    allow_reuse_address = 1
+
+    def __init__(self, *args, **kwargs):
+        self.store = {}
+        self.delays = {
+            # before reading request headers
+            'request': 0,
+            # before reading request body
+            'request_body': 0,
+            # before setting response status and headers
+            'response': 0,
+            # before sending response body
+            'response_body': 0,
+            # before returning from handler (thus setting response EOF)
+            'response_close': 0,
+        }
+        super(Server, self).__init__(*args, **kwargs)
+
+    def setdelays(self, **kwargs):
+        """In future requests, induce delays at the given checkpoints."""
+        for (k, v) in kwargs.iteritems():
+            self.delays[k] # KeyError if unknown key
+            self.delays[k] = v
+
+    def _sleep_at_least(self, seconds):
+        """Sleep for given time, even if signals are received."""
+        wake = time.time() + seconds
+        todo = seconds
+        while todo > 0:
+            time.sleep(todo)
+            todo = wake - time.time()
+
+    def _do_delay(self, k):
+        self._sleep_at_least(self.delays[k])
+
+
+class Handler(BaseHTTPServer.BaseHTTPRequestHandler, object):
+    def handle(self, *args, **kwargs):
+        self.server._do_delay('request')
+        return super(Handler, self).handle(*args, **kwargs)
+
+    def do_GET(self):
+        self.server._do_delay('response')
+        r = re.search(r'[0-9a-f]{32}', self.path)
+        if not r:
+            return self.send_response(422)
+        datahash = r.group(0)
+        if datahash not in self.server.store:
+            return self.send_response(404)
+        self.send_response(200)
+        self.send_header('Content-type', 'application/octet-stream')
+        self.end_headers()
+        self.server._do_delay('response_body')
+        self.wfile.write(self.server.store[datahash])
+        self.server._do_delay('response_close')
+
+    def do_PUT(self):
+        self.server._do_delay('request_body')
+
+        # The comments at https://bugs.python.org/issue1491 imply that Python
+        # 2.7's BaseHTTPRequestHandler was patched to support 100 Continue,
+        # but the code that actually ships in Debian clearly was not, so we
+        # need to send the interim response on the socket directly.
+
+        self.wfile.write("%s %d %s\r\n\r\n" %
+                         (self.protocol_version, 100, "Continue"))
+
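+        # Store the body under its MD5 hash, mimicking Keep's content
+        # addressing; the response body is a bare locator, '<md5>+<size>'.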
+        data = self.rfile.read(int(self.headers.getheader('content-length')))
+        datahash = hashlib.md5(data).hexdigest()
+        self.server.store[datahash] = data
+        self.server._do_delay('response')
+        self.send_response(200)
+        self.send_header('Content-type', 'text/plain')
+        self.end_headers()
+        self.server._do_delay('response_body')
+        self.wfile.write(datahash + '+' + str(len(data)))
+        self.server._do_delay('response_close')
+
+    def log_request(self, *args, **kwargs):
+        if os.environ.get('ARVADOS_DEBUG', None):
+            super(Handler, self).log_request(*args, **kwargs)
+
+    def finish(self, *args, **kwargs):
+        """Ignore exceptions, notably "Broken pipe" when client times out."""
+        try:
+            return super(Handler, self).finish(*args, **kwargs)
+        except Exception:
+            pass
+
+    def handle_one_request(self, *args, **kwargs):
+        """Ignore exceptions, notably "Broken pipe" when client times out."""
+        try:
+            return super(Handler, self).handle_one_request(*args, **kwargs)
+        except Exception:
+            pass
diff --git a/sdk/python/tests/nginx.conf b/sdk/python/tests/nginx.conf
new file mode 100644 (file)
index 0000000..6196605
--- /dev/null
@@ -0,0 +1,31 @@
+daemon off;
+error_log stderr info;          # Yes, must be specified here _and_ cmdline
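+# {{NAME}} tokens are substituted with real ports and paths by
+# run_test_server.run_nginx() before nginx is started.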
+events {
+}
+http {
+  access_log /dev/stderr combined;
+  upstream arv-git-http {
+    server localhost:{{GITPORT}};
+  }
+  server {
+    listen *:{{GITSSLPORT}} ssl default_server;
+    server_name _;
+    ssl_certificate {{SSLCERT}};
+    ssl_certificate_key {{SSLKEY}};
+    location  / {
+      proxy_pass http://arv-git-http;
+    }
+  }
+  upstream keepproxy {
+    server localhost:{{KEEPPROXYPORT}};
+  }
+  server {
+    listen *:{{KEEPPROXYSSLPORT}} ssl default_server;
+    server_name _;
+    ssl_certificate {{SSLCERT}};
+    ssl_certificate_key {{SSLKEY}};
+    location  / {
+      proxy_pass http://keepproxy;
+    }
+  }
+}
index b9502f0f8e5bb5a6292da85bb69ef1f783163cab..271b525df2bc8cd404feb357c1ea3d080fd037e6 100644 (file)
@@ -1,5 +1,6 @@
 #!/usr/bin/env python
 
+from __future__ import print_function
 import argparse
 import atexit
 import httplib2
@@ -41,6 +42,7 @@ if not os.path.exists(TEST_TMPDIR):
     os.mkdir(TEST_TMPDIR)
 
 my_api_host = None
+_cached_config = {}
 
 def find_server_pid(PID_PATH, wait=10):
     now = time.time()
@@ -178,6 +180,13 @@ def run(leave_running_atexit=False):
             '-subj', '/CN=0.0.0.0'],
         stdout=sys.stderr)
 
+    # Install the git repository fixtures.
+    gitdir = os.path.join(SERVICES_SRC_DIR, 'api', 'tmp', 'git')
+    gittarball = os.path.join(SERVICES_SRC_DIR, 'api', 'test', 'test.git.tar')
+    if not os.path.isdir(gitdir):
+        os.makedirs(gitdir)
+    subprocess.check_output(['tar', '-xC', gitdir, '-f', gittarball])
+
     port = find_available_port()
     env = os.environ.copy()
     env['RAILS_ENV'] = 'test'
@@ -256,15 +265,16 @@ def _start_keep(n, keep_args):
     keep0 = tempfile.mkdtemp()
     port = find_available_port()
     keep_cmd = ["keepstore",
-                "-volumes={}".format(keep0),
+                "-volume={}".format(keep0),
                 "-listen=:{}".format(port),
-                "-pid={}".format("{}/keep{}.pid".format(TEST_TMPDIR, n))]
+                "-pid="+_pidfile('keep{}'.format(n))]
 
     for arg, val in keep_args.iteritems():
         keep_cmd.append("{}={}".format(arg, val))
 
-    kp0 = subprocess.Popen(keep_cmd)
-    with open("{}/keep{}.pid".format(TEST_TMPDIR, n), 'w') as f:
+    kp0 = subprocess.Popen(
+        keep_cmd, stdin=open('/dev/null'), stdout=sys.stderr)
+    with open(_pidfile('keep{}'.format(n)), 'w') as f:
         f.write(str(kp0.pid))
 
     with open("{}/keep{}.volume".format(TEST_TMPDIR, n), 'w') as f:
@@ -307,7 +317,7 @@ def run_keep(blob_signing_key=None, enforce_permissions=False):
         }).execute()
 
 def _stop_keep(n):
-    kill_server_pid("{}/keep{}.pid".format(TEST_TMPDIR, n), 0)
+    kill_server_pid(_pidfile('keep{}'.format(n)), 0)
     if os.path.exists("{}/keep{}.volume".format(TEST_TMPDIR, n)):
         with open("{}/keep{}.volume".format(TEST_TMPDIR, n), 'r') as r:
             shutil.rmtree(r.read(), True)
@@ -320,6 +330,8 @@ def stop_keep():
     _stop_keep(1)
 
 def run_keep_proxy():
+    if 'ARVADOS_TEST_PROXY_SERVICES' in os.environ:
+        return
     stop_keep_proxy()
 
     admin_token = auth_token('admin')
@@ -328,9 +340,9 @@ def run_keep_proxy():
     env['ARVADOS_API_TOKEN'] = admin_token
     kp = subprocess.Popen(
         ['keepproxy',
-         '-pid={}/keepproxy.pid'.format(TEST_TMPDIR),
+         '-pid='+_pidfile('keepproxy'),
          '-listen=:{}'.format(port)],
-        env=env)
+        env=env, stdin=open('/dev/null'), stdout=sys.stderr)
 
     api = arvados.api(
         version='v1',
@@ -347,9 +359,100 @@ def run_keep_proxy():
         'service_ssl_flag': False,
     }}).execute()
     os.environ["ARVADOS_KEEP_PROXY"] = "http://localhost:{}".format(port)
+    _setport('keepproxy', port)
 
 def stop_keep_proxy():
-    kill_server_pid(os.path.join(TEST_TMPDIR, "keepproxy.pid"), 0)
+    if 'ARVADOS_TEST_PROXY_SERVICES' in os.environ:
+        return
+    kill_server_pid(_pidfile('keepproxy'), wait=0)
+
+def run_arv_git_httpd():
+    if 'ARVADOS_TEST_PROXY_SERVICES' in os.environ:
+        return
+    stop_arv_git_httpd()
+
+    gitdir = os.path.join(SERVICES_SRC_DIR, 'api', 'tmp', 'git')
+    gitport = find_available_port()
+    env = os.environ.copy()
+    env.pop('ARVADOS_API_TOKEN', None)
+    agh = subprocess.Popen(
+        ['arv-git-httpd',
+         '-repo-root='+gitdir+'/test',
+         '-address=:'+str(gitport)],
+        env=env, stdin=open('/dev/null'), stdout=sys.stderr)
+    with open(_pidfile('arv-git-httpd'), 'w') as f:
+        f.write(str(agh.pid))
+    _setport('arv-git-httpd', gitport)
+
+def stop_arv_git_httpd():
+    if 'ARVADOS_TEST_PROXY_SERVICES' in os.environ:
+        return
+    kill_server_pid(_pidfile('arv-git-httpd'), wait=0)
+
+def run_nginx():
+    if 'ARVADOS_TEST_PROXY_SERVICES' in os.environ:
+        return
+    nginxconf = {}
+    nginxconf['KEEPPROXYPORT'] = _getport('keepproxy')
+    nginxconf['KEEPPROXYSSLPORT'] = find_available_port()
+    nginxconf['GITPORT'] = _getport('arv-git-httpd')
+    nginxconf['GITSSLPORT'] = find_available_port()
+    nginxconf['SSLCERT'] = os.path.join(SERVICES_SRC_DIR, 'api', 'tmp', 'self-signed.pem')
+    nginxconf['SSLKEY'] = os.path.join(SERVICES_SRC_DIR, 'api', 'tmp', 'self-signed.key')
+
+    conftemplatefile = os.path.join(MY_DIRNAME, 'nginx.conf')
+    conffile = os.path.join(TEST_TMPDIR, 'nginx.conf')
+    with open(conffile, 'w') as f:
+        f.write(re.sub(
+            r'{{([A-Z]+)}}',
+            lambda match: str(nginxconf.get(match.group(1))),
+            open(conftemplatefile).read()))
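+    # (an unrecognized {{NAME}} token would be rendered as the string 'None')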
+
+    env = os.environ.copy()
+    env['PATH'] = env['PATH']+':/sbin:/usr/sbin:/usr/local/sbin'
+    nginx = subprocess.Popen(
+        ['nginx',
+         '-g', 'error_log stderr info;',
+         '-g', 'pid '+_pidfile('nginx')+';',
+         '-c', conffile],
+        env=env, stdin=open('/dev/null'), stdout=sys.stderr)
+    _setport('keepproxy-ssl', nginxconf['KEEPPROXYSSLPORT'])
+    _setport('arv-git-httpd-ssl', nginxconf['GITSSLPORT'])
+
+def stop_nginx():
+    if 'ARVADOS_TEST_PROXY_SERVICES' in os.environ:
+        return
+    kill_server_pid(_pidfile('nginx'), wait=0)
+
+def _pidfile(program):
+    return os.path.join(TEST_TMPDIR, program + '.pid')
+
+def _portfile(program):
+    return os.path.join(TEST_TMPDIR, program + '.port')
+
+def _setport(program, port):
+    with open(_portfile(program), 'w') as f:
+        f.write(str(port))
+
+# Return the port recorded by _setport() for program, or 9 (an unusable
+# port) if the program is not up.
+def _getport(program):
+    try:
+        return int(open(_portfile(program)).read())
+    except IOError:
+        return 9
+
+def _apiconfig(key):
+    global _cached_config
+    if _cached_config:
+        return _cached_config[key]
+    def _load(f):
+        # yaml.load() wants file content, not a path, so open the file here.
+        with open(os.path.join(SERVICES_SRC_DIR, 'api', 'config', f)) as conf:
+            return yaml.load(conf)
+    cdefault = _load('application.default.yml')
+    csite = _load('application.yml')
+    _cached_config = {}
+    for section in [cdefault.get('common',{}), cdefault.get('test',{}),
+                    csite.get('common',{}), csite.get('test',{})]:
+        _cached_config.update(section)
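+    # Later sections win: site config overrides the defaults, and each
+    # file's 'test' section overrides its 'common' section.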
+    return _cached_config[key]
 
 def fixture(fix):
     '''load a fixture yaml file'''
@@ -431,14 +534,21 @@ class TestCaseWithServers(unittest.TestCase):
 
 
 if __name__ == "__main__":
-    actions = ['start', 'stop',
-               'start_keep', 'stop_keep',
-               'start_keep_proxy', 'stop_keep_proxy']
+    actions = [
+        'start', 'stop',
+        'start_keep', 'stop_keep',
+        'start_keep_proxy', 'stop_keep_proxy',
+        'start_arv-git-httpd', 'stop_arv-git-httpd',
+        'start_nginx', 'stop_nginx',
+    ]
     parser = argparse.ArgumentParser()
     parser.add_argument('action', type=str, help="one of {}".format(actions))
     parser.add_argument('--auth', type=str, metavar='FIXTURE_NAME', help='Print authorization info for given api_client_authorizations fixture')
     args = parser.parse_args()
 
+    if args.action not in actions:
+        print("Unrecognized action '{}'. Actions are: {}.".format(args.action, actions), file=sys.stderr)
+        sys.exit(1)
     if args.action == 'start':
         stop(force=('ARVADOS_TEST_API_HOST' not in os.environ))
         run(leave_running_atexit=True)
@@ -460,5 +570,13 @@ if __name__ == "__main__":
         run_keep_proxy()
     elif args.action == 'stop_keep_proxy':
         stop_keep_proxy()
+    elif args.action == 'start_arv-git-httpd':
+        run_arv_git_httpd()
+    elif args.action == 'stop_arv-git-httpd':
+        stop_arv_git_httpd()
+    elif args.action == 'start_nginx':
+        run_nginx()
+    elif args.action == 'stop_nginx':
+        stop_nginx()
     else:
-        print("Unrecognized action '{}'. Actions are: {}.".format(args.action, actions))
+        raise Exception("action recognized but not implemented!?")
index 825465cb4aae15e05876217c9fcbb74db3dce0c4..1701aa43caae5baaa3f25522230c5b9c4ac46994 100644 (file)
@@ -128,7 +128,7 @@ class ArvadosFileWriterTestCase(unittest.TestCase):
         writer.write("world")
         self.assertEqual(writer.read(20), "0123456789helloworld")
 
-        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 fc5e038d38a57032085441e7fe7010b0+10 0:20:count.txt\n", c.manifest_text())
+        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 fc5e038d38a57032085441e7fe7010b0+10 0:20:count.txt\n", c.portable_manifest_text())
 
     def test_write_at_beginning(self):
         keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
@@ -140,14 +140,36 @@ class ArvadosFileWriterTestCase(unittest.TestCase):
             writer.write("foo")
             self.assertEqual(writer.size(), 10)
             self.assertEqual("foo3456789", writer.readfrom(0, 13))
-            self.assertEqual(". acbd18db4cc2f85cedef654fccc4a4d8+3 781e5e245d69b566979b86e28d23f2c7+10 0:3:count.txt 6:7:count.txt\n", c.manifest_text())
+            self.assertEqual(". acbd18db4cc2f85cedef654fccc4a4d8+3 781e5e245d69b566979b86e28d23f2c7+10 0:3:count.txt 6:7:count.txt\n", c.portable_manifest_text())
 
     def test_write_empty(self):
         keep = ArvadosFileWriterTestCase.MockKeep({})
         with Collection(keep_client=keep) as c:
             writer = c.open("count.txt", "w")
             self.assertEqual(writer.size(), 0)
-            self.assertEqual(". d41d8cd98f00b204e9800998ecf8427e+0 0:0:count.txt\n", c.manifest_text())
+            self.assertEqual(". d41d8cd98f00b204e9800998ecf8427e+0 0:0:count.txt\n", c.portable_manifest_text())
+
+    def test_save_manifest_text(self):
+        keep = ArvadosFileWriterTestCase.MockKeep({})
+        with Collection(keep_client=keep) as c:
+            writer = c.open("count.txt", "w")
+            writer.write("0123456789")
+            self.assertEqual('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n', c.portable_manifest_text())
+            self.assertNotIn('781e5e245d69b566979b86e28d23f2c7+10', keep.blocks)
+
+            self.assertEqual('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n', c.save_new(create_collection_record=False))
+            self.assertIn('781e5e245d69b566979b86e28d23f2c7+10', keep.blocks)
+
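+    # portable_manifest_text() reports the manifest without committing
+    # buffered blocks to Keep; manifest_text() commits pending blocks first: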
+    def test_get_manifest_text_commits(self):
+        keep = ArvadosFileWriterTestCase.MockKeep({})
+        with Collection(keep_client=keep) as c:
+            writer = c.open("count.txt", "w")
+            writer.write("0123456789")
+            self.assertEqual('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n', c.portable_manifest_text())
+            self.assertNotIn('781e5e245d69b566979b86e28d23f2c7+10', keep.blocks)
+            self.assertEqual('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n', c.manifest_text())
+            self.assertIn('781e5e245d69b566979b86e28d23f2c7+10', keep.blocks)
+
 
     def test_write_in_middle(self):
         keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
@@ -159,7 +181,7 @@ class ArvadosFileWriterTestCase(unittest.TestCase):
             writer.write("foo")
             self.assertEqual(writer.size(), 10)
             self.assertEqual("012foo6789", writer.readfrom(0, 13))
-            self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:count.txt 10:3:count.txt 6:4:count.txt\n", c.manifest_text())
+            self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:count.txt 10:3:count.txt 6:4:count.txt\n", c.portable_manifest_text())
 
     def test_write_at_end(self):
         keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
@@ -171,7 +193,7 @@ class ArvadosFileWriterTestCase(unittest.TestCase):
             writer.write("foo")
             self.assertEqual(writer.size(), 10)
             self.assertEqual("0123456foo", writer.readfrom(0, 13))
-            self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:7:count.txt 10:3:count.txt\n", c.manifest_text())
+            self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:7:count.txt 10:3:count.txt\n", c.portable_manifest_text())
 
     def test_write_across_segment_boundary(self):
         keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
@@ -183,7 +205,7 @@ class ArvadosFileWriterTestCase(unittest.TestCase):
             writer.write("foobar")
             self.assertEqual(writer.size(), 20)
             self.assertEqual("0123456foobar34", writer.readfrom(0, 15))
-            self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 3858f62230ac3c915f300c664312c63f+6 0:7:count.txt 10:6:count.txt 3:7:count.txt\n", c.manifest_text())
+            self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 3858f62230ac3c915f300c664312c63f+6 0:7:count.txt 10:6:count.txt 3:7:count.txt\n", c.portable_manifest_text())
 
     def test_write_across_several_segments(self):
         keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
@@ -195,7 +217,7 @@ class ArvadosFileWriterTestCase(unittest.TestCase):
             writer.write("abcdefg")
             self.assertEqual(writer.size(), 12)
             self.assertEqual("01abcdefg123", writer.readfrom(0, 15))
-            self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 7ac66c0f148de9519b8bd264312c4d64+7 0:2:count.txt 10:7:count.txt 1:3:count.txt\n", c.manifest_text())
+            self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 7ac66c0f148de9519b8bd264312c4d64+7 0:2:count.txt 10:7:count.txt 1:3:count.txt\n", c.portable_manifest_text())
 
     def test_write_large(self):
         keep = ArvadosFileWriterTestCase.MockKeep({})
@@ -228,11 +250,11 @@ class ArvadosFileWriterTestCase(unittest.TestCase):
 
             self.assertEqual(writer.size(), 10)
             self.assertEqual("0123456789", writer.readfrom(0, 20))
-            self.assertEqual(". 7a08b07e84641703e5f2c836aa59a170+100 90:10:count.txt\n", c.manifest_text())
+            self.assertEqual(". 7a08b07e84641703e5f2c836aa59a170+100 90:10:count.txt\n", c.portable_manifest_text())
             writer.flush()
             self.assertEqual(writer.size(), 10)
             self.assertEqual("0123456789", writer.readfrom(0, 20))
-            self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n", c.manifest_text())
+            self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n", c.portable_manifest_text())
 
     def test_rewrite_append_existing_file(self):
         keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
@@ -245,12 +267,12 @@ class ArvadosFileWriterTestCase(unittest.TestCase):
 
             self.assertEqual(writer.size(), 20)
             self.assertEqual("0123456789abcdefghij", writer.readfrom(0, 20))
-            self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 ae5f43bab79cf0be33f025fa97ae7398+100 0:10:count.txt 100:10:count.txt\n", c.manifest_text())
+            self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 ae5f43bab79cf0be33f025fa97ae7398+100 0:10:count.txt 100:10:count.txt\n", c.portable_manifest_text())
 
             writer.arvadosfile.flush()
             self.assertEqual(writer.size(), 20)
             self.assertEqual("0123456789abcdefghij", writer.readfrom(0, 20))
-            self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 a925576942e94b2ef57a066101b48876+10 0:20:count.txt\n", c.manifest_text())
+            self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 a925576942e94b2ef57a066101b48876+10 0:20:count.txt\n", c.portable_manifest_text())
 
     def test_rewrite_over_existing_file(self):
         keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
@@ -263,13 +285,13 @@ class ArvadosFileWriterTestCase(unittest.TestCase):
 
             self.assertEqual(writer.size(), 15)
             self.assertEqual("01234abcdefghij", writer.readfrom(0, 20))
-            self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 ae5f43bab79cf0be33f025fa97ae7398+100 0:5:count.txt 100:10:count.txt\n", c.manifest_text())
+            self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 ae5f43bab79cf0be33f025fa97ae7398+100 0:5:count.txt 100:10:count.txt\n", c.portable_manifest_text())
 
             writer.arvadosfile.flush()
 
             self.assertEqual(writer.size(), 15)
             self.assertEqual("01234abcdefghij", writer.readfrom(0, 20))
-            self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 a925576942e94b2ef57a066101b48876+10 0:5:count.txt 10:10:count.txt\n", c.manifest_text())
+            self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 a925576942e94b2ef57a066101b48876+10 0:5:count.txt 10:10:count.txt\n", c.portable_manifest_text())
 
     def test_write_large_rewrite(self):
         keep = ArvadosFileWriterTestCase.MockKeep({})
@@ -418,20 +440,25 @@ class ArvadosFileReaderTestCase(StreamFileReaderTestCase):
         af = ArvadosFile(ArvadosFileReaderTestCase.MockParent(blocks, nocache), stream=stream, segments=[Range(1, 0, 3), Range(6, 3, 3), Range(11, 6, 3)])
         return ArvadosFileReader(af, "count.txt")
 
-    def test_read_returns_first_block(self):
-        # read() calls will be aligned on block boundaries - see #3663.
+    def test_read_block_crossing_behavior(self):
+        # read() needs to return all the data requested if possible, even if it
+        # crosses uncached blocks: https://arvados.org/issues/5856
         sfile = self.make_count_reader(nocache=True)
-        self.assertEqual('123', sfile.read(10))
+        self.assertEqual('12345678', sfile.read(8))
 
     def test_successive_reads(self):
+        # Override StreamFileReaderTestCase.test_successive_reads
         sfile = self.make_count_reader(nocache=True)
-        for expect in ['123', '456', '789', '']:
-            self.assertEqual(expect, sfile.read(10))
+        self.assertEqual('1234', sfile.read(4))
+        self.assertEqual('5678', sfile.read(4))
+        self.assertEqual('9', sfile.read(4))
+        self.assertEqual('', sfile.read(4))
 
     def test_tell_after_block_read(self):
+        # Override StreamFileReaderTestCase.test_tell_after_block_read
         sfile = self.make_count_reader(nocache=True)
-        sfile.read(5)
-        self.assertEqual(3, sfile.tell())
+        self.assertEqual('12345678', sfile.read(8))
+        self.assertEqual(8, sfile.tell())
 
     def test_prefetch(self):
         keep = ArvadosFileWriterTestCase.MockKeep({"2e9ec317e197819358fbc43afca7d837+8": "01234567", "e8dc4081b13434b45189a720b77b6818+8": "abcdefgh"})
@@ -569,8 +596,11 @@ class BlockManagerTest(unittest.TestCase):
         mockkeep = mock.MagicMock()
         blockmanager = arvados.arvfile._BlockManager(mockkeep)
         bufferblock = blockmanager.alloc_bufferblock()
+        bufferblock.owner = mock.MagicMock()
+        bufferblock.owner.flush.side_effect = lambda: blockmanager.commit_bufferblock(bufferblock)
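+        # commit_all() drives commits through each block's owner, so the mock
+        # owner's flush() is wired back to commit_bufferblock() here.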
         bufferblock.append("foo")
         blockmanager.commit_all()
+        self.assertTrue(bufferblock.owner.flush.called)
         self.assertTrue(mockkeep.put.called)
         self.assertEqual(bufferblock.state(), arvados.arvfile._BufferBlock.COMMITTED)
         self.assertIsNone(bufferblock.buffer_view)
@@ -581,8 +611,11 @@ class BlockManagerTest(unittest.TestCase):
         mockkeep.put.side_effect = arvados.errors.KeepWriteError("fail")
         blockmanager = arvados.arvfile._BlockManager(mockkeep)
         bufferblock = blockmanager.alloc_bufferblock()
+        bufferblock.owner = mock.MagicMock()
+        bufferblock.owner.flush.side_effect = lambda: blockmanager.commit_bufferblock(bufferblock)
         bufferblock.append("foo")
         with self.assertRaises(arvados.errors.KeepWriteError) as err:
             blockmanager.commit_all()
+        self.assertTrue(bufferblock.owner.flush.called)
         self.assertEqual(str(err.exception), "Error writing some blocks: block acbd18db4cc2f85cedef654fccc4a4d8+3 raised KeepWriteError (fail)")
         self.assertEqual(bufferblock.state(), arvados.arvfile._BufferBlock.PENDING)
index 8cf34f0282655fd88f27330ec96e5e0b362370cc..5e1a055d483695c864df6adaf0572368b0f0e165 100644 (file)
@@ -551,7 +551,7 @@ class CollectionReaderTestCase(unittest.TestCase, CollectionTestMixin):
     def test_locator_init(self):
         client = self.api_client_mock(200)
         # Ensure Keep will not return anything if asked.
-        with tutil.mock_get_responses(None, 404):
+        with tutil.mock_keep_responses(None, 404):
             reader = arvados.CollectionReader(self.DEFAULT_DATA_HASH,
                                               api_client=client)
             self.assertEqual(self.DEFAULT_MANIFEST, reader.manifest_text())
@@ -561,7 +561,7 @@ class CollectionReaderTestCase(unittest.TestCase, CollectionTestMixin):
         # been written to Keep.
         client = self.api_client_mock(200)
         self.mock_get_collection(client, 404, None)
-        with tutil.mock_get_responses(self.DEFAULT_MANIFEST, 200):
+        with tutil.mock_keep_responses(self.DEFAULT_MANIFEST, 200):
             reader = arvados.CollectionReader(self.DEFAULT_DATA_HASH,
                                               api_client=client)
             self.assertEqual(self.DEFAULT_MANIFEST, reader.manifest_text())
@@ -569,7 +569,7 @@ class CollectionReaderTestCase(unittest.TestCase, CollectionTestMixin):
     def test_uuid_init_no_fallback_to_keep(self):
         # Do not look up a collection UUID in Keep.
         client = self.api_client_mock(404)
-        with tutil.mock_get_responses(self.DEFAULT_MANIFEST, 200):
+        with tutil.mock_keep_responses(self.DEFAULT_MANIFEST, 200):
             with self.assertRaises(arvados.errors.ApiError):
                 reader = arvados.CollectionReader(self.DEFAULT_UUID,
                                                   api_client=client)
@@ -578,7 +578,7 @@ class CollectionReaderTestCase(unittest.TestCase, CollectionTestMixin):
         # To verify that CollectionReader tries Keep first here, we
         # mock API server to return the wrong data.
         client = self.api_client_mock(200)
-        with tutil.mock_get_responses(self.ALT_MANIFEST, 200):
+        with tutil.mock_keep_responses(self.ALT_MANIFEST, 200):
             self.assertEqual(
                 self.ALT_MANIFEST,
                 arvados.CollectionReader(
@@ -590,7 +590,7 @@ class CollectionReaderTestCase(unittest.TestCase, CollectionTestMixin):
         client = self.api_client_mock(200)
         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client,
                                           num_retries=3)
-        with tutil.mock_get_responses('foo', 500, 500, 200):
+        with tutil.mock_keep_responses('foo', 500, 500, 200):
             self.assertEqual('foo',
                              ''.join(f.read(9) for f in reader.all_files()))
 
@@ -630,7 +630,7 @@ class CollectionReaderTestCase(unittest.TestCase, CollectionTestMixin):
     def test_api_response_with_collection_from_keep(self):
         client = self.api_client_mock()
         self.mock_get_collection(client, 404, 'foo')
-        with tutil.mock_get_responses(self.DEFAULT_MANIFEST, 200):
+        with tutil.mock_keep_responses(self.DEFAULT_MANIFEST, 200):
             reader = arvados.CollectionReader(self.DEFAULT_DATA_HASH,
                                               api_client=client)
             api_response = reader.api_response()
@@ -673,7 +673,7 @@ class CollectionReaderTestCase(unittest.TestCase, CollectionTestMixin):
 class CollectionWriterTestCase(unittest.TestCase, CollectionTestMixin):
     def mock_keep(self, body, *codes, **headers):
         headers.setdefault('x-keep-replicas-stored', 2)
-        return tutil.mock_put_responses(body, *codes, **headers)
+        return tutil.mock_keep_responses(body, *codes, **headers)
 
     def foo_writer(self, **kwargs):
         kwargs.setdefault('api_client', self.api_client_mock())
@@ -695,7 +695,7 @@ class CollectionWriterTestCase(unittest.TestCase, CollectionTestMixin):
 
     def test_write_insufficient_replicas_via_proxy(self):
         writer = self.foo_writer(replication=3)
-        with self.mock_keep(None, 200, headers={'x-keep-replicas-stored': 2}):
+        with self.mock_keep(None, 200, **{'x-keep-replicas-stored': 2}):
             with self.assertRaises(arvados.errors.KeepWriteError):
                 writer.manifest_text()
 
@@ -712,15 +712,12 @@ class CollectionWriterTestCase(unittest.TestCase, CollectionTestMixin):
     def test_write_three_replicas(self):
         client = mock.MagicMock(name='api_client')
         with self.mock_keep(
-                None, 500, 500, 500, 200, 200, 200,
+                "", 500, 500, 500, 200, 200, 200,
                 **{'x-keep-replicas-stored': 1}) as keepmock:
             self.mock_keep_services(client, status=200, service_type='disk', count=6)
             writer = self.foo_writer(api_client=client, replication=3)
             writer.manifest_text()
-            # keepmock is the mock session constructor; keepmock.return_value
-            # is the mock session object, and keepmock.return_value.put is the
-            # actual mock method of interest.
-            self.assertEqual(6, keepmock.return_value.put.call_count)
+            self.assertEqual(6, keepmock.call_count)
 
     def test_write_whole_collection_through_retries(self):
         writer = self.foo_writer(num_retries=2)
@@ -830,11 +827,11 @@ class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin):
 
     def test_remove(self):
         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n')
-        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n", c.manifest_text())
+        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n", c.portable_manifest_text())
         self.assertIn("count1.txt", c)
         c.remove("count1.txt")
         self.assertNotIn("count1.txt", c)
-        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", c.manifest_text())
+        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", c.portable_manifest_text())
         with self.assertRaises(arvados.errors.ArgumentError):
             c.remove("")
 
@@ -851,45 +848,45 @@ class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin):
     def test_remove_in_subdir(self):
         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
         c.remove("foo/count2.txt")
-        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n", c.manifest_text())
+        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n", c.portable_manifest_text())
 
     def test_remove_empty_subdir(self):
         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
         c.remove("foo/count2.txt")
         c.remove("foo")
-        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n", c.manifest_text())
+        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n", c.portable_manifest_text())
 
     def test_remove_nonempty_subdir(self):
         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
         with self.assertRaises(IOError):
             c.remove("foo")
         c.remove("foo", recursive=True)
-        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n", c.manifest_text())
+        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n", c.portable_manifest_text())
 
     def test_copy_to_file_in_dir(self):
         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
         c.copy("count1.txt", "foo/count2.txt")
-        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", c.manifest_text())
+        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", c.portable_manifest_text())
 
     def test_copy_file(self):
         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
         c.copy("count1.txt", "count2.txt")
-        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n", c.manifest_text())
+        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n", c.portable_manifest_text())
 
     def test_copy_to_existing_dir(self):
         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
         c.copy("count1.txt", "foo")
-        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n", c.manifest_text())
+        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n", c.portable_manifest_text())
 
     def test_copy_to_new_dir(self):
         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
         c.copy("count1.txt", "foo/")
-        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n", c.manifest_text())
+        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n", c.portable_manifest_text())
 
     def test_clone(self):
         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
         cl = c.clone()
-        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", cl.manifest_text())
+        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", cl.portable_manifest_text())
 
     def test_diff_del_add(self):
         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
@@ -900,9 +897,9 @@ class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin):
         d = c1.diff(c2)
         self.assertEqual(d, [('del', './count1.txt', c1["count1.txt"]),
                              ('add', './count2.txt', c2["count2.txt"])])
-        self.assertNotEqual(c1.manifest_text(), c2.manifest_text())
+        self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
         c1.apply(d)
-        self.assertEqual(c1.manifest_text(), c2.manifest_text())
+        self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
 
     def test_diff_same(self):
         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
@@ -912,9 +909,9 @@ class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin):
         d = c1.diff(c2)
         self.assertEqual(d, [])
 
-        self.assertEqual(c1.manifest_text(), c2.manifest_text())
+        self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
         c1.apply(d)
-        self.assertEqual(c1.manifest_text(), c2.manifest_text())
+        self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
 
     def test_diff_mod(self):
         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
@@ -924,9 +921,9 @@ class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin):
         d = c1.diff(c2)
         self.assertEqual(d, [('mod', './count1.txt', c1["count1.txt"], c2["count1.txt"])])
 
-        self.assertNotEqual(c1.manifest_text(), c2.manifest_text())
+        self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
         c1.apply(d)
-        self.assertEqual(c1.manifest_text(), c2.manifest_text())
+        self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
 
     def test_diff_add(self):
         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
@@ -936,9 +933,9 @@ class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin):
         d = c1.diff(c2)
         self.assertEqual(d, [('add', './count2.txt', c2["count2.txt"])])
 
-        self.assertNotEqual(c1.manifest_text(), c2.manifest_text())
+        self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
         c1.apply(d)
-        self.assertEqual(c1.manifest_text(), c2.manifest_text())
+        self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
 
     def test_diff_add_in_subcollection(self):
         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
@@ -948,9 +945,9 @@ class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin):
         d = c1.diff(c2)
         self.assertEqual(d, [('add', './foo', c2["foo"])])
 
-        self.assertNotEqual(c1.manifest_text(), c2.manifest_text())
+        self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
         c1.apply(d)
-        self.assertEqual(c1.manifest_text(), c2.manifest_text())
+        self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
 
     def test_diff_del_add_in_subcollection(self):
         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 5348b82a029fd9e971a811ce1f71360b+43 0:10:count2.txt\n')
@@ -963,9 +960,9 @@ class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin):
         self.assertEqual(d, [('del', './foo/count2.txt', c1.find("foo/count2.txt")),
                              ('add', './foo/count3.txt', c2.find("foo/count3.txt"))])
 
-        self.assertNotEqual(c1.manifest_text(), c2.manifest_text())
+        self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
         c1.apply(d)
-        self.assertEqual(c1.manifest_text(), c2.manifest_text())
+        self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
 
     def test_diff_mod_in_subcollection(self):
         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 5348b82a029fd9e971a811ce1f71360b+43 0:10:count2.txt\n')
@@ -975,9 +972,9 @@ class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin):
         d = c1.diff(c2)
         self.assertEqual(d, [('mod', './foo', c1["foo"], c2["foo"])])
 
-        self.assertNotEqual(c1.manifest_text(), c2.manifest_text())
+        self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
         c1.apply(d)
-        self.assertEqual(c1.manifest_text(), c2.manifest_text())
+        self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
 
     def test_conflict_keep_local_change(self):
         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
@@ -990,7 +987,7 @@ class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin):
 
         # c1 changed, so it should not be deleted.
         c1.apply(d)
-        self.assertEqual(c1.manifest_text(), ". 95ebc3c7b3b9f1d2c40fec14415d3cb8+5 5348b82a029fd9e971a811ce1f71360b+43 0:5:count1.txt 5:10:count2.txt\n")
+        self.assertEqual(c1.portable_manifest_text(), ". 95ebc3c7b3b9f1d2c40fec14415d3cb8+5 5348b82a029fd9e971a811ce1f71360b+43 0:5:count1.txt 5:10:count2.txt\n")
 
     def test_conflict_mod(self):
         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt')
@@ -1002,7 +999,7 @@ class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin):
 
         # c1 changed, so c2 mod will go to a conflict file
         c1.apply(d)
-        self.assertRegexpMatches(c1.manifest_text(), r"\. 95ebc3c7b3b9f1d2c40fec14415d3cb8\+5 5348b82a029fd9e971a811ce1f71360b\+43 0:5:count1\.txt 5:10:count1\.txt~conflict-\d\d\d\d-\d\d-\d\d-\d\d:\d\d:\d\d~$")
+        self.assertRegexpMatches(c1.portable_manifest_text(), r"\. 95ebc3c7b3b9f1d2c40fec14415d3cb8\+5 5348b82a029fd9e971a811ce1f71360b\+43 0:5:count1\.txt 5:10:count1\.txt~conflict-\d\d\d\d-\d\d-\d\d-\d\d:\d\d:\d\d~$")
 
     def test_conflict_add(self):
         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
@@ -1015,7 +1012,7 @@ class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin):
 
         # c1 added count1.txt, so c2 add will go to a conflict file
         c1.apply(d)
-        self.assertRegexpMatches(c1.manifest_text(), r"\. 95ebc3c7b3b9f1d2c40fec14415d3cb8\+5 5348b82a029fd9e971a811ce1f71360b\+43 0:5:count1\.txt 5:10:count1\.txt~conflict-\d\d\d\d-\d\d-\d\d-\d\d:\d\d:\d\d~$")
+        self.assertRegexpMatches(c1.portable_manifest_text(), r"\. 95ebc3c7b3b9f1d2c40fec14415d3cb8\+5 5348b82a029fd9e971a811ce1f71360b\+43 0:5:count1\.txt 5:10:count1\.txt~conflict-\d\d\d\d-\d\d-\d\d-\d\d:\d\d:\d\d~$")
 
     def test_conflict_del(self):
         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt')
@@ -1026,7 +1023,7 @@ class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin):
 
         # c1 deleted, so c2 mod will go to a conflict file
         c1.apply(d)
-        self.assertRegexpMatches(c1.manifest_text(), r"\. 5348b82a029fd9e971a811ce1f71360b\+43 0:10:count1\.txt~conflict-\d\d\d\d-\d\d-\d\d-\d\d:\d\d:\d\d~$")
+        self.assertRegexpMatches(c1.portable_manifest_text(), r"\. 5348b82a029fd9e971a811ce1f71360b\+43 0:10:count1\.txt~conflict-\d\d\d\d-\d\d-\d\d-\d\d:\d\d:\d\d~$")
 
     def test_notify(self):
         c1 = Collection()
@@ -1058,7 +1055,7 @@ class CollectionCreateUpdateTest(run_test_server.TestCaseWithServers):
         with c.open("count.txt", "w") as f:
             f.write("0123456789")
 
-        self.assertEqual(c.manifest_text(), ". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n")
+        self.assertEqual(c.portable_manifest_text(), ". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n")
 
         return c
 
@@ -1067,7 +1064,6 @@ class CollectionCreateUpdateTest(run_test_server.TestCaseWithServers):
         c.save()
         self.assertRegexpMatches(c.manifest_text(), r"^\. 781e5e245d69b566979b86e28d23f2c7\+10\+A[a-f0-9]{40}@[a-f0-9]{8} 0:10:count\.txt$",)
 
-
     def test_create_and_save_new(self):
         c = self.create_count_txt()
         c.save_new()
index 6e8df96316aa36510828515d08cb7c3562dcfe99..db875dc212a1567638ad82155817078e0da48773 100644 (file)
@@ -10,8 +10,8 @@ class KeepRequestErrorTestCase(unittest.TestCase):
     REQUEST_ERRORS = [
         ('http://keep1.zzzzz.example.org/', IOError("test IOError")),
         ('http://keep3.zzzzz.example.org/', MemoryError("test MemoryError")),
-        ('http://keep5.zzzzz.example.org/', tutil.fake_requests_response(
-                500, "test 500")),
+        ('http://keep5.zzzzz.example.org/',
+         arv_error.HttpError(500, "Internal Server Error")),
         ('http://keep7.zzzzz.example.org/', IOError("second test IOError")),
         ]
 
index baae28e3d78ff8de224dfaf509856cd625dbe8f4..419f1ce08226339b2549da69f039e32869af27c5 100644 (file)
@@ -1,15 +1,19 @@
 import hashlib
 import mock
 import os
+import pycurl
 import random
 import re
 import socket
+import threading
+import time
 import unittest
 import urlparse
 
 import arvados
 import arvados.retry
 import arvados_testutil as tutil
+import keepstub
 import run_test_server
 
 class KeepTestCase(run_test_server.TestCaseWithServers):
@@ -251,7 +255,7 @@ class KeepProxyTestCase(run_test_server.TestCaseWithServers):
 class KeepClientServiceTestCase(unittest.TestCase, tutil.ApiClientMock):
     def get_service_roots(self, api_client):
         keep_client = arvados.KeepClient(api_client=api_client)
-        services = keep_client.weighted_service_roots('000000')
+        services = keep_client.weighted_service_roots(arvados.KeepLocator('0'*32))
         return [urlparse.urlparse(url) for url in sorted(services)]
 
     def test_ssl_flag_respected_in_roots(self):
@@ -267,63 +271,66 @@ class KeepClientServiceTestCase(unittest.TestCase, tutil.ApiClientMock):
         self.assertEqual('100::1', service.hostname)
         self.assertEqual(10, service.port)
 
-    # test_get_timeout and test_put_timeout test that
-    # KeepClient.get and KeepClient.put use the appropriate timeouts
-    # when connected directly to a Keep server (i.e. non-proxy timeout)
+    # The test_*_timeout tests verify that KeepClient instructs pycurl to
+    # use the appropriate connection and read timeouts. They don't care
+    # whether pycurl actually exhibits the expected timeout behavior --
+    # those tests are in the KeepClientTimeout test class.
 
     def test_get_timeout(self):
         api_client = self.mock_keep_services(count=1)
-        force_timeout = [socket.timeout("timed out")]
-        with tutil.mock_get(force_timeout) as mock_session:
+        force_timeout = socket.timeout("timed out")
+        with tutil.mock_keep_responses(force_timeout, 0) as mock:
             keep_client = arvados.KeepClient(api_client=api_client)
             with self.assertRaises(arvados.errors.KeepReadError):
                 keep_client.get('ffffffffffffffffffffffffffffffff')
-            self.assertTrue(mock_session.return_value.get.called)
             self.assertEqual(
-                arvados.KeepClient.DEFAULT_TIMEOUT,
-                mock_session.return_value.get.call_args[1]['timeout'])
+                mock.responses[0].getopt(pycurl.CONNECTTIMEOUT_MS),
+                int(arvados.KeepClient.DEFAULT_TIMEOUT[0]*1000))
+            self.assertEqual(
+                mock.responses[0].getopt(pycurl.TIMEOUT_MS),
+                int(arvados.KeepClient.DEFAULT_TIMEOUT[1]*1000))
 
     def test_put_timeout(self):
         api_client = self.mock_keep_services(count=1)
-        force_timeout = [socket.timeout("timed out")]
-        with tutil.mock_put(force_timeout) as mock_session:
+        force_timeout = socket.timeout("timed out")
+        with tutil.mock_keep_responses(force_timeout, 0) as mock:
             keep_client = arvados.KeepClient(api_client=api_client)
             with self.assertRaises(arvados.errors.KeepWriteError):
                 keep_client.put('foo')
-            self.assertTrue(mock_session.return_value.put.called)
             self.assertEqual(
-                arvados.KeepClient.DEFAULT_TIMEOUT,
-                mock_session.return_value.put.call_args[1]['timeout'])
+                mock.responses[0].getopt(pycurl.CONNECTTIMEOUT_MS),
+                int(arvados.KeepClient.DEFAULT_TIMEOUT[0]*1000))
+            self.assertEqual(
+                mock.responses[0].getopt(pycurl.TIMEOUT_MS),
+                int(arvados.KeepClient.DEFAULT_TIMEOUT[1]*1000))
 
     def test_proxy_get_timeout(self):
-        # Force a timeout, verifying that the requests.get or
-        # requests.put method was called with the proxy_timeout
-        # setting rather than the default timeout.
         api_client = self.mock_keep_services(service_type='proxy', count=1)
-        force_timeout = [socket.timeout("timed out")]
-        with tutil.mock_get(force_timeout) as mock_session:
+        force_timeout = socket.timeout("timed out")
+        with tutil.mock_keep_responses(force_timeout, 0) as mock:
             keep_client = arvados.KeepClient(api_client=api_client)
             with self.assertRaises(arvados.errors.KeepReadError):
                 keep_client.get('ffffffffffffffffffffffffffffffff')
-            self.assertTrue(mock_session.return_value.get.called)
             self.assertEqual(
-                arvados.KeepClient.DEFAULT_PROXY_TIMEOUT,
-                mock_session.return_value.get.call_args[1]['timeout'])
+                mock.responses[0].getopt(pycurl.CONNECTTIMEOUT_MS),
+                int(arvados.KeepClient.DEFAULT_PROXY_TIMEOUT[0]*1000))
+            self.assertEqual(
+                mock.responses[0].getopt(pycurl.TIMEOUT_MS),
+                int(arvados.KeepClient.DEFAULT_PROXY_TIMEOUT[1]*1000))
 
     def test_proxy_put_timeout(self):
-        # Force a timeout, verifying that the requests.get or
-        # requests.put method was called with the proxy_timeout
-        # setting rather than the default timeout.
         api_client = self.mock_keep_services(service_type='proxy', count=1)
-        force_timeout = [socket.timeout("timed out")]
-        with tutil.mock_put(force_timeout) as mock_session:
+        force_timeout = socket.timeout("timed out")
+        with tutil.mock_keep_responses(force_timeout, 0) as mock:
             keep_client = arvados.KeepClient(api_client=api_client)
             with self.assertRaises(arvados.errors.KeepWriteError):
                 keep_client.put('foo')
-            self.assertTrue(mock_session.return_value.put.called)
             self.assertEqual(
-                arvados.KeepClient.DEFAULT_PROXY_TIMEOUT,
-                mock_session.return_value.put.call_args[1]['timeout'])
+                mock.responses[0].getopt(pycurl.CONNECTTIMEOUT_MS),
+                int(arvados.KeepClient.DEFAULT_PROXY_TIMEOUT[0]*1000))
+            self.assertEqual(
+                mock.responses[0].getopt(pycurl.TIMEOUT_MS),
+                int(arvados.KeepClient.DEFAULT_PROXY_TIMEOUT[1]*1000))
 
     def test_probe_order_reference_set(self):
         # expected_order[i] is the probe order for
@@ -344,7 +351,7 @@ class KeepClientServiceTestCase(unittest.TestCase, tutil.ApiClientMock):
         api_client = self.mock_keep_services(count=16)
         keep_client = arvados.KeepClient(api_client=api_client)
         for i, hash in enumerate(hashes):
-            roots = keep_client.weighted_service_roots(hash)
+            roots = keep_client.weighted_service_roots(arvados.KeepLocator(hash))
             got_order = [
                 re.search(r'//\[?keep0x([0-9a-f]+)', root).group(1)
                 for root in roots]
@@ -357,14 +364,14 @@ class KeepClientServiceTestCase(unittest.TestCase, tutil.ApiClientMock):
         api_client = self.mock_keep_services(count=initial_services)
         keep_client = arvados.KeepClient(api_client=api_client)
         probes_before = [
-            keep_client.weighted_service_roots(hash) for hash in hashes]
+            keep_client.weighted_service_roots(arvados.KeepLocator(hash)) for hash in hashes]
         for added_services in range(1, 12):
             api_client = self.mock_keep_services(count=initial_services+added_services)
             keep_client = arvados.KeepClient(api_client=api_client)
             total_penalty = 0
             for hash_index in range(len(hashes)):
                 probe_after = keep_client.weighted_service_roots(
-                    hashes[hash_index])
+                    arvados.KeepLocator(hashes[hash_index]))
                 penalty = probe_after.index(probes_before[hash_index][0])
                 self.assertLessEqual(penalty, added_services)
                 total_penalty += penalty
@@ -397,9 +404,9 @@ class KeepClientServiceTestCase(unittest.TestCase, tutil.ApiClientMock):
         aport = random.randint(1024,65535)
         api_client = self.mock_keep_services(service_port=aport, count=16)
         keep_client = arvados.KeepClient(api_client=api_client)
-        with mock.patch('requests.' + verb,
-                        side_effect=socket.timeout) as req_mock, \
-                self.assertRaises(exc_class) as err_check:
+        with mock.patch('pycurl.Curl') as curl_mock, \
+             self.assertRaises(exc_class) as err_check:
+            curl_mock.return_value.side_effect = socket.timeout
             getattr(keep_client, verb)(data)
         urls = [urlparse.urlparse(url)
                 for url in err_check.exception.request_errors()]
@@ -429,7 +436,7 @@ class KeepClientServiceTestCase(unittest.TestCase, tutil.ApiClientMock):
 
     def check_errors_from_last_retry(self, verb, exc_class):
         api_client = self.mock_keep_services(count=2)
-        req_mock = getattr(tutil, 'mock_{}_responses'.format(verb))(
+        req_mock = tutil.mock_keep_responses(
             "retry error reporting test", 500, 500, 403, 403)
         with req_mock, tutil.skip_sleep, \
                 self.assertRaises(exc_class) as err_check:
@@ -450,13 +457,160 @@ class KeepClientServiceTestCase(unittest.TestCase, tutil.ApiClientMock):
         data = 'partial failure test'
         data_loc = '{}+{}'.format(hashlib.md5(data).hexdigest(), len(data))
         api_client = self.mock_keep_services(count=3)
-        with tutil.mock_put_responses(data_loc, 200, 500, 500) as req_mock, \
+        with tutil.mock_keep_responses(data_loc, 200, 500, 500) as req_mock, \
                 self.assertRaises(arvados.errors.KeepWriteError) as exc_check:
             keep_client = arvados.KeepClient(api_client=api_client)
             keep_client.put(data)
         self.assertEqual(2, len(exc_check.exception.request_errors()))
 
 
+class KeepClientTimeout(unittest.TestCase, tutil.ApiClientMock):
+    DATA = 'x' * 2**10
+
+    class assertTakesBetween(unittest.TestCase):
+        def __init__(self, tmin, tmax):
+            self.tmin = tmin
+            self.tmax = tmax
+
+        def __enter__(self):
+            self.t0 = time.time()
+
+        def __exit__(self, *args, **kwargs):
+            self.assertGreater(time.time() - self.t0, self.tmin)
+            self.assertLess(time.time() - self.t0, self.tmax)
+
+    def setUp(self):
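+        # Grab a free port by binding and closing a throwaway socket (a small
+        # race, but fine for tests), then start the stub Keep server on it.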
+        sock = socket.socket()
+        sock.bind(('0.0.0.0', 0))
+        self.port = sock.getsockname()[1]
+        sock.close()
+        self.server = keepstub.Server(('0.0.0.0', self.port), keepstub.Handler)
+        self.thread = threading.Thread(target=self.server.serve_forever)
+        self.thread.daemon = True # Exit thread if main proc exits
+        self.thread.start()
+        self.api_client = self.mock_keep_services(
+            count=1,
+            service_host='localhost',
+            service_port=self.port,
+        )
+
+    def tearDown(self):
+        self.server.shutdown()
+
+    def keepClient(self, timeouts=(0.1, 1.0)):
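+        # timeouts is (connect timeout, read timeout), in seconds.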
+        return arvados.KeepClient(
+            api_client=self.api_client,
+            timeout=timeouts)
+
+    def test_timeout_slow_connect(self):
+        # Can't simulate TCP delays with our own socket. Leave our
+        # stub server running uselessly, and try to connect to an
+        # unroutable IP address instead.
+        self.api_client = self.mock_keep_services(
+            count=1,
+            service_host='240.0.0.0',
+        )
+        with self.assertTakesBetween(0.1, 0.5):
+            with self.assertRaises(arvados.errors.KeepWriteError):
+                self.keepClient((0.1, 1)).put(self.DATA, copies=1, num_retries=0)
+
+    def test_timeout_slow_request(self):
+        self.server.setdelays(request=0.2)
+        self._test_200ms()
+
+    def test_timeout_slow_response(self):
+        self.server.setdelays(response=0.2)
+        self._test_200ms()
+
+    def test_timeout_slow_response_body(self):
+        self.server.setdelays(response_body=0.2)
+        self._test_200ms()
+
+    def _test_200ms(self):
+        """Connect should be t<100ms, request should be 200ms <= t < 300ms"""
+
+        # Allow 100ms to connect, then 1s for response. Everything
+        # should work, and everything should take at least 200ms to
+        # return.
+        kc = self.keepClient((.1, 1))
+        with self.assertTakesBetween(.2, .3):
+            loc = kc.put(self.DATA, copies=1, num_retries=0)
+        with self.assertTakesBetween(.2, .3):
+            self.assertEqual(self.DATA, kc.get(loc, num_retries=0))
+
+        # Allow 1s to connect, then 100ms for response. Nothing should
+        # work, and everything should take at least 100ms to return.
+        kc = self.keepClient((1, .1))
+        with self.assertTakesBetween(.1, .2):
+            with self.assertRaises(arvados.errors.KeepReadError):
+                kc.get(loc, num_retries=0)
+        with self.assertTakesBetween(.1, .2):
+            with self.assertRaises(arvados.errors.KeepWriteError):
+                kc.put(self.DATA, copies=1, num_retries=0)
+
+
+class KeepClientGatewayTestCase(unittest.TestCase, tutil.ApiClientMock):
+    def mock_disks_and_gateways(self, disks=3, gateways=1):
+        self.gateways = [{
+                'uuid': 'zzzzz-bi6l4-gateway{:08d}'.format(i),
+                'owner_uuid': 'zzzzz-tpzed-000000000000000',
+                'service_host': 'gatewayhost{}'.format(i),
+                'service_port': 12345,
+                'service_ssl_flag': True,
+                'service_type': 'gateway:test',
+        } for i in range(gateways)]
+        self.gateway_roots = [
+            "https://{service_host}:{service_port}/".format(**gw)
+            for gw in self.gateways]
+        self.api_client = self.mock_keep_services(
+            count=disks, additional_services=self.gateways)
+        self.keepClient = arvados.KeepClient(api_client=self.api_client)
+
+    @mock.patch('pycurl.Curl')
+    def test_get_with_gateway_hint_first(self, MockCurl):
+        MockCurl.return_value = tutil.FakeCurl.make(
+            code=200, body='foo', headers={'Content-Length': 3})
+        self.mock_disks_and_gateways()
+        locator = 'acbd18db4cc2f85cedef654fccc4a4d8+3+K@' + self.gateways[0]['uuid']
+        self.assertEqual('foo', self.keepClient.get(locator))
+        self.assertEqual(self.gateway_roots[0]+locator,
+                         MockCurl.return_value.getopt(pycurl.URL))
+
+    @mock.patch('pycurl.Curl')
+    def test_get_with_gateway_hints_in_order(self, MockCurl):
+        gateways = 4
+        disks = 3
+        mocks = [
+            tutil.FakeCurl.make(code=404, body='')
+            for _ in range(gateways+disks)
+        ]
+        MockCurl.side_effect = tutil.queue_with(mocks)
+        self.mock_disks_and_gateways(gateways=gateways, disks=disks)
+        locator = '+'.join(['acbd18db4cc2f85cedef654fccc4a4d8+3'] +
+                           ['K@'+gw['uuid'] for gw in self.gateways])
+        with self.assertRaises(arvados.errors.NotFoundError):
+            self.keepClient.get(locator)
+        # Gateways are tried first, in the order given.
+        for i, root in enumerate(self.gateway_roots):
+            self.assertEqual(root+locator,
+                             mocks[i].getopt(pycurl.URL))
+        # Disk services are tried next.
+        for i in range(gateways, gateways+disks):
+            self.assertRegexpMatches(
+                mocks[i].getopt(pycurl.URL),
+                r'keep0x')
+
+    @mock.patch('pycurl.Curl')
+    def test_get_with_remote_proxy_hint(self, MockCurl):
+        MockCurl.return_value = tutil.FakeCurl.make(
+            code=200, body='foo', headers={'Content-Length': 3})
+        self.mock_disks_and_gateways()
+        locator = 'acbd18db4cc2f85cedef654fccc4a4d8+3+K@xyzzy'
+        self.assertEqual('foo', self.keepClient.get(locator))
+        self.assertEqual('https://keep.xyzzy.arvadosapi.com/'+locator,
+                         MockCurl.return_value.getopt(pycurl.URL))
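+
+    # Hint syntax note: in the locators above, "+K@xyzzy" (a 5-character
+    # cluster id) means "fetch via the remote proxy at
+    # keep.xyzzy.arvadosapi.com", while "+K@<service uuid>" names a
+    # specific gateway service. An illustrative way to extract the hints:
+    #
+    #   hints = [p[2:] for p in locator.split('+') if p.startswith('K@')]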
+
+
 class KeepClientRetryTestMixin(object):
     # Testing with a local Keep store won't exercise the retry behavior.
     # Instead, our strategy is:
@@ -528,14 +682,14 @@ class KeepClientRetryGetTestCase(KeepClientRetryTestMixin, unittest.TestCase):
     DEFAULT_EXPECT = KeepClientRetryTestMixin.TEST_DATA
     DEFAULT_EXCEPTION = arvados.errors.KeepReadError
     HINTED_LOCATOR = KeepClientRetryTestMixin.TEST_LOCATOR + '+K@xyzzy'
-    TEST_PATCHER = staticmethod(tutil.mock_get_responses)
+    TEST_PATCHER = staticmethod(tutil.mock_keep_responses)
 
     def run_method(self, locator=KeepClientRetryTestMixin.TEST_LOCATOR,
                    *args, **kwargs):
         return self.new_client().get(locator, *args, **kwargs)
 
     def test_specific_exception_when_not_found(self):
-        with tutil.mock_get_responses(self.DEFAULT_EXPECT, 404, 200):
+        with tutil.mock_keep_responses(self.DEFAULT_EXPECT, 404, 200):
             self.check_exception(arvados.errors.NotFoundError, num_retries=3)
 
     def test_general_exception_with_mixed_errors(self):
@@ -544,7 +698,7 @@ class KeepClientRetryGetTestCase(KeepClientRetryTestMixin, unittest.TestCase):
         # This test rigs up 50/50 disagreement between two servers, and
         # checks that it does not become a NotFoundError.
         client = self.new_client()
-        with tutil.mock_get_responses(self.DEFAULT_EXPECT, 404, 500):
+        with tutil.mock_keep_responses(self.DEFAULT_EXPECT, 404, 500):
             with self.assertRaises(arvados.errors.KeepReadError) as exc_check:
                 client.get(self.HINTED_LOCATOR)
             self.assertNotIsInstance(
@@ -552,17 +706,19 @@ class KeepClientRetryGetTestCase(KeepClientRetryTestMixin, unittest.TestCase):
                 "mixed errors raised NotFoundError")
 
     def test_hint_server_can_succeed_without_retries(self):
-        with tutil.mock_get_responses(self.DEFAULT_EXPECT, 404, 200, 500):
+        with tutil.mock_keep_responses(self.DEFAULT_EXPECT, 404, 200, 500):
             self.check_success(locator=self.HINTED_LOCATOR)
 
     def test_try_next_server_after_timeout(self):
-        with tutil.mock_get([
-                socket.timeout("timed out"),
-                tutil.fake_requests_response(200, self.DEFAULT_EXPECT)]):
+        with tutil.mock_keep_responses(
+                (socket.timeout("timed out"), 200),
+                (self.DEFAULT_EXPECT, 200)):
             self.check_success(locator=self.HINTED_LOCATOR)
 
     def test_retry_data_with_wrong_checksum(self):
-        with tutil.mock_get((tutil.fake_requests_response(200, s) for s in ['baddata', self.TEST_DATA])):
+        with tutil.mock_keep_responses(
+                ('baddata', 200),
+                (self.DEFAULT_EXPECT, 200)):
             self.check_success(locator=self.HINTED_LOCATOR)
 
 
@@ -570,12 +726,12 @@ class KeepClientRetryGetTestCase(KeepClientRetryTestMixin, unittest.TestCase):
 class KeepClientRetryPutTestCase(KeepClientRetryTestMixin, unittest.TestCase):
     DEFAULT_EXPECT = KeepClientRetryTestMixin.TEST_LOCATOR
     DEFAULT_EXCEPTION = arvados.errors.KeepWriteError
-    TEST_PATCHER = staticmethod(tutil.mock_put_responses)
+    TEST_PATCHER = staticmethod(tutil.mock_keep_responses)
 
     def run_method(self, data=KeepClientRetryTestMixin.TEST_DATA,
                    copies=1, *args, **kwargs):
         return self.new_client().put(data, copies, *args, **kwargs)
 
     def test_do_not_send_multiple_copies_to_same_server(self):
-        with tutil.mock_put_responses(self.DEFAULT_EXPECT, 200):
+        with tutil.mock_keep_responses(self.DEFAULT_EXPECT, 200):
             self.check_exception(copies=2, num_retries=3)
index 0c1110c5ceb10e5164d48db9c03bd2e74f8b9639..4f147ba54c01ab3975addb8f66386dee9c61656d 100644 (file)
@@ -7,8 +7,6 @@ import arvados.errors as arv_error
 import arvados.retry as arv_retry
 import mock
 
-from arvados_testutil import fake_requests_response
-
 class RetryLoopTestMixin(object):
     @staticmethod
     def loop_success(result):
@@ -150,8 +148,7 @@ class RetryLoopBackoffTestCase(unittest.TestCase, RetryLoopTestMixin):
 class CheckHTTPResponseSuccessTestCase(unittest.TestCase):
     def results_map(self, *codes):
         for code in codes:
-            response = fake_requests_response(code, None)
-            yield code, arv_retry.check_http_response_success(response)
+            yield code, arv_retry.check_http_response_success(code)
 
     def check(assert_name):
         def check_method(self, expected, *codes):
index acb9929cae3ce90f9207d4e0754bd66cc57f5f2c..6c3bd61414173fb64fe9ef7b7b1b44dcc4af6d9d 100644 (file)
@@ -21,7 +21,7 @@ class StreamFileReaderTestCase(unittest.TestCase):
         return StreamFileReader(stream, [Range(1, 0, 3), Range(6, 3, 3), Range(11, 6, 3)],
                                 'count.txt')
 
-    def test_read_returns_first_block(self):
+    def test_read_block_crossing_behavior(self):
         # read() calls will be aligned on block boundaries - see #3663.
         sfile = self.make_count_reader()
         self.assertEqual('123', sfile.read(10))
@@ -199,47 +199,47 @@ class StreamRetryTestMixin(object):
 
     @tutil.skip_sleep
     def test_success_without_retries(self):
-        with tutil.mock_get_responses('bar', 200):
+        with tutil.mock_keep_responses('bar', 200):
             reader = self.reader_for('bar_file')
             self.assertEqual('bar', self.read_for_test(reader, 3))
 
     @tutil.skip_sleep
     def test_read_no_default_retry(self):
-        with tutil.mock_get_responses('', 500):
+        with tutil.mock_keep_responses('', 500):
             reader = self.reader_for('user_agreement')
             with self.assertRaises(arvados.errors.KeepReadError):
                 self.read_for_test(reader, 10)
 
     @tutil.skip_sleep
     def test_read_with_instance_retries(self):
-        with tutil.mock_get_responses('foo', 500, 200):
+        with tutil.mock_keep_responses('foo', 500, 200):
             reader = self.reader_for('foo_file', num_retries=3)
             self.assertEqual('foo', self.read_for_test(reader, 3))
 
     @tutil.skip_sleep
     def test_read_with_method_retries(self):
-        with tutil.mock_get_responses('foo', 500, 200):
+        with tutil.mock_keep_responses('foo', 500, 200):
             reader = self.reader_for('foo_file')
             self.assertEqual('foo',
                              self.read_for_test(reader, 3, num_retries=3))
 
     @tutil.skip_sleep
     def test_read_instance_retries_exhausted(self):
-        with tutil.mock_get_responses('bar', 500, 500, 500, 500, 200):
+        with tutil.mock_keep_responses('bar', 500, 500, 500, 500, 200):
             reader = self.reader_for('bar_file', num_retries=3)
             with self.assertRaises(arvados.errors.KeepReadError):
                 self.read_for_test(reader, 3)
 
     @tutil.skip_sleep
     def test_read_method_retries_exhausted(self):
-        with tutil.mock_get_responses('bar', 500, 500, 500, 500, 200):
+        with tutil.mock_keep_responses('bar', 500, 500, 500, 500, 200):
             reader = self.reader_for('bar_file')
             with self.assertRaises(arvados.errors.KeepReadError):
                 self.read_for_test(reader, 3, num_retries=3)
 
     @tutil.skip_sleep
     def test_method_retries_take_precedence(self):
-        with tutil.mock_get_responses('', 500, 500, 500, 200):
+        with tutil.mock_keep_responses('', 500, 500, 500, 200):
             reader = self.reader_for('user_agreement', num_retries=10)
             with self.assertRaises(arvados.errors.KeepReadError):
                 self.read_for_test(reader, 10, num_retries=1)
index ec0f443daabaea6ce1062bb106b44844851a1387..07b751908f7da26b93fd5321fe8a5c192872a8d6 100644 (file)
@@ -14,8 +14,13 @@ module Arv
         loc_list = LocatorList.new(locators)
         file_specs.map { |s| manifest.split_file_token(s) }.
             each do |file_start, file_len, file_path|
-          @root.file_at(normalize_path(stream_root, file_path)).
-            add_segment(loc_list.segment(file_start, file_len))
+          begin
+            @root.file_at(normalize_path(stream_root, file_path)).
+              add_segment(loc_list.segment(file_start, file_len))
+          rescue Errno::ENOTDIR, Errno::EISDIR => error
+            raise ArgumentError.new("%p is both a stream and a file" %
+                                    error.to_s.partition(" - ").last)
+          end
         end
       end
     end
@@ -43,6 +48,19 @@ module Arv
       copy(:merge, source.chomp("/"), target, source_collection, opts)
     end
 
+    def each_file_path(&block)
+      @root.each_file_path(&block)
+    end
+
+    def exist?(path)
+      begin
+        substream, item = find(path)
+        not (substream.leaf? or substream[item].nil?)
+      rescue Errno::ENOENT, Errno::ENOTDIR
+        false
+      end
+    end
+
     def rename(source, target)
       copy(:add_copy, source, target) { rm_r(source) }
     end
@@ -88,13 +106,19 @@ module Arv
       # is found and can be copied.
       source_collection = self if source_collection.nil?
       src_stream, src_tail = source_collection.find(source)
-      dst_stream, dst_tail = find(target)
+      dst_stream_path, _, dst_tail = normalize_path(target).rpartition("/")
+      if dst_stream_path.empty?
+        dst_stream, dst_tail = @root.find(dst_tail)
+        dst_tail ||= src_tail
+      else
+        dst_stream = @root.stream_at(dst_stream_path)
+        dst_tail = src_tail if dst_tail.empty?
+      end
       if (source_collection.equal?(self) and
           (src_stream.path == dst_stream.path) and (src_tail == dst_tail))
         return self
       end
       src_item = src_stream[src_tail]
-      dst_tail ||= src_tail
       check_method = "check_can_#{copy_method}".to_sym
       target_name = nil
       if opts.fetch(:descend_target, true)
@@ -272,6 +296,17 @@ module Arv
         end
       end
 
+      def each_file_path
+        return to_enum(__method__) unless block_given?
+        items.each_value do |item|
+          if item.file?
+            yield item.path
+          else
+            item.each_file_path { |path| yield path }
+          end
+        end
+      end
+
       def find(find_path)
         # Given a POSIX-style path, return the CollectionStream that
         # contains the object at that path, and the name of the object
@@ -283,7 +318,7 @@ module Arv
 
       def stream_at(find_path)
         key, rest = find_path.split("/", 2)
-        next_stream = get_or_new(key, CollectionStream)
+        next_stream = get_or_new(key, CollectionStream, Errno::ENOTDIR)
         if rest.nil?
           next_stream
         else
@@ -294,7 +329,7 @@ module Arv
       def file_at(find_path)
         stream_path, _, file_name = find_path.rpartition("/")
         if stream_path.empty?
-          get_or_new(file_name, CollectionFile)
+          get_or_new(file_name, CollectionFile, Errno::EISDIR)
         else
           stream_at(stream_path).file_at(file_name)
         end
@@ -377,17 +412,15 @@ module Arv
         items[key] = item
       end
 
-      def get_or_new(key, klass)
+      def get_or_new(key, klass, err_class)
         # Return the collection item at `key` and ensure that it's a `klass`.
         # If `key` does not exist, create a new `klass` there.
-        # If the value for `key` is not a `klass`, raise an ArgumentError.
+        # If the value for `key` is not a `klass`, raise an `err_class`.
         item = items[key]
         if item.nil?
           self[key] = klass.new("#{path}/#{key}")
         elsif not item.is_a?(klass)
-          raise ArgumentError.
-            new("in stream %p, %p is a %s, not a %s" %
-                [path, key, items[key].class.human_name, klass.human_name])
+          raise err_class.new(item.path)
         else
           item
         end
index e4f62083b0d5568d757d8ae05dc043170e10f6ca..422dab5f5372c05d177d69a381a82b7e7f8ab1e1 100644 (file)
@@ -154,7 +154,16 @@ module Keep
             stream_name = unescape token
           elsif in_file_tokens or not Locator.valid? token
             in_file_tokens = true
-            yield [stream_name] + split_file_token(token)
+
+            file_tokens = split_file_token(token)
+            stream_name_adjuster = ''
+            if file_tokens[2].include?('/')                # filename contains a directory part
+              parts = file_tokens[2].rpartition('/')
+              stream_name_adjuster = parts[1] + parts[0]   # "/" + directory part
+              file_tokens[2] = parts[2]
+            end
+
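+            # e.g. (illustrative) in stream ".", a file token naming
+            # "dir1/dir2/file1" yields stream "./dir1/dir2", file "file1".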
+            yield [stream_name + stream_name_adjuster] + file_tokens
           end
         end
       end
index 52d7377f80c7ae41889353fe0a7507e950ff43c5..6bf6a9e613c14a52a3ab3649279a8a3cfd878f9c 100644 (file)
@@ -54,6 +54,11 @@ module SDKFixtures
   NONNORMALIZED_MANIFEST =
     ["./dir2 #{random_block} 0:0:z 0:0:y 0:0:x",
      "./dir1 #{random_block} 0:0:p 0:0:o 0:0:n\n"].join("\n")
+  MANIFEST_WITH_DIRS_IN_FILENAMES =
+    [". #{random_block(10)} 0:3:file1 3:3:dir1/file1 6:3:dir1/dir2/file1\n"].join("")
+  MULTILEVEL_MANIFEST_WITH_DIRS_IN_FILENAMES =
+    [". #{random_block(10)} 0:3:file1 3:3:dir1/file1 6:4:dir1/dir2/file1\n",
+     "./dir1 #{random_block(10)} 0:3:file1 3:7:dir2/file1\n"].join("")
 
   ### Non-tree manifests
   # These manifests follow the spec, but they express a structure that can't
index 3dd1ab39712857b54552a5d382e1ed8f6d4939ee..e2a39bc5edbda8705e0434fa9738c5192b325938 100644 (file)
@@ -223,13 +223,17 @@ class CollectionTest < Minitest::Test
     assert_equal(expected.join(""), coll.manifest_text)
   end
 
-  def test_copy_stream_over_file_raises_ENOTDIR
+  def test_copy_stream_over_file_raises_ENOTDIR(source="./s1", target="./f2")
     coll = Arv::Collection.new(TWO_BY_TWO_MANIFEST_S)
     assert_raises(Errno::ENOTDIR) do
-      coll.cp_r("./s1", "./f2")
+      coll.cp_r(source, target)
     end
   end
 
+  def test_copy_file_under_file_raises_ENOTDIR
+    test_copy_stream_over_file_raises_ENOTDIR("./f1", "./f2/newfile")
+  end
+
   def test_copy_stream_over_nonempty_stream_merges_and_overwrites
     blocks = random_blocks(3, 9)
     manifest_a =
@@ -323,6 +327,20 @@ class CollectionTest < Minitest::Test
     assert_equal(expect_lines.join(""), coll.manifest_text)
   end
 
+  def test_copy_file_into_new_stream_with_implicit_filename
+    coll = Arv::Collection.new(SIMPLEST_MANIFEST)
+    coll.cp_r("./simple.txt", "./new/")
+    assert_equal(SIMPLEST_MANIFEST + SIMPLEST_MANIFEST.sub(". ", "./new "),
+                 coll.manifest_text)
+  end
+
+  def test_copy_file_into_new_stream_with_explicit_filename
+    coll = Arv::Collection.new(SIMPLEST_MANIFEST)
+    coll.cp_r("./simple.txt", "./new/newfile.txt")
+    new_line = SIMPLEST_MANIFEST.sub(". ", "./new ").sub(":simple", ":newfile")
+    assert_equal(SIMPLEST_MANIFEST + new_line, coll.manifest_text)
+  end
+
   def test_copy_stream_contents_into_root
     coll = Arv::Collection.new(TWO_BY_TWO_MANIFEST_S)
     coll.cp_r("./s1/", ".")
@@ -374,6 +392,71 @@ class CollectionTest < Minitest::Test
     test_copy_empty_source_path_raises_ArgumentError(".", "")
   end
 
+  ### .each_file_path
+
+  def test_each_file_path
+    coll = Arv::Collection.new(TWO_BY_TWO_MANIFEST_S)
+    if block_given?
+      result = yield(coll)
+    else
+      result = []
+      coll.each_file_path { |path| result << path }
+    end
+    assert_equal(["./f1", "./f2", "./s1/f1", "./s1/f3"], result.sort)
+  end
+
+  def test_each_file_path_without_block
+    test_each_file_path { |coll| coll.each_file_path.to_a }
+  end
+
+  def test_each_file_path_empty_collection
+    assert_empty(Arv::Collection.new.each_file_path.to_a)
+  end
+
+  def test_each_file_path_after_collection_emptied
+    coll = Arv::Collection.new(SIMPLEST_MANIFEST)
+    coll.rm("simple.txt")
+    assert_empty(coll.each_file_path.to_a)
+  end
+
+  def test_each_file_path_deduplicates_manifest_listings
+    coll = Arv::Collection.new(MULTIBLOCK_FILE_MANIFEST)
+    assert_equal(["./repfile", "./s1/repfile", "./s1/uniqfile",
+                  "./uniqfile", "./uniqfile2"],
+                 coll.each_file_path.to_a.sort)
+  end
+
+  ### .exist?
+
+  def test_exist(test_method=:assert, path="f2")
+    coll = Arv::Collection.new(TWO_BY_TWO_MANIFEST_S)
+    send(test_method, coll.exist?(path))
+  end
+
+  def test_file_not_exist
+    test_exist(:refute, "f3")
+  end
+
+  def test_stream_exist
+    test_exist(:assert, "s1")
+  end
+
+  def test_file_inside_stream_exist
+    test_exist(:assert, "s1/f1")
+  end
+
+  def test_path_inside_stream_not_exist
+    test_exist(:refute, "s1/f2")
+  end
+
+  def test_path_under_file_not_exist
+    test_exist(:refute, "f2/nonexistent")
+  end
+
+  def test_deep_substreams_not_exist
+    test_exist(:refute, "a/b/c/d/e/f/g")
+  end
+
   ### .rename
 
   def test_simple_file_rename
index f1f1a530ceddbf35a9d5cd3066acc180eb4f254e..70bae3eed19584be8eb097a2123ca99e383cfc42 100644 (file)
@@ -208,4 +208,41 @@ class ManifestTest < Minitest::Test
       assert !file_name.empty?, "empty file_name in #{name} fixture"
     end
   end
+
+  def test_collection_with_dirs_in_filenames
+    manifest = Keep::Manifest.new(MANIFEST_WITH_DIRS_IN_FILENAMES)
+
+    seen = Hash.new { |this, key| this[key] = [] }
+
+    manifest.files.each do |stream, basename, size|
+      refute(seen[stream].include?(basename), "each_file repeated #{stream}/#{basename}")
+      assert_equal(3, size, "wrong size for #{stream}/#{basename}")
+      seen[stream] << basename
+    end
+
+    assert_equal(%w(. ./dir1 ./dir1/dir2), seen.keys)
+
+    seen.each_pair do |stream, basenames|
+      assert_equal(%w(file1), basenames.sort, "wrong file list for #{stream}")
+    end
+  end
+
+  def test_multilevel_collection_with_dirs_in_filenames
+    manifest = Keep::Manifest.new(MULTILEVEL_MANIFEST_WITH_DIRS_IN_FILENAMES)
+
+    seen = Hash.new { |this, key| this[key] = [] }
+    expected_sizes = {'.' => 3, './dir1' => 6, './dir1/dir2' => 11}
+
+    manifest.files.each do |stream, basename, size|
+      refute(seen[stream].include?(basename), "each_file repeated #{stream}/#{basename}")
+      assert_equal(expected_sizes[stream], size, "wrong size for #{stream}/#{basename}")
+      seen[stream] << basename
+    end
+
+    assert_equal(%w(. ./dir1 ./dir1/dir2), seen.keys)
+
+    seen.each_pair do |stream, basenames|
+      assert_equal(%w(file1), basenames.sort, "wrong file list for #{stream}")
+    end
+  end
 end
index 70f67d5d76b10ecd5798c021045398372c4497a9..1ce85d87401022e3faacf75f5dcd60c75678385d 100644 (file)
@@ -14,6 +14,7 @@ group :test, :development do
   # still mandatory.
   gem 'simplecov', '~> 0.7.1', require: false
   gem 'simplecov-rcov', require: false
+  gem 'mocha', require: false
 end
 
 # This might not be needed in :test and :development, but we load it
index a6a8326eeb7be74ea0f54cfcf296b52c69d3e51d..49775d9652501a74ece96cb9ab54682acbd1f7ab 100644 (file)
@@ -114,7 +114,10 @@ GEM
     mail (2.5.4)
       mime-types (~> 1.16)
       treetop (~> 1.4.8)
+    metaclass (0.0.4)
     mime-types (1.25.1)
+    mocha (1.1.0)
+      metaclass (~> 0.0.1)
     multi_json (1.10.1)
     multipart-post (1.2.0)
     net-scp (1.2.0)
@@ -232,6 +235,7 @@ DEPENDENCIES
   faye-websocket
   google-api-client (~> 0.6.3)
   jquery-rails
+  mocha
   multi_json
   oj
   omniauth (= 1.1.1)
index 223f5ca2168c5ab25d316ef97dd3eb6081fb1463..22b25313a5f9536764a7838ada2cb4222ebe0225 100644 (file)
@@ -11,3 +11,24 @@ rescue
 end
 
 Server::Application.load_tasks
+
+namespace :test do
+  task(:run).clear
+  # Copied from the definition in Rails 3.2.
+  # This may need to be updated if we upgrade Rails.
+  task :run do
+    errors = %w(test:units test:functionals test:integration test:tasks).collect do |task|
+      begin
+        Rake::Task[task].invoke
+        nil
+      rescue => e
+        { :task => task, :exception => e }
+      end
+    end.compact
+
+    if errors.any?
+      puts errors.map { |e| "Errors running #{e[:task]}! #{e[:exception].inspect}" }.join("\n")
+      abort
+    end
+  end
+end
index 69c03bde9fc0a1b22ca7a39a3bb7a78d94dd471c..6810d91d4cd6c235751365b1e6d1856a5f6a19b8 100644 (file)
@@ -191,9 +191,18 @@ class ApplicationController < ActionController::Base
            }.merge opts)
   end
 
+  def self.limit_index_columns_read
+    # This method returns a list of column names.
+    # If an index request reads any of these columns from the database,
+    # find_objects_for_index will only fetch objects until it has read
+    # max_index_database_read bytes of data from those columns.
+    []
+  end
+
   def find_objects_for_index
     @objects ||= model_class.readable_by(*@read_users)
     apply_where_limit_order_params
+    limit_database_read if (action_name == "index")
   end
 
   def apply_filters model_class=nil
@@ -268,10 +277,7 @@ class ApplicationController < ActionController::Base
         # Map attribute names in @select to real column names, resolve
         # those to fully-qualified SQL column names, and pass the
         # resulting string to the select method.
-        api_column_map = model_class.attributes_required_columns
-        columns_list = @select.
-          flat_map { |attr| api_column_map[attr] }.
-          uniq.
+        columns_list = model_class.columns_for_attributes(@select).
           map { |s| "#{ar_table_name}.#{ActiveRecord::Base.connection.quote_column_name s}" }
         @objects = @objects.select(columns_list.join(", "))
       end
@@ -289,6 +295,29 @@ class ApplicationController < ActionController::Base
     @objects = @objects.uniq(@distinct) if not @distinct.nil?
   end
 
+  def limit_database_read
+    limit_columns = self.class.limit_index_columns_read
+    limit_columns &= model_class.columns_for_attributes(@select) if @select
+    return if limit_columns.empty?
+    model_class.transaction do
+      limit_query = @objects.
+        select("(%s) as read_length" %
+               limit_columns.map { |s| "length(#{s})" }.join(" + "))
+      new_limit = 0
+      read_total = 0
+      limit_query.find_each do |record|
+        new_limit += 1
+        read_total += record.read_length.to_i
+        break if ((read_total >= Rails.configuration.max_index_database_read) or
+                  (new_limit >= @limit))
+      end
+      @limit = new_limit
+      @objects = @objects.limit(@limit)
+      # Force @objects to run its query inside this transaction.
+      @objects.each { |_| break }
+    end
+  end
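+
+  # For example (illustrative), with limit_index_columns_read ==
+  # ["manifest_text"], the length query above selects roughly:
+  #
+  #   SELECT (length(manifest_text)) as read_length FROM collections ...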
+
   def resource_attrs
     return @attrs if @attrs
     @attrs = params[resource_name]
index 956de8e8942826bdb1ad1473c9d5e1f59631a8e9..44733cdfb82ff1c21c4ca379a723110ebcaf5721 100644 (file)
@@ -1,6 +1,10 @@
 require "arvados/keep"
 
 class Arvados::V1::CollectionsController < ApplicationController
+  def self.limit_index_columns_read
+    ["manifest_text"]
+  end
+
   def create
     if resource_attrs[:uuid] and (loc = Keep::Locator.parse(resource_attrs[:uuid]))
       resource_attrs[:portable_data_hash] = loc.to_s
index ce8a05cba26888d649f6fc28cf7de8de55ee6407..f1ef2d824054f3a0dbe3bb338a966d3a00341b10 100644 (file)
@@ -33,20 +33,27 @@ class Arvados::V1::JobsController < ApplicationController
         @filters =
           [["repository", "=", resource_attrs[:repository]],
            ["script", "=", resource_attrs[:script]],
-           ["script_version", "in git",
-            params[:minimum_script_version] || resource_attrs[:script_version]],
            ["script_version", "not in git", params[:exclude_script_versions]],
           ].reject { |filter| filter.last.nil? or filter.last.empty? }
+        if !params[:minimum_script_version].blank?
+          @filters << ["script_version", "in git",
+                       params[:minimum_script_version]]
+        else
+          add_default_git_filter("script_version", resource_attrs[:repository],
+                                 resource_attrs[:script_version])
+        end
         if image_search = resource_attrs[:runtime_constraints].andand["docker_image"]
           if image_tag = resource_attrs[:runtime_constraints]["docker_image_tag"]
             image_search += ":#{image_tag}"
           end
-          @filters.append(["docker_image_locator", "in docker", image_search])
+          image_locator = Collection.
+            for_latest_docker_image(image_search).andand.portable_data_hash
         else
-          @filters.append(["docker_image_locator", "=", nil])
+          image_locator = nil
         end
+        @filters << ["docker_image_locator", "=", image_locator]
         if sdk_version = resource_attrs[:runtime_constraints].andand["arvados_sdk_version"]
-          @filters.append(["arvados_sdk_version", "in git", sdk_version])
+          add_default_git_filter("arvados_sdk_version", "arvados", sdk_version)
         end
         begin
           load_job_specific_filters
@@ -199,6 +206,16 @@ class Arvados::V1::JobsController < ApplicationController
 
   protected
 
+  def add_default_git_filter(attr_name, repo_name, refspec)
+    # Add a filter to @filters for `attr_name` = the latest commit available
+    # in `repo_name` at `refspec`.  No filter is added if refspec can't be
+    # resolved.
+    commits = Commit.find_commit_range(repo_name, nil, refspec, nil)
+    if commit_hash = commits.first
+      @filters << [attr_name, "=", commit_hash]
+    end
+  end
+
   def load_job_specific_filters
     # Convert Job-specific @filters entries into general SQL filters.
     script_info = {"repository" => nil, "script" => nil}
@@ -254,18 +271,17 @@ class Arvados::V1::JobsController < ApplicationController
       else
         raise ArgumentError.new("unknown attribute for git filter: #{attr}")
       end
-      version_range = Commit.find_commit_range(current_user,
-                                               filter["repository"],
-                                               filter["min_version"],
-                                               filter["max_version"],
-                                               filter["exclude_versions"])
-      if version_range.nil?
+      revisions = Commit.find_commit_range(filter["repository"],
+                                           filter["min_version"],
+                                           filter["max_version"],
+                                           filter["exclude_versions"])
+      if revisions.empty?
         raise ArgumentError.
           new("error searching #{filter['repository']} from " +
               "'#{filter['min_version']}' to '#{filter['max_version']}', " +
               "excluding #{filter['exclude_versions']}")
       end
-      @filters.append([attr, "in", version_range])
+      @filters.append([attr, "in", revisions])
     end
   end
 
index 20e9690bb10608443bc4827251c00693c598b21d..62d5e59c8d142ce5116da263c9314def02b670d1 100644 (file)
@@ -28,7 +28,6 @@ class Arvados::V1::SchemaController < ApplicationController
         description: "The API to interact with Arvados.",
         documentationLink: "http://doc.arvados.org/api/index.html",
         defaultCollectionReplication: Rails.configuration.default_collection_replication,
-        gitHttpBase: Rails.configuration.git_http_base,
         protocol: "rest",
         baseUrl: root_url + "arvados/v1/",
         basePath: "/arvados/v1/",
@@ -36,6 +35,7 @@ class Arvados::V1::SchemaController < ApplicationController
         servicePath: "arvados/v1/",
         batchPath: "batch",
         defaultTrashLifetime: Rails.application.config.default_trash_lifetime,
+        blobSignatureTtl: Rails.application.config.blob_signature_ttl,
         maxRequestSize: Rails.application.config.max_request_size,
         parameters: {
           alt: {
index 02e9386bfef8a8c08046e137a40e627b189e6c25..936f823a5e0ab0cd314359d800491551876ef251 100644 (file)
@@ -103,6 +103,13 @@ class ArvadosModel < ActiveRecord::Base
     api_column_map
   end
 
+  def self.columns_for_attributes(select_attributes)
+    # Given an array of attribute names to select, return an array of column
+    # names that must be fetched from the database to satisfy the request.
+    api_column_map = attributes_required_columns
+    select_attributes.flat_map { |attr| api_column_map[attr] }.uniq
+  end
+
   def self.default_orders
     ["#{table_name}.modified_at desc", "#{table_name}.uuid"]
   end
index 89ad874cd7d211aae90f841927a620150af1ab9b..ccfb35e49685e6746312291c852bfb2e5ed1867f 100644 (file)
@@ -60,7 +60,7 @@ class Collection < ArvadosModel
       signing_opts = {
         key: Rails.configuration.blob_signing_key,
         api_token: api_token,
-        ttl: Rails.configuration.blob_signing_ttl,
+        ttl: Rails.configuration.blob_signature_ttl,
       }
       self.manifest_text.lines.each do |entry|
         entry.split[1..-1].each do |tok|
@@ -195,7 +195,7 @@ class Collection < ArvadosModel
     signing_opts = {
       key: Rails.configuration.blob_signing_key,
       api_token: token,
-      ttl: Rails.configuration.blob_signing_ttl,
+      ttl: Rails.configuration.blob_signature_ttl,
     }
     m = manifest.dup
     munge_manifest_locators!(m) do |loc|
index 0d47b63c61ea000fd04b50d7bf61b442aa25cb24..a6b085722e90fb043a2277f7781727218e8e2559 100644 (file)
@@ -1,5 +1,11 @@
 class Commit < ActiveRecord::Base
-  require 'shellwords'
+  extend CurrentApiClient
+
+  class GitError < StandardError
+    def http_status
+      422
+    end
+  end
 
   def self.git_check_ref_format(e)
     if !e or e.empty? or e[0] == '-' or e[0] == '$'
@@ -11,142 +17,193 @@ class Commit < ActiveRecord::Base
     end
   end
 
-  def self.find_commit_range(current_user, repository, minimum, maximum, exclude)
+  # Return an array of commits (each a 40-char sha1) satisfying the
+  # given criteria.
+  #
+  # Return [] if the revisions given in minimum/maximum are invalid or
+  # don't exist in the given repository.
+  #
+  # Raise ArgumentError if the given repository is invalid, does not
+  # exist, or cannot be read for any reason. (Any transient error that
+  # prevents commit ranges from resolving must raise rather than
+  # returning an empty array.)
+  #
+  # repository can be the name of a locally hosted repository or a git
+  # URL (see git-fetch(1)). Currently http, https, and git schemes are
+  # supported.
+  def self.find_commit_range repository, minimum, maximum, exclude
     if minimum and minimum.empty?
       minimum = nil
     end
 
     if minimum and !git_check_ref_format(minimum)
       logger.warn "find_commit_range called with invalid minimum revision: '#{minimum}'"
-      return nil
+      return []
     end
 
     if maximum and !git_check_ref_format(maximum)
       logger.warn "find_commit_range called with invalid maximum revision: '#{maximum}'"
-      return nil
+      return []
     end
 
     if !maximum
       maximum = "HEAD"
     end
 
-    # Get list of actual repository directories under management
-    on_disk_repos = repositories
-
-    # Get list of repository objects readable by user
-    readable = Repository.readable_by(current_user)
-
-    # filter repository objects on requested repository name
-    if repository
-      readable = readable.where(name: repository)
-    end
+    gitdir, is_remote = git_dir_for repository
+    fetch_remote_repository gitdir, repository if is_remote
+    ENV['GIT_DIR'] = gitdir
 
     commits = []
-    readable.each do |r|
-      if on_disk_repos[r.name]
-        ENV['GIT_DIR'] = on_disk_repos[r.name][:git_dir]
 
-        # We've filtered for invalid characters, so we can pass the contents of
-        # minimum and maximum safely on the command line
+    # Get the commit hash for the upper bound
+    max_hash = nil
+    IO.foreach("|git rev-list --max-count=1 #{maximum.shellescape} --") do |line|
+      max_hash = line.strip
+    end
 
-        # Get the commit hash for the upper bound
-        max_hash = nil
-        IO.foreach("|git rev-list --max-count=1 #{maximum.shellescape} --") do |line|
-          max_hash = line.strip
-        end
+    # If not found or string is invalid, nothing else to do
+    return [] if !max_hash or !git_check_ref_format(max_hash)
 
-        # If not found or string is invalid, nothing else to do
-        next if !max_hash or !git_check_ref_format(max_hash)
-
-        resolved_exclude = nil
-        if exclude
-          resolved_exclude = []
-          exclude.each do |e|
-            if git_check_ref_format(e)
-              IO.foreach("|git rev-list --max-count=1 #{e.shellescape} --") do |line|
-                resolved_exclude.push(line.strip)
-              end
-            else
-              logger.warn "find_commit_range called with invalid exclude invalid characters: '#{exclude}'"
-              return nil
-            end
+    resolved_exclude = nil
+    if exclude
+      resolved_exclude = []
+      exclude.each do |e|
+        if git_check_ref_format(e)
+          IO.foreach("|git rev-list --max-count=1 #{e.shellescape} --") do |line|
+            resolved_exclude.push(line.strip)
           end
+        else
+          logger.warn "find_commit_range called with invalid exclude invalid characters: '#{exclude}'"
+          return []
         end
+      end
+    end
 
-        if minimum
-          # Get the commit hash for the lower bound
-          min_hash = nil
-          IO.foreach("|git rev-list --max-count=1 #{minimum.shellescape} --") do |line|
-            min_hash = line.strip
-          end
-
-          # If not found or string is invalid, nothing else to do
-          next if !min_hash or !git_check_ref_format(min_hash)
+    if minimum
+      # Get the commit hash for the lower bound
+      min_hash = nil
+      IO.foreach("|git rev-list --max-count=1 #{minimum.shellescape} --") do |line|
+        min_hash = line.strip
+      end
 
-          # Now find all commits between them
-          IO.foreach("|git rev-list #{min_hash.shellescape}..#{max_hash.shellescape} --") do |line|
-            hash = line.strip
-            commits.push(hash) if !resolved_exclude or !resolved_exclude.include? hash
-          end
+      # If not found or string is invalid, nothing else to do
+      return [] if !min_hash or !git_check_ref_format(min_hash)
 
-          commits.push(min_hash) if !resolved_exclude or !resolved_exclude.include? min_hash
-        else
-          commits.push(max_hash) if !resolved_exclude or !resolved_exclude.include? max_hash
-        end
-      else
-        logger.warn "Repository #{r.name} exists in table but not found on disk"
+      # Now find all commits between them
+      IO.foreach("|git rev-list #{min_hash.shellescape}..#{max_hash.shellescape} --") do |line|
+        hash = line.strip
+        commits.push(hash) if !resolved_exclude or !resolved_exclude.include? hash
       end
-    end
 
-    if !commits or commits.empty?
-      nil
+      commits.push(min_hash) if !resolved_exclude or !resolved_exclude.include? min_hash
     else
-      commits
+      commits.push(max_hash) if !resolved_exclude or !resolved_exclude.include? max_hash
     end
+
+    commits
   end
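+
+  # Illustrative usage, for a hosted repository named "foo":
+  #
+  #   Commit.find_commit_range("foo", "v1.0", "master", nil)
+  #   # => an array of 40-character commit sha1 strings, or [] if
+  #   #    "v1.0" or "master" can't be resolved in "foo"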
 
-  # Import all commits from configured git directory into the commits
-  # database.
-
-  def self.import_all
-    repositories.each do |repo_name, repo|
-      stat = { true => 0, false => 0 }
-      ENV['GIT_DIR'] = repo[:git_dir]
-      IO.foreach("|git rev-list --format=oneline --all") do |line|
-        sha1, message = line.strip.split " ", 2
-        imported = false
-        Commit.find_or_create_by_repository_name_and_sha1_and_message(repo_name, sha1, message[0..254]) do
-          imported = true
-        end
-        stat[!!imported] += 1
-        if (stat[true] + stat[false]) % 100 == 0
-          if $stdout.tty? or ARGV[0] == '-v'
-            puts "#{$0} #{$$}: repo #{repo_name} add #{stat[true]} skip #{stat[false]}"
-          end
-        end
-      end
-      if $stdout.tty? or ARGV[0] == '-v'
-        puts "#{$0} #{$$}: repo #{repo_name} add #{stat[true]} skip #{stat[false]}"
-      end
+  # Given a repository (url, or name of hosted repo) and commit sha1,
+  # copy the commit into the internal git repo and tag it with the
+  # given tag (typically a job UUID).
+  #
+  # The repo can be a remote url, but in this case sha1 must already
+  # be present in our local cache for that repo: e.g., sha1 was just
+  # returned by find_commit_range.
+  def self.tag_in_internal_repository repo_name, sha1, tag
+    unless git_check_ref_format tag
+      raise ArgumentError.new "invalid tag #{tag}"
+    end
+    unless /^[0-9a-f]{40}$/ =~ sha1
+      raise ArgumentError.new "invalid sha1 #{sha1}"
+    end
+    src_gitdir, _ = git_dir_for repo_name
+    unless src_gitdir
+      raise ArgumentError.new "no local repository for #{repo_name}"
     end
+    dst_gitdir = Rails.configuration.git_internal_dir
+    must_pipe("echo #{sha1.shellescape}",
+              "git --git-dir #{src_gitdir.shellescape} pack-objects -q --revs --stdout",
+              "git --git-dir #{dst_gitdir.shellescape} unpack-objects -q")
+    must_git(dst_gitdir,
+             "tag --force #{tag.shellescape} #{sha1.shellescape}")
   end
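+
+  # The pipeline above is roughly equivalent to this shell sketch
+  # (illustrative; the directories are examples):
+  #
+  #   echo $sha1 \
+  #     | git --git-dir /src/repo.git pack-objects -q --revs --stdout \
+  #     | git --git-dir /internal.git unpack-objects -q
+  #   git --git-dir /internal.git tag --force $tag $sha1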
 
-  def self.refresh_repositories
-    @repositories = nil
+  protected
+
+  def self.remote_url? repo_name
+    /^(https?|git):\/\// =~ repo_name
   end
 
-  protected
+  # Return [local_git_dir, is_remote]. If is_remote, caller must use
+  # fetch_remote_repository to ensure content is up-to-date.
+  #
+  # Raises an exception if the latest content could not be fetched for
+  # any reason.
+  def self.git_dir_for repo_name
+    if remote_url? repo_name
+      return [cache_dir_for(repo_name), true]
+    end
+    repos = Repository.readable_by(current_user).where(name: repo_name)
+    if repos.count == 0
+      raise ArgumentError.new "Repository not found: '#{repo_name}'"
+    elsif repos.count > 1
+      logger.error "Multiple repositories with name=='#{repo_name}'!"
+      raise ArgumentError.new "Name conflict"
+    else
+      return [repos.first.server_path, false]
+    end
+  end
+
+  def self.cache_dir_for git_url
+    File.join(cache_dir_base, Digest::SHA1.hexdigest(git_url) + ".git").to_s
+  end
 
- def self.repositories
-   return @repositories if @repositories
+  def self.cache_dir_base
+    Rails.root.join 'tmp', 'git'
+  end
 
-   @repositories = {}
-   Repository.find_each do |repo|
-     if git_dir = repo.server_path
-       @repositories[repo.name] = {git_dir: git_dir}
-     end
-   end
+  def self.fetch_remote_repository gitdir, git_url
+    # Caller decides which protocols are worth using. This is just a
+    # safety check to ensure we never use urls like "--flag" or wander
+    # into git's hardlink features by using bare "/path/foo" instead
+    # of "file:///path/foo".
+    unless /^[a-z]+:\/\// =~ git_url
+      raise ArgumentError.new "invalid git url #{git_url}"
+    end
+    begin
+      must_git gitdir, "branch"
+    rescue GitError => e
+      raise unless /Not a git repository/ =~ e.to_s
+      # OK, this just means we need to create a blank cache repository
+      # before fetching.
+      FileUtils.mkdir_p gitdir
+      must_git gitdir, "init"
+    end
+    must_git(gitdir,
+             "fetch --no-progress --tags --prune --force --update-head-ok #{git_url.shellescape} 'refs/heads/*:refs/heads/*'")
+  end
 
-   @repositories
- end
+  def self.must_git gitdir, *cmds
+    # Clear token in case a git helper tries to use it as a password.
+    orig_token = ENV['ARVADOS_API_TOKEN']
+    ENV['ARVADOS_API_TOKEN'] = ''
+    begin
+      git = "git --git-dir #{gitdir.shellescape}"
+      cmds.each do |cmd|
+        must_pipe git+" "+cmd
+      end
+    ensure
+      ENV['ARVADOS_API_TOKEN'] = orig_token
+    end
+  end
+
+  def self.must_pipe *cmds
+    cmd = cmds.join(" 2>&1 |") + " 2>&1"
+    out = IO.read("| </dev/null #{cmd}")
+    if not $?.success?
+      raise GitError.new "#{cmd}: #{$?}: #{out}"
+    end
+  end
 end
index 01df069f32f90ae2cc4dd7955c76b6f7b9c572c0..0923a6f4f304863f307e49c69f5648bff3144d89 100644 (file)
@@ -16,6 +16,7 @@ class Job < ArvadosModel
   validate :validate_status
   validate :validate_state_change
   validate :ensure_no_collection_uuids_in_script_params
+  before_save :tag_version_in_internal_repository
   before_save :update_timestamps_when_state_changes
 
   has_many :commit_ancestors, :foreign_key => :descendant, :primary_key => :script_version
@@ -124,21 +125,43 @@ class Job < ArvadosModel
   end
 
   def ensure_script_version_is_commit
-    if self.state == Running
+    if state == Running
       # Apparently client has already decided to go for it. This is
       # needed to run a local job using a local working directory
       # instead of a commit-ish.
       return true
     end
-    if new_record? or script_version_changed?
-      sha1 = Commit.find_commit_range(current_user, self.repository, nil, self.script_version, nil)[0] rescue nil
-      if sha1
-        self.supplied_script_version = self.script_version if self.supplied_script_version.nil? or self.supplied_script_version.empty?
-        self.script_version = sha1
-      else
-        self.errors.add :script_version, "#{self.script_version} does not resolve to a commit"
+    if new_record? or repository_changed? or script_version_changed?
+      sha1 = Commit.find_commit_range(repository,
+                                      nil, script_version, nil).first
+      if not sha1
+        errors.add :script_version, "#{script_version} does not resolve to a commit"
         return false
       end
+      if supplied_script_version.nil? or supplied_script_version.empty?
+        self.supplied_script_version = script_version
+      end
+      self.script_version = sha1
+    end
+    true
+  end
+
+  def tag_version_in_internal_repository
+    if state == Running
+      # No point now. See ensure_script_version_is_commit.
+      true
+    elsif errors.any?
+      # Won't be saved, and script_version might not even be valid.
+      true
+    elsif new_record? or repository_changed? or script_version_changed?
+      uuid_was = uuid
+      begin
+        assign_uuid
+        Commit.tag_in_internal_repository repository, script_version, uuid
+      rescue
+        uuid = uuid_was
+        raise
+      end
     end
   end
 
@@ -169,9 +192,9 @@ class Job < ArvadosModel
   def find_arvados_sdk_version
     resolve_runtime_constraint("arvados_sdk_version",
                                :arvados_sdk_version) do |git_search|
-      commits = Commit.find_commit_range(current_user, "arvados",
+      commits = Commit.find_commit_range("arvados",
                                          nil, git_search, nil)
-      if commits.nil? or commits.empty?
+      if commits.empty?
         [false, "#{git_search} does not resolve to a commit"]
       elsif not runtime_constraints["docker_image"]
         [false, "cannot be specified without a Docker image constraint"]
index bf27f6ff99104879da00019bd09bd7f975fbfd74..6c056502d44170efd09bcec2db0696341f1a3166 100644 (file)
@@ -13,8 +13,7 @@ class Node < ArvadosModel
   belongs_to(:job, foreign_key: :job_uuid, primary_key: :uuid)
   attr_accessor :job_readable
 
-  MAX_SLOTS = 64
-
+  @@max_compute_nodes = Rails.configuration.max_compute_nodes
   @@dns_server_conf_dir = Rails.configuration.dns_server_conf_dir
   @@dns_server_conf_template = Rails.configuration.dns_server_conf_template
   @@dns_server_reload_command = Rails.configuration.dns_server_reload_command
@@ -114,7 +113,7 @@ class Node < ArvadosModel
         rescue ActiveRecord::RecordNotUnique
           try_slot += 1
         end
-        raise "No available node slots" if try_slot == MAX_SLOTS
+        raise "No available node slots" if try_slot == @@max_compute_nodes
       end while true
       self.hostname = self.class.hostname_for_slot(self.slot_number)
     end
@@ -192,7 +191,7 @@ class Node < ArvadosModel
   # At startup, make sure all DNS entries exist.  Otherwise, slurmctld
   # will refuse to start.
   if @@dns_server_conf_dir and @@dns_server_conf_template
-    (0..MAX_SLOTS-1).each do |slot_number|
+    (0..@@max_compute_nodes-1).each do |slot_number|
       hostname = hostname_for_slot(slot_number)
       hostfile = File.join @@dns_server_conf_dir, "#{hostname}.conf"
       if !File.exists? hostfile
index d2da6eab29201d5279b699631a5a537def66180e..f361a49db5dcd49b649d7e7f79c255e214eae97a 100644 (file)
@@ -13,22 +13,27 @@ class Repository < ArvadosModel
     t.add :name
     t.add :fetch_url
     t.add :push_url
+    t.add :clone_urls
   end
 
   def self.attributes_required_columns
-    super.merge({"push_url" => ["name"], "fetch_url" => ["name"]})
+    super.merge("clone_urls" => ["name"],
+                "fetch_url" => ["name"],
+                "push_url" => ["name"])
   end
 
+  # Deprecated. Use clone_urls instead.
   def push_url
-    if Rails.configuration.git_host
-      "git@%s:%s.git" % [Rails.configuration.git_host, name]
-    else
-      "git@git.%s.arvadosapi.com:%s.git" % [Rails.configuration.uuid_prefix, name]
-    end
+    ssh_clone_url
   end
 
+  # Deprecated. Use clone_urls instead.
   def fetch_url
-    push_url
+    ssh_clone_url
+  end
+
+  def clone_urls
+    [ssh_clone_url, https_clone_url].compact
   end
 
   def server_path
@@ -87,4 +92,24 @@ class Repository < ArvadosModel
       false
     end
   end
+
+  def ssh_clone_url
+    _clone_url :git_repo_ssh_base, 'git@git.%s.arvadosapi.com:'
+  end
+
+  def https_clone_url
+    _clone_url :git_repo_https_base, 'https://git.%s.arvadosapi.com/'
+  end
+
+  def _clone_url config_var, default_base_fmt
+    configured_base = Rails.configuration.send config_var
+    return nil if configured_base == false
+    prefix = new_record? ? Rails.configuration.uuid_prefix : uuid[0,5]
+    if prefix == Rails.configuration.uuid_prefix and configured_base != true
+      base = configured_base
+    else
+      base = default_base_fmt % prefix
+    end
+    '%s%s.git' % [base, name]
+  end
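+
+  # For example (illustrative), a repository named "foo" on a cluster with
+  # uuid_prefix "zzzzz" and default configuration advertises:
+  #
+  #   clone_urls  # => ["git@git.zzzzz.arvadosapi.com:foo.git",
+  #               #     "https://git.zzzzz.arvadosapi.com/foo.git"]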
 end
index fe5e07b3b7c736d1c933005cd1dc1bcd6c66f243..2200d050990809d04f3e5fdbe088a1af7621ba01 100644 (file)
@@ -194,7 +194,7 @@ class User < ArvadosModel
   def setup_repo_vm_links(repo_name, vm_uuid, openid_prefix)
     oid_login_perm = create_oid_login_perm openid_prefix
     repo_perm = create_user_repo_link repo_name
-    vm_login_perm = create_vm_login_permission_link vm_uuid, repo_name
+    vm_login_perm = create_vm_login_permission_link vm_uuid, username
     group_perm = create_user_group_link
 
     return [oid_login_perm, repo_perm, vm_login_perm, group_perm, self].compact
index 1696e2c56ebf9226126945c86b254144c7acd7fa..e5c00c5ab76b37ff3fea654bd2235954fcc1b5ef 100644 (file)
@@ -45,6 +45,8 @@ test:
   blob_signing_key: zfhgfenhffzltr9dixws36j1yhksjoll2grmku38mi7yxd66h5j4q9w4jzanezacp8s6q0ro3hxakfye02152hncy6zml2ed0uc
   user_profile_notification_address: arvados@example.com
   workbench_address: https://localhost:3001/
+  git_repositories_dir: <%= Rails.root.join 'tmp', 'git', 'test' %>
+  git_internal_dir: <%= Rails.root.join 'tmp', 'internal.git' %>
 
 common:
   # The prefix used for all database identifiers to identify the record as
@@ -57,10 +59,19 @@ common:
   # logic for deciding on a hostname.
   host: false
 
-  # If not false, this is the hostname that will be used to generate fetch_url
-  # and push_url for git repositories.  By default, this will be
-  # git.(uuid_prefix).arvadosapi.com
-  git_host: false
+  # Base part of SSH git clone url given with repository resources. If
+  # true, the default "git@git.(uuid_prefix).arvadosapi.com:" is
+  # used. If false, SSH clone URLs are not advertised. Include a
+  # trailing ":" or "/" if needed: it will not be added automatically.
+  git_repo_ssh_base: true
+
+  # Base part of HTTPS git clone urls given with repository
+  # resources. This is expected to be an arv-git-httpd service which
+  # accepts API tokens as HTTP-auth passwords. If true, the default
+  # "https://git.(uuid_prefix).arvadosapi.com/" is used. If false,
+  # HTTPS clone URLs are not advertised. Include a trailing ":" or "/"
+  # if needed: it will not be added automatically.
+  git_repo_https_base: true
 
   # If this is not false, HTML requests at the API server's root URL
   # are redirected to this location, and it is provided in the text of
@@ -74,11 +85,6 @@ common:
   # {git_repositories_dir}/arvados/.git
   git_repositories_dir: /var/lib/arvados/git
 
-  # If an arv-git-httpd service is running, advertise it in the
-  # discovery document by adding its public URI base here. Example:
-  # https://git.xxxxx.arvadosapi.com
-  git_http_base: false
-
   # This is a (bare) repository that stores commits used in jobs.  When a job
   # runs, the source commits are first fetched into this repository, then this
   # repository is used to deploy to compute nodes.  This should NOT be a
@@ -214,9 +220,23 @@ common:
   # a site secret. It should be at least 50 characters.
   blob_signing_key: ~
 
-  # Amount of time (in seconds) for which a blob permission signature
-  # remains valid.  Default: 2 weeks (1209600 seconds)
-  blob_signing_ttl: 1209600
+  # Lifetime (in seconds) of blob permission signatures generated by
+  # the API server. This determines how long a client can take (after
+  # retrieving a collection record) to retrieve the collection data
+  # from Keep. If the client needs more time than that (assuming the
+  # collection still has the same content and the relevant user/token
+  # still has permission) the client can retrieve the collection again
+  # to get fresh signatures.
+  #
+  # Datamanager considers an unreferenced block older than this to be
+  # eligible for garbage collection. Therefore, it should never be
+  # smaller than the corresponding value used by any local keepstore
+  # service (see keepstore -blob-signature-ttl flag). This rule
+  # prevents datamanager from trying to garbage-collect recently
+  # written blocks while clients are still holding valid signatures.
+  #
+  # The default is 2 weeks.
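+  #
+  # Example (illustrative): for one-day signatures, set this to 86400 and
+  # run each local keepstore with a matching -blob-signature-ttl value.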
+  blob_signature_ttl: 1209600
 
   # Allow clients to create collections by providing a manifest with
   # unsigned data blob locators. IMPORTANT: This effectively disables
@@ -234,7 +254,8 @@ common:
   # should be at least 50 characters.
   secret_token: ~
 
-  # email address to which mail should be sent when the user creates profile for the first time
+  # Email address to notify whenever a user creates a profile for the
+  # first time
   user_profile_notification_address: false
 
   default_openid_prefix: https://www.google.com/accounts/o8/id
@@ -268,3 +289,25 @@ common:
   # Note you must separately configure the upstream web server or proxy to
   # actually enforce the desired maximum request size on the server side.
   max_request_size: 134217728
+
+  # Stop collecting records for an index request after we read this much
+  # data (in bytes) from large database columns.
+  # Currently only `GET /collections` respects this parameter, when the
+  # user requests an index that includes manifest_text.  Once the API
+  # server collects records with a total manifest_text size at or above
+  # this amount, it returns those results immediately.
+  # Note this is a threshold, not a limit.  Record collection stops
+  # *after* reading this much data.
+  max_index_database_read: 134217728
+
+  # When you run the db:delete_old_job_logs task, it will find jobs that
+  # have been finished for at least this many seconds, and delete their
+  # stderr logs from the logs table.
+  clean_job_log_rows_after: <%= 30.days %>
+
+  # The maximum number of compute nodes that can be in use simultaneously.
+  # If this limit is reduced, any existing nodes with slot number >= new limit
+  # will not be counted against the new limit. In other words, the new limit
+  # won't be strictly enforced until those nodes with higher slot numbers
+  # go down.
+  max_compute_nodes: 64
similarity index 80%
rename from services/api/config/initializers/zz_load_config.rb
rename to services/api/config/initializers/load_config.rb
index 3399fd9bf59fb9751303830cbb87aee89a502e81..3b516dacadff0a4b4031a84be772c171df7f1197 100644 (file)
@@ -1,3 +1,11 @@
+begin
+  # If secret_token.rb exists here, we need to load it first.
+  require_relative 'secret_token.rb'
+rescue LoadError
+  # Normally secret_token.rb is missing and the secret token is
+  # configured by application.yml (i.e., here!) instead.
+end
+
 $application_config = {}
 
 %w(application.default application).each do |cfgfile|
@@ -5,6 +13,8 @@ $application_config = {}
   if File.exists? path
     yaml = ERB.new(IO.read path).result(binding)
     confs = YAML.load(yaml)
+    # Ignore empty YAML file:
+    next if confs == false
     $application_config.merge!(confs['common'] || {})
     $application_config.merge!(confs[::Rails.env.to_s] || {})
   end
similarity index 55%
rename from services/api/config/initializers/zz_preload_all_models.rb
rename to services/api/config/initializers/preload_all_models.rb
index 1a76b72ed320f8ee9e2f261f1b2efb9b74eb9920..7e2612377434b9e3bfc245a8b4dc6143d6ad00c6 100644 (file)
@@ -1,7 +1,12 @@
 # See http://aaronvb.com/articles/37-rails-caching-and-undefined-class-module
 
+# Config must be done before we load model class files; otherwise they
+# won't be able to use Rails.configuration.* to initialize their
+# classes.
+require_relative 'load_config.rb'
+
 if Rails.env == 'development'
   Dir.foreach("#{Rails.root}/app/models") do |model_file|
     require_dependency model_file if model_file.match /\.rb$/
-  end 
+  end
 end
diff --git a/services/api/db/migrate/20150423145759_no_filenames_in_collection_search_index.rb b/services/api/db/migrate/20150423145759_no_filenames_in_collection_search_index.rb
new file mode 100644 (file)
index 0000000..e8423c1
--- /dev/null
@@ -0,0 +1,11 @@
+class NoFilenamesInCollectionSearchIndex < ActiveRecord::Migration
+  def up
+    remove_index :collections, :name => 'collections_search_index'
+    add_index :collections, ["owner_uuid", "modified_by_client_uuid", "modified_by_user_uuid", "portable_data_hash", "uuid", "name"], name: 'collections_search_index'
+  end
+
+  def down
+    remove_index :collections, :name => 'collections_search_index'
+    add_index :collections, ["owner_uuid", "modified_by_client_uuid", "modified_by_user_uuid", "portable_data_hash", "uuid", "name", "file_names"], name: 'collections_search_index'
+  end
+end
index 61fc1ae214701752926923ae826d49144eb5e6f6..a86de697388401a8c0e4739ecb60b92c23546d85 100644 (file)
@@ -432,8 +432,8 @@ CREATE TABLE jobs (
     docker_image_locator character varying(255),
     priority integer DEFAULT 0 NOT NULL,
     description character varying(524288),
-    state character varying(255),
-    arvados_sdk_version character varying(255)
+    arvados_sdk_version character varying(255),
+    state character varying(255)
 );
 
 
@@ -1316,7 +1316,7 @@ CREATE INDEX collections_full_text_search_idx ON collections USING gin (to_tsvec
 -- Name: collections_search_index; Type: INDEX; Schema: public; Owner: -; Tablespace: 
 --
 
-CREATE INDEX collections_search_index ON collections USING btree (owner_uuid, modified_by_client_uuid, modified_by_user_uuid, portable_data_hash, uuid, name, file_names);
+CREATE INDEX collections_search_index ON collections USING btree (owner_uuid, modified_by_client_uuid, modified_by_user_uuid, portable_data_hash, uuid, name);
 
 
 --
@@ -2374,4 +2374,6 @@ INSERT INTO schema_migrations (version) VALUES ('20150312151136');
 
 INSERT INTO schema_migrations (version) VALUES ('20150317132720');
 
-INSERT INTO schema_migrations (version) VALUES ('20150324152204');
\ No newline at end of file
+INSERT INTO schema_migrations (version) VALUES ('20150324152204');
+
+INSERT INTO schema_migrations (version) VALUES ('20150423145759');
\ No newline at end of file
diff --git a/services/api/lib/tasks/delete_old_job_logs.rake b/services/api/lib/tasks/delete_old_job_logs.rake
new file mode 100644 (file)
index 0000000..7f2b31e
--- /dev/null
@@ -0,0 +1,17 @@
+# This task finds jobs that have been finished for at least as long as
+# the duration specified in the `clean_job_log_rows_after`
+# configuration setting, and deletes their stderr logs from the logs table.
+
+namespace :db do
+  desc "Remove old job stderr entries from the logs table"
+  task delete_old_job_logs: :environment do
+    Log.select("logs.id").
+        joins("JOIN jobs ON object_uuid = jobs.uuid").
+        where("event_type = :etype AND jobs.log IS NOT NULL AND jobs.finished_at < :age",
+              etype: "stderr",
+              age: Rails.configuration.clean_job_log_rows_after.ago).
+        find_in_batches do |old_log_ids|
+      Log.where(id: old_log_ids.map(&:id)).delete_all
+    end
+  end
+end
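
The task is intended for periodic runs (e.g. from cron) as `rake db:delete_old_job_logs`; it can also be driven from Ruby, as the new task test later in this commit does. A sketch:

  # clean_job_log_rows_after would normally come from application.yml;
  # 30.days here is just an example value.
  Rails.configuration.clean_job_log_rows_after = 30.days
  Rake::Task['db:delete_old_job_logs'].reenable
  Rake.application.invoke_task 'db:delete_old_job_logs'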
diff --git a/services/api/lib/tasks/test_tasks.rake b/services/api/lib/tasks/test_tasks.rake
new file mode 100644 (file)
index 0000000..27bf232
--- /dev/null
@@ -0,0 +1,6 @@
+namespace :test do
+  new_task = Rake::TestTask.new(tasks: "test:prepare") do |t|
+    t.libs << "test"
+    t.pattern = "test/tasks/**/*_test.rb"
+  end
+end
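
Passing a Hash to Rake::TestTask.new follows the usual `task name => prerequisites` convention, so this defines a task named test:tasks that runs test:prepare first and then the files matching test/tasks/**/*_test.rb:

  Rake.application.invoke_task 'test:tasks'   # equivalent to `rake test:tasks`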
index 7b3ed9eb1607d51ff71b5fec2329a6d5dc2ecd61..1002f9134736ab84136f2f8b9032ff630e14ce84 100755 (executable)
@@ -279,26 +279,24 @@ class Dispatcher
     @authorizations[job.uuid]
   end
 
-  def get_commit(src_repo, commit_hash)
-    # @fetched_commits[V]==true if we know commit V exists in the
-    # arvados_internal git repository.
-    if !@fetched_commits[commit_hash]
-      # check if the commit needs to be fetched or not
-      commit_rev = stdout_s(git_cmd("rev-list", "-n1", commit_hash),
-                            err: "/dev/null")
-      unless $? == 0 and commit_rev == commit_hash
-        # commit does not exist in internal repository, so import the source repository using git fetch-pack
-        cmd = git_cmd("fetch-pack", "--no-progress", "--all", src_repo)
-        $stderr.puts "dispatch: #{cmd}"
-        $stderr.puts(stdout_s(cmd))
-        unless $? == 0
-          fail_job job, "git fetch-pack failed"
-          return nil
-        end
-      end
-      @fetched_commits[commit_hash] = true
+  def internal_repo_has_commit? sha1
+    if (not @fetched_commits[sha1] and
+        sha1 == stdout_s(git_cmd("rev-list", "-n1", sha1), err: "/dev/null") and
+        $? == 0)
+      @fetched_commits[sha1] = true
     end
-    @fetched_commits[commit_hash]
+    return @fetched_commits[sha1]
+  end
+
+  def get_commit src_repo, sha1
+    return true if internal_repo_has_commit? sha1
+
+    # commit does not exist in internal repository, so import the
+    # source repository using git fetch-pack
+    cmd = git_cmd("fetch-pack", "--no-progress", "--all", src_repo)
+    $stderr.puts "dispatch: #{cmd}"
+    $stderr.puts(stdout_s(cmd))
+    @fetched_commits[sha1] = ($? == 0)
   end
 
   def tag_commit(commit_hash, tag_name)
@@ -377,20 +375,42 @@ class Dispatcher
                          "GEM_PATH=#{ENV['GEM_PATH']}")
       end
 
-      repo = Repository.where(name: job.repository).first
-      if repo.nil? or repo.server_path.nil?
-        fail_job job, "Repository #{job.repository} not found under #{@repo_root}"
-        next
+      next unless get_authorization job
+
+      ready = internal_repo_has_commit? job.script_version
+
+      if not ready
+        # Import the commit from the specified repository into the
+        # internal repository. This should have been done already when
+        # the job was created/updated; this code is obsolete except to
+        # avoid deployment races. Failing the job would be a
+        # reasonable thing to do at this point.
+        repo = Repository.where(name: job.repository).first
+        if repo.nil? or repo.server_path.nil?
+          fail_job job, "Repository #{job.repository} not found under #{@repo_root}"
+          next
+        end
+        ready = get_commit repo.server_path, job.script_version
+        ready &&= tag_commit job.script_version, job.uuid
       end
 
-      ready = (get_authorization(job) and
-               get_commit(repo.server_path, job.script_version) and
-               tag_commit(job.script_version, job.uuid))
-      if ready and job.arvados_sdk_version
-        ready = (get_commit(@arvados_repo_path, job.arvados_sdk_version) and
-                 tag_commit(job.arvados_sdk_version, "#{job.uuid}-arvados-sdk"))
+      # This should be unnecessary, because API server does it during
+      # job create/update, but it's still not a bad idea to verify the
+      # tag is correct before starting the job:
+      ready &&= tag_commit job.script_version, job.uuid
+
+      # The arvados_sdk_version doesn't support use of arbitrary
+      # remote URLs, so the requested version isn't necessarily copied
+      # into the internal repository yet.
+      if job.arvados_sdk_version
+        ready &&= get_commit @arvados_repo_path, job.arvados_sdk_version
+        ready &&= tag_commit job.arvados_sdk_version, "#{job.uuid}-arvados-sdk"
+      end
+
+      if not ready
+        fail_job job, "commit not present in internal repository"
+        next
       end
-      next unless ready
 
       cmd_args += [@crunch_job_bin,
                    '--job-api-token', @authorizations[job.uuid].api_token,
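
The readiness chain above relies on the difference between plain assignment and `&&=`: once `ready` is false, `&&=` short-circuits and never evaluates its right-hand side, so only the plain assignment in the fallback branch can flip it back to true. A condensed illustration (helper names other than internal_repo_has_commit? are hypothetical):

  ready = internal_repo_has_commit? sha          # may be false
  ready = fetch_into_internal(sha) if not ready  # plain '=' can recover
  ready &&= verify_script_tag(sha)               # runs only while ready is truthy
  ready &&= verify_sdk_commit(sha)               # skipped as soon as anything failed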
index 869d9eeeb300e1853fd0f511ab6af01c5fec6d26..9199d178f6bcdfec3c8536d8da9f7e6b22613898 100644 (file)
@@ -187,12 +187,18 @@ active_no_prefs:
   api_token: 3kg612cdc0f3415c2428b9758f33bdfb07bc3561b00e86qdmi
   expires_at: 2038-01-01 00:00:00
 
-active_no_prefs_profile:
+active_no_prefs_profile_no_getting_started_shown:
   api_client: untrusted
-  user: active_no_prefs_profile
+  user: active_no_prefs_profile_no_getting_started_shown
   api_token: 3kg612cdc0f3415c242856758f33bdfb07bc3561b00e86qdmi
   expires_at: 2038-01-01 00:00:00
 
+active_no_prefs_profile_with_getting_started_shown:
+  api_client: untrusted
+  user: active_no_prefs_profile_with_getting_started_shown
+  api_token: 3kg612cdc0f3415c245786758f33bdfb07babcd1b00e86qdmi
+  expires_at: 2038-01-01 00:00:00
+
 user_foo_in_sharing_group:
   api_client: untrusted
   user: user_foo_in_sharing_group
index 7b4f8be6dc8d2fcaf6f5acbf36d96b339c3a3385..f6b99a06617a860d9d4c6681b60c2861b426d4e1 100644 (file)
@@ -127,6 +127,14 @@ anonymously_accessible_project:
   group_class: project
   description: An anonymously accessible project
 
+subproject_in_anonymous_accessible_project:
+  uuid: zzzzz-j7d0g-mhtfesvgmkolpyf
+  owner_uuid: zzzzz-j7d0g-zhxawtyetzwc5f0
+  created_at: 2014-04-21 15:37:48 -0400
+  name: Subproject in anonymous accessible project
+  description: Description for subproject in anonymous accessible project
+  group_class: project
+
 active_user_has_can_manage:
   uuid: zzzzz-j7d0g-ptt1ou6a9lxrv07
   owner_uuid: zzzzz-tpzed-d9tiejq69daie8f
@@ -248,3 +256,29 @@ project_owns_itself:
   description: ~
   updated_at: 2014-11-05 22:31:24.258093171 Z
   group_class: project
+
+# Used to test renaming when an object is removed from the "asubproject"
+# subproject while another object with the same name exists in the home project.
+subproject_in_active_user_home_project_to_test_unique_key_violation:
+  uuid: zzzzz-j7d0g-subprojsamenam1
+  owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  created_at: 2013-04-21 15:37:48 -0400
+  modified_by_client_uuid: zzzzz-ozdt8-brczlopd8u8d0jr
+  modified_by_user_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  modified_at: 2013-04-21 15:37:48 -0400
+  updated_at: 2013-04-21 15:37:48 -0400
+  name: Subproject to test owner uuid and name unique key violation upon removal
+  description: Subproject in active user home project to test owner uuid and name unique key violation upon removal
+  group_class: project
+
+subproject_in_asubproject_with_same_name_as_one_in_active_user_home:
+  uuid: zzzzz-j7d0g-subprojsamenam2
+  owner_uuid: zzzzz-j7d0g-axqo7eu9pwvna1x
+  created_at: 2013-04-21 15:37:48 -0400
+  modified_by_client_uuid: zzzzz-ozdt8-brczlopd8u8d0jr
+  modified_by_user_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  modified_at: 2013-04-21 15:37:48 -0400
+  updated_at: 2013-04-21 15:37:48 -0400
+  name: Subproject to test owner uuid and name unique key violation upon removal
+  description: "Removing this will result in name conflict with 'A project' in Home project and hence get renamed."
+  group_class: project
index c6620627041a9552d65fbac8610153ac79dbe195..8a4c34557c4e45c3cf1e0ae22a675901e8b3754e 100644 (file)
@@ -7,6 +7,8 @@ running:
   created_at: <%= 3.minute.ago.to_s(:db) %>
   started_at: <%= 3.minute.ago.to_s(:db) %>
   finished_at: ~
+  script: hash
+  repository: active/foo
   script_version: 1de84a854e2b440dc53bf42f8548afa4c17da332
   running: true
   success: ~
@@ -31,6 +33,8 @@ running_cancelled:
   created_at: <%= 4.minute.ago.to_s(:db) %>
   started_at: <%= 3.minute.ago.to_s(:db) %>
   finished_at: ~
+  script: hash
+  repository: active/foo
   script_version: 1de84a854e2b440dc53bf42f8548afa4c17da332
   running: true
   success: ~
@@ -56,6 +60,8 @@ uses_nonexistent_script_version:
   created_at: <%= 5.minute.ago.to_s(:db) %>
   started_at: <%= 3.minute.ago.to_s(:db) %>
   finished_at: <%= 2.minute.ago.to_s(:db) %>
+  script: hash
+  repository: active/foo
   running: false
   success: true
   output: d41d8cd98f00b204e9800998ecf8427e+0
@@ -157,6 +163,7 @@ runningbarbaz:
 previous_job_run:
   uuid: zzzzz-8i9sb-cjs4pklxxjykqqq
   created_at: <%= 14.minute.ago.to_s(:db) %>
+  finished_at: <%= 13.minutes.ago.to_s(:db) %>
   owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
   repository: active/foo
   script: hash
@@ -165,6 +172,23 @@ previous_job_run:
     input: fa7aeb5140e2848d39b416daeef4ffc5+45
     an_integer: "1"
   success: true
+  log: d41d8cd98f00b204e9800998ecf8427e+0
+  output: ea10d51bcf88862dbcc36eb292017dfd+45
+  state: Complete
+
+previous_ancient_job_run:
+  uuid: zzzzz-8i9sb-ahd7cie8jah9qui
+  created_at: <%= 366.days.ago.to_s(:db) %>
+  finished_at: <%= 365.days.ago.to_s(:db) %>
+  owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  repository: active/foo
+  script: hash
+  script_version: 4fe459abe02d9b365932b8f5dc419439ab4e2577
+  script_parameters:
+    input: fa7aeb5140e2848d39b416daeef4ffc5+45
+    an_integer: "2"
+  success: true
+  log: d41d8cd98f00b204e9800998ecf8427e+0
   output: ea10d51bcf88862dbcc36eb292017dfd+45
   state: Complete
 
@@ -179,12 +203,29 @@ previous_docker_job_run:
     input: fa7aeb5140e2848d39b416daeef4ffc5+45
     an_integer: "1"
   runtime_constraints:
-    docker_image: arvados/test
+    docker_image: arvados/apitestfixture
   success: true
   output: ea10d51bcf88862dbcc36eb292017dfd+45
   docker_image_locator: fa3c1a9cb6783f85f2ecda037e07b8c3+167
   state: Complete
 
+previous_ancient_docker_image_job_run:
+  uuid: zzzzz-8i9sb-t3b460aolxxuldl
+  created_at: <%= 144.minute.ago.to_s(:db) %>
+  owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  repository: active/foo
+  script: hash
+  script_version: 4fe459abe02d9b365932b8f5dc419439ab4e2577
+  script_parameters:
+    input: fa7aeb5140e2848d39b416daeef4ffc5+45
+    an_integer: "2"
+  runtime_constraints:
+    docker_image: arvados/apitestfixture
+  success: true
+  output: ea10d51bcf88862dbcc36eb292017dfd+45
+  docker_image_locator: b519d9cb706a29fc7ea24dbea2f05851+93
+  state: Complete
+
 previous_job_run_with_arvados_sdk_version:
   uuid: zzzzz-8i9sb-eoo0321or2dw2jg
   created_at: <%= 14.minute.ago.to_s(:db) %>
@@ -197,7 +238,9 @@ previous_job_run_with_arvados_sdk_version:
     an_integer: "1"
   runtime_constraints:
     arvados_sdk_version: commit2
+    docker_image: arvados/apitestfixture
   arvados_sdk_version: 00634b2b8a492d6f121e3cf1d6587b821136a9a7
+  docker_image_locator: fa3c1a9cb6783f85f2ecda037e07b8c3+167
   success: true
   output: ea10d51bcf88862dbcc36eb292017dfd+45
   state: Complete
@@ -216,6 +259,20 @@ previous_job_run_no_output:
   output: ~
   state: Complete
 
+previous_job_run_superseded_by_hash_branch:
+  # This supplied_script_version is a branch name with later commits.
+  uuid: zzzzz-8i9sb-aeviezu5dahph3e
+  created_at: <%= 15.minute.ago.to_s(:db) %>
+  owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  repository: active/shabranchnames
+  script: testscript
+  script_version: 7387838c69a21827834586cc42b467ff6c63293b
+  supplied_script_version: 738783
+  script_parameters: {}
+  success: true
+  output: d41d8cd98f00b204e9800998ecf8427e+0
+  state: Complete
+
 nondeterminisic_job_run:
   uuid: zzzzz-8i9sb-cjs4pklxxjykyyy
   created_at: <%= 14.minute.ago.to_s(:db) %>
index b8856efd38cea9d0e32677a87ceff1a8877a0077..42ecad35f4c2eba4f46cded33becf029a62d5694 100644 (file)
@@ -643,6 +643,36 @@ ancient_docker_image_collection_hash:
   properties:
     image_timestamp: "2010-06-10T14:30:00.184019565Z"
 
+ancient_docker_image_collection_tag:
+  uuid: zzzzz-o0j2j-dockercolltagzz
+  owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  created_at: 2014-06-12 14:30:00.184389725 Z
+  modified_by_client_uuid: zzzzz-ozdt8-brczlopd8u8d0jr
+  modified_by_user_uuid: zzzzz-tpzed-000000000000000
+  modified_at: 2014-06-12 14:30:00.184019565 Z
+  updated_at: 2014-06-12 14:30:00.183829316 Z
+  link_class: docker_image_repo+tag
+  name: arvados/apitestfixture:latest
+  tail_uuid: ~
+  head_uuid: zzzzz-4zz18-t68oksiu9m80s4y
+  properties:
+    image_timestamp: "2010-06-10T14:30:00.184019565Z"
+
+docker_image_tag_like_hash:
+  uuid: zzzzz-o0j2j-dockerhashtagaa
+  owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  created_at: 2014-06-11 14:30:00.184389725 Z
+  modified_by_client_uuid: zzzzz-ozdt8-brczlopd8u8d0jr
+  modified_by_user_uuid: zzzzz-tpzed-000000000000000
+  modified_at: 2014-06-11 14:30:00.184019565 Z
+  updated_at: 2014-06-11 14:30:00.183829316 Z
+  link_class: docker_image_repo+tag
+  name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:latest
+  tail_uuid: ~
+  head_uuid: zzzzz-4zz18-1v45jub259sjjgb
+  properties:
+    image_timestamp: "2014-06-10T14:30:00.184019565Z"
+
 job_reader_can_read_previous_job_run:
   # Permission link giving job_reader permission
   # to read previous_job_run
index aea7980b0509fda97c40a662807ae2e671f3f77f..9179e6dff92a4c62a0271dd78786b98dc726fef4 100644 (file)
@@ -101,3 +101,41 @@ log_line_for_pipeline_in_publicly_accessible_project_but_other_objects_elsewhere
   updated_at: 2014-11-07 23:33:42.347455000 Z
   modified_at: 2014-11-07 23:33:42.347455000 Z
   object_owner_uuid: zzzzz-j7d0g-v955i6s2oi1cbso
+
+crunchstat_for_previous_job:
+  id: 10
+  uuid: zzzzz-57u5n-eir3aesha3kaene
+  owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  modified_by_client_uuid: zzzzz-ozdt8-obw7foaks3qjyej
+  modified_by_user_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  object_uuid: zzzzz-8i9sb-cjs4pklxxjykqqq
+  event_at: 2014-11-07 23:33:42.347455000 Z
+  event_type: stderr
+  summary: ~
+  properties:
+    text: '2014-11-07_23:33:41 zzzzz-8i9sb-cjs4pklxxjykqqq 11592 1 stderr crunchstat:
+      cpu 1935.4300 user 59.4100 sys 8 cpus -- interval 10.0002 seconds 12.9900 user
+      0.9900 sys'
+  created_at: 2014-11-07 23:33:42.351913000 Z
+  updated_at: 2014-11-07 23:33:42.347455000 Z
+  modified_at: 2014-11-07 23:33:42.347455000 Z
+  object_owner_uuid: zzzzz-j7d0g-xurymjxw79nv3jz
+
+crunchstat_for_ancient_job:
+  id: 11
+  uuid: zzzzz-57u5n-ixioph7ieb5ung8
+  owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  modified_by_client_uuid: zzzzz-ozdt8-obw7foaks3qjyej
+  modified_by_user_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  object_uuid: zzzzz-8i9sb-ahd7cie8jah9qui
+  event_at: 2013-11-07 23:33:42.347455000 Z
+  event_type: stderr
+  summary: ~
+  properties:
+    text: '2013-11-07_23:33:41 zzzzz-8i9sb-ahd7cie8jah9qui 29610 1 stderr crunchstat:
+      cpu 1935.4300 user 59.4100 sys 8 cpus -- interval 10.0002 seconds 12.9900 user
+      0.9900 sys'
+  created_at: 2013-11-07 23:33:42.351913000 Z
+  updated_at: 2013-11-07 23:33:42.347455000 Z
+  modified_at: 2013-11-07 23:33:42.347455000 Z
+  object_owner_uuid: zzzzz-j7d0g-xurymjxw79nv3jz
index 1f1b128133d32c22d715909c2f45d91aecd010ab..41a7fc9720e77292721800004babca120ae45480 100644 (file)
@@ -256,7 +256,7 @@ pipeline_in_publicly_accessible_project:
   name: Pipeline in publicly accessible project
   pipeline_template_uuid: zzzzz-p5p6p-tmpltpublicproj
   state: Complete
-  created_at: 2014-09-15 12:00:00
+  created_at: <%= 1.minute.ago.to_s(:db) %>
   components:
     foo:
       script: foo
@@ -338,9 +338,25 @@ new_pipeline_in_publicly_accessible_project_but_other_objects_elsewhere:
           dataclass: Collection
           value: zzzzz-4zz18-bv31uwvy3neko21
 
+new_pipeline_in_publicly_accessible_project_with_dataclass_file_and_other_objects_elsewhere:
+  uuid: zzzzz-d1hrv-newsharenotfile
+  owner_uuid: zzzzz-j7d0g-zhxawtyetzwc5f0
+  name: Pipeline in public project in New state with file type data class with objects elsewhere
+  pipeline_template_uuid: zzzzz-p5p6p-aox0k0ofxrystgw
+  state: New
+  created_at: 2014-09-15 12:00:00
+  components:
+    foo:
+      script: foo
+      script_version: master
+      script_parameters:
+        input:
+          required: true
+          dataclass: File
+          value: zzzzz-4zz18-bv31uwvy3neko21/bar
+
 pipeline_in_running_state:
   name: running_with_job
-  state: Ready
   uuid: zzzzz-d1hrv-runningpipeline
   owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
   created_at: <%= 3.1.minute.ago.to_s(:db) %>
@@ -355,6 +371,47 @@ pipeline_in_running_state:
       uuid: zzzzz-8i9sb-pshmckwoma9plh7
       script_version: master
 
+running_pipeline_with_complete_job:
+  uuid: zzzzz-d1hrv-partdonepipelin
+  owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  state: RunningOnServer
+  components:
+   previous:
+    job:
+      uuid: zzzzz-8i9sb-cjs4pklxxjykqqq
+      log: zzzzz-4zz18-op4e2lbej01tcvu
+   running:
+    job:
+      uuid: zzzzz-8i9sb-pshmckwoma9plh7
+
+complete_pipeline_with_two_jobs:
+  uuid: zzzzz-d1hrv-twodonepipeline
+  owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  state: Complete
+  components:
+   ancient:
+    job:
+      uuid: zzzzz-8i9sb-ahd7cie8jah9qui
+      log: zzzzz-4zz18-op4e2lbej01tcvu
+   previous:
+    job:
+      uuid: zzzzz-8i9sb-cjs4pklxxjykqqq
+      log: zzzzz-4zz18-op4e2lbej01tcvu
+
+failed_pipeline_with_two_jobs:
+  uuid: zzzzz-d1hrv-twofailpipeline
+  owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  state: Failed
+  components:
+   ancient:
+    job:
+      uuid: zzzzz-8i9sb-ahd7cie8jah9qui
+      log: zzzzz-4zz18-op4e2lbej01tcvu
+   previous:
+    job:
+      uuid: zzzzz-8i9sb-cjs4pklxxjykqqq
+      log: zzzzz-4zz18-op4e2lbej01tcvu
+
 # Test Helper trims the rest of the file
 
 # Do not add your fixtures below this line as the rest of this file will be trimmed by test_helper
index 40bf63dd7e2108b490841c940e12a7aa34253432..cbd82de9241101a72cc1c263903b9a403a8234fa 100644 (file)
@@ -203,3 +203,43 @@ pipeline_template_in_publicly_accessible_project:
           dataclass: Collection
           title: "default input"
           description: "input collection"
+
+# Used to test renaming when an object is removed from the "aproject"
+# subproject while another object with the same name exists in the home project.
+template_in_active_user_home_project_to_test_unique_key_violation:
+  uuid: zzzzz-p5p6p-templatsamenam1
+  owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  created_at: 2013-04-14 12:35:04 -0400
+  updated_at: 2013-04-14 12:35:04 -0400
+  modified_at: 2013-04-14 12:35:04 -0400
+  modified_by_client_uuid: zzzzz-ozdt8-brczlopd8u8d0jr
+  modified_by_user_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  name: Template to test owner uuid and name unique key violation upon removal
+  components:
+    script: foo
+    script_version: master
+    script_parameters:
+      input:
+        required: true
+        dataclass: Collection
+        title: "Foo/bar pair"
+        description: "Provide a collection containing at least two files."
+
+template_in_asubproject_with_same_name_as_one_in_active_user_home:
+  uuid: zzzzz-p5p6p-templatsamenam2
+  owner_uuid: zzzzz-j7d0g-axqo7eu9pwvna1x
+  created_at: 2013-04-14 12:35:04 -0400
+  updated_at: 2013-04-14 12:35:04 -0400
+  modified_at: 2013-04-14 12:35:04 -0400
+  modified_by_client_uuid: zzzzz-ozdt8-brczlopd8u8d0jr
+  modified_by_user_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  name: Template to test owner uuid and name unique key violation upon removal
+  components:
+    script: foo
+    script_version: master
+    script_parameters:
+      input:
+        required: true
+        dataclass: Collection
+        title: "Foo/bar pair"
+        description: "Provide a collection containing at least two files."
index ab2e360218177e42bf39e422872104b43efc0503..a5aac1168b79e7818d026e2330a3f53363e38613 100644 (file)
@@ -39,3 +39,10 @@ repository4:
   name: admin/foo4
   created_at: 2015-01-01T00:00:00.123456Z
   modified_at: 2015-01-01T00:00:00.123456Z
+
+has_branch_with_commit_hash_name:
+  uuid: zzzzz-s0uqq-382brsig8rp3668
+  owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz # active user
+  name: active/shabranchnames
+  created_at: 2015-01-01T00:00:00.123456Z
+  modified_at: 2015-01-01T00:00:00.123456Z
index 4e17de3bb6982fc24012eba6465e9d329e061bcd..e2088db68a73d9da53d5e1bb0bafd6d9f874f61d 100644 (file)
@@ -30,6 +30,7 @@ admin:
     profile:
       organization: example.com
       role: IT
+    getting_started_shown: 2015-03-26 12:34:56.789000000 Z
 
 miniadmin:
   owner_uuid: zzzzz-tpzed-000000000000000
@@ -45,6 +46,7 @@ miniadmin:
     profile:
       organization: example.com
       role: IT
+    getting_started_shown: 2015-03-26 12:34:56.789000000 Z
 
 rominiadmin:
   owner_uuid: zzzzz-tpzed-000000000000000
@@ -60,6 +62,7 @@ rominiadmin:
     profile:
       organization: example.com
       role: IT
+    getting_started_shown: 2015-03-26 12:34:56.789000000 Z
 
 active:
   owner_uuid: zzzzz-tpzed-000000000000000
@@ -75,6 +78,7 @@ active:
     profile:
       organization: example.com
       role: Computational biologist
+    getting_started_shown: 2015-03-26 12:34:56.789000000 Z
 
 project_viewer:
   owner_uuid: zzzzz-tpzed-000000000000000
@@ -90,6 +94,7 @@ project_viewer:
     profile:
       organization: example.com
       role: Computational biologist
+    getting_started_shown: 2015-03-26 12:34:56.789000000 Z
 
 future_project_user:
   # Workbench tests give this user permission on aproject.
@@ -106,6 +111,7 @@ future_project_user:
     profile:
       organization: example.com
       role: Computational biologist
+    getting_started_shown: 2015-03-26 12:34:56.789000000 Z
 
 subproject_admin:
   owner_uuid: zzzzz-tpzed-000000000000000
@@ -121,6 +127,7 @@ subproject_admin:
     profile:
       organization: example.com
       role: Computational biologist
+    getting_started_shown: 2015-03-26 12:34:56.789000000 Z
 
 spectator:
   owner_uuid: zzzzz-tpzed-000000000000000
@@ -136,6 +143,7 @@ spectator:
     profile:
       organization: example.com
       role: Computational biologist
+    getting_started_shown: 2015-03-26 12:34:56.789000000 Z
 
 inactive_uninvited:
   owner_uuid: zzzzz-tpzed-000000000000000
@@ -173,6 +181,7 @@ inactive_but_signed_user_agreement:
     profile:
       organization: example.com
       role: Computational biologist
+    getting_started_shown: 2015-03-26 12:34:56.789000000 Z
 
 anonymous:
   owner_uuid: zzzzz-tpzed-000000000000000
@@ -198,6 +207,7 @@ job_reader:
     profile:
       organization: example.com
       role: Computational biologist
+    getting_started_shown: 2015-03-26 12:34:56.789000000 Z
 
 active_no_prefs:
   owner_uuid: zzzzz-tpzed-000000000000000
@@ -210,7 +220,7 @@ active_no_prefs:
   is_admin: false
   prefs: {}
 
-active_no_prefs_profile:
+active_no_prefs_profile_no_getting_started_shown:
   owner_uuid: zzzzz-tpzed-000000000000000
   uuid: zzzzz-tpzed-a46c98d1td4aoj4
   email: active_no_prefs_profile@arvados.local
@@ -222,6 +232,19 @@ active_no_prefs_profile:
   prefs:
     test: abc
 
+active_no_prefs_profile_with_getting_started_shown:
+  owner_uuid: zzzzz-tpzed-000000000000000
+  uuid: zzzzz-tpzed-getstartnoprofl
+  email: active_no_prefs_profile@arvados.local
+  first_name: HasPrefs
+  last_name: NoProfileWithGettingStartedShown
+  identity_url: https://active_no_prefs_profile_seen_gs.openid.local
+  is_active: true
+  is_admin: false
+  prefs:
+    test: abc
+    getting_started_shown: 2015-03-26 12:34:56.789000000 Z
+
 # Fixtures to test granting and removing permissions.
 
 user_foo_in_sharing_group:
@@ -259,6 +282,7 @@ user1_with_load:
     profile:
       organization: example.com
       role: IT
+    getting_started_shown: 2015-03-26 12:34:56.789000000 Z
 
 fuse:
   owner_uuid: zzzzz-tpzed-000000000000000
@@ -274,3 +298,4 @@ fuse:
     profile:
       organization: example.com
       role: IT
+    getting_started_shown: 2015-03-26 12:34:56.789000000 Z
index 54ffe66f174baf341ae19a00a58c71b578f9b3ce..3257e494c500cb1b901cb65382f51b27afeba7ec 100644 (file)
@@ -91,6 +91,45 @@ class Arvados::V1::CollectionsControllerTest < ActionController::TestCase
     assert_equal 99999, resp['offset']
   end
 
+  def request_capped_index(params={})
+    authorize_with :user1_with_load
+    coll1 = collections(:collection_1_of_201)
+    Rails.configuration.max_index_database_read =
+      yield(coll1.manifest_text.size)
+    get :index, {
+      select: %w(uuid manifest_text),
+      filters: [["owner_uuid", "=", coll1.owner_uuid]],
+      limit: 300,
+    }.merge(params)
+  end
+
+  test "index with manifest_text limited by max_index_database_read" do
+    request_capped_index() { |size| (size * 3) + 1 }
+    assert_response :success
+    assert_equal(4, json_response["items"].size)
+    assert_equal(4, json_response["limit"])
+    assert_equal(201, json_response["items_available"])
+  end
+
+  test "max_index_database_read does not interfere with limit" do
+    request_capped_index(limit: 5) { |size| size * 20 }
+    assert_response :success
+    assert_equal(5, json_response["items"].size)
+    assert_equal(5, json_response["limit"])
+    assert_equal(201, json_response["items_available"])
+  end
+
+  test "max_index_database_read does not interfere with order" do
+    request_capped_index(order: "name DESC") { |size| (size * 15) + 1 }
+    assert_response :success
+    assert_equal(16, json_response["items"].size)
+    assert_empty(json_response["items"].select do |coll|
+                   coll["name"] !~ /^Collection_9/
+                 end)
+    assert_equal(16, json_response["limit"])
+    assert_equal(201, json_response["items_available"])
+  end
+
   test "admin can create collection with unsigned manifest" do
     authorize_with :admin
     test_collection = {
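
These tests assume max_index_database_read caps a response by accumulating manifest_text bytes and cutting the item list off once the budget is spent, after limit and order have been applied. A rough sketch of that capping rule (the method name is hypothetical, not the server's actual code):

  def cap_by_manifest_bytes(items, max_bytes)
    bytes_read = 0
    capped = []
    items.each do |item|
      # The item that crosses the budget is still included.
      capped << item
      bytes_read += item['manifest_text'].size
      break if bytes_read >= max_bytes
    end
    capped
  end

  # With equal manifest sizes s and max_bytes = 3*s + 1, this yields 4 items,
  # matching the first test above.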
index 2b109b61f795c00fd6b3cd971ddaa94cfbca159e..4af1c6eaa8b06fa803aa3f05d9e7cd6bb3058b59 100644 (file)
@@ -1,102 +1,4 @@
 require 'test_helper'
-require 'helpers/git_test_helper'
-
-# NOTE: calling Commit.find_commit_range(user, nil, nil, 'rev') will produce
-# an error message "fatal: bad object 'rev'" on stderr if 'rev' does not exist
-# in a given repository.  Many of these tests report such errors; their presence
-# does not represent a fatal condition.
-#
-# TODO(twp): consider better error handling of these messages, or
-# decide to abandon it.
 
 class Arvados::V1::CommitsControllerTest < ActionController::TestCase
-  fixtures :repositories, :users
-
-  # See git_setup.rb for the commit log for test.git.tar
-  include GitTestHelper
-
-  test "test_find_commit_range" do
-    authorize_with :active
-
-  # single
-    a = Commit.find_commit_range(users(:active), nil, nil, '31ce37fe365b3dc204300a3e4c396ad333ed0556', nil)
-    assert_equal ['31ce37fe365b3dc204300a3e4c396ad333ed0556'], a
-
-  #test "test_branch1" do
-    # complains "fatal: bad object 077ba2ad3ea24a929091a9e6ce545c93199b8e57"
-    a = Commit.find_commit_range(users(:active), nil, nil, 'master', nil)
-    assert_includes(a, 'f35f99b7d32bac257f5989df02b9f12ee1a9b0d6')
-    assert_includes(a, '077ba2ad3ea24a929091a9e6ce545c93199b8e57')
-
-  #test "test_branch2" do
-    a = Commit.find_commit_range(users(:active), 'active/foo', nil, 'b1', nil)
-    assert_equal ['1de84a854e2b440dc53bf42f8548afa4c17da332'], a
-
-  #test "test_branch3" do
-    a = Commit.find_commit_range(users(:active), 'active/foo', nil, 'HEAD', nil)
-    assert_equal ['1de84a854e2b440dc53bf42f8548afa4c17da332'], a
-
-  #test "test_single_revision_repo" do
-    a = Commit.find_commit_range(users(:active), "active/foo", nil, '31ce37fe365b3dc204300a3e4c396ad333ed0556', nil)
-    assert_equal ['31ce37fe365b3dc204300a3e4c396ad333ed0556'], a
-    a = Commit.find_commit_range(users(:active), "arvados", nil, '31ce37fe365b3dc204300a3e4c396ad333ed0556', nil)
-    assert_equal nil, a
-
-  #test "test_multi_revision" do
-    # complains "fatal: bad object 077ba2ad3ea24a929091a9e6ce545c93199b8e57"
-    a = Commit.find_commit_range(users(:active), nil, '31ce37fe365b3dc204300a3e4c396ad333ed0556', '077ba2ad3ea24a929091a9e6ce545c93199b8e57', nil)
-    assert_equal ['077ba2ad3ea24a929091a9e6ce545c93199b8e57', '4fe459abe02d9b365932b8f5dc419439ab4e2577', '31ce37fe365b3dc204300a3e4c396ad333ed0556'], a
-
-  #test "test_tag" do
-    # complains "fatal: ambiguous argument 'tag1': unknown revision or path
-    # not in the working tree."
-    a = Commit.find_commit_range(users(:active), nil, 'tag1', 'master', nil)
-    assert_equal ['077ba2ad3ea24a929091a9e6ce545c93199b8e57', '4fe459abe02d9b365932b8f5dc419439ab4e2577'], a
-
-  #test "test_multi_revision_exclude" do
-    a = Commit.find_commit_range(users(:active), nil, '31ce37fe365b3dc204300a3e4c396ad333ed0556', '077ba2ad3ea24a929091a9e6ce545c93199b8e57', ['4fe459abe02d9b365932b8f5dc419439ab4e2577'])
-    assert_equal ['077ba2ad3ea24a929091a9e6ce545c93199b8e57', '31ce37fe365b3dc204300a3e4c396ad333ed0556'], a
-
-  #test "test_multi_revision_tagged_exclude" do
-    # complains "fatal: bad object 077ba2ad3ea24a929091a9e6ce545c93199b8e57"
-    a = Commit.find_commit_range(users(:active), nil, '31ce37fe365b3dc204300a3e4c396ad333ed0556', '077ba2ad3ea24a929091a9e6ce545c93199b8e57', ['tag1'])
-    assert_equal ['077ba2ad3ea24a929091a9e6ce545c93199b8e57', '31ce37fe365b3dc204300a3e4c396ad333ed0556'], a
-
-    Dir.mktmpdir do |touchdir|
-      # invalid input to maximum
-      a = Commit.find_commit_range(users(:active), nil, nil, "31ce37fe365b3dc204300a3e4c396ad333ed0556 ; touch #{touchdir}/uh_oh", nil)
-      assert !File.exists?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'maximum' parameter of find_commit_range is exploitable"
-      assert_equal nil, a
-
-      # invalid input to maximum
-      a = Commit.find_commit_range(users(:active), nil, nil, "$(uname>#{touchdir}/uh_oh)", nil)
-      assert !File.exists?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'maximum' parameter of find_commit_range is exploitable"
-      assert_equal nil, a
-
-      # invalid input to minimum
-      a = Commit.find_commit_range(users(:active), nil, "31ce37fe365b3dc204300a3e4c396ad333ed0556 ; touch #{touchdir}/uh_oh", "31ce37fe365b3dc204300a3e4c396ad333ed0556", nil)
-      assert !File.exists?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'minimum' parameter of find_commit_range is exploitable"
-      assert_equal nil, a
-
-      # invalid input to minimum
-      a = Commit.find_commit_range(users(:active), nil, "$(uname>#{touchdir}/uh_oh)", "31ce37fe365b3dc204300a3e4c396ad333ed0556", nil)
-      assert !File.exists?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'minimum' parameter of find_commit_range is exploitable"
-      assert_equal nil, a
-
-      # invalid input to 'excludes'
-      # complains "fatal: bad object 077ba2ad3ea24a929091a9e6ce545c93199b8e57"
-      a = Commit.find_commit_range(users(:active), nil, "31ce37fe365b3dc204300a3e4c396ad333ed0556", "077ba2ad3ea24a929091a9e6ce545c93199b8e57", ["4fe459abe02d9b365932b8f5dc419439ab4e2577 ; touch #{touchdir}/uh_oh"])
-      assert !File.exists?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'excludes' parameter of find_commit_range is exploitable"
-      assert_equal nil, a
-
-      # invalid input to 'excludes'
-      # complains "fatal: bad object 077ba2ad3ea24a929091a9e6ce545c93199b8e57"
-      a = Commit.find_commit_range(users(:active), nil, "31ce37fe365b3dc204300a3e4c396ad333ed0556", "077ba2ad3ea24a929091a9e6ce545c93199b8e57", ["$(uname>#{touchdir}/uh_oh)"])
-      assert !File.exists?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'excludes' parameter of find_commit_range is exploitable"
-      assert_equal nil, a
-
-    end
-
-  end
-
 end
index 1dd620a68cd975ae3866081424a2fd00c065f5ff..64d559107c19257ca7a954d323f42cba60c7a9c2 100644 (file)
@@ -323,6 +323,11 @@ class Arvados::V1::JobReuseControllerTest < ActionController::TestCase
                      new_job['script_version'])
   end
 
+  test "cannot reuse job when hash-like branch includes newer commit" do
+    check_new_job_created_from({job: {script_version: "738783"}},
+                               :previous_job_run_superseded_by_hash_branch)
+  end
+
   BASE_FILTERS = {
     'repository' => ['=', 'active/foo'],
     'script' => ['=', 'hash'],
@@ -510,6 +515,21 @@ class Arvados::V1::JobReuseControllerTest < ActionController::TestCase
     assert_not_equal(jobs(:previous_docker_job_run).uuid, new_job.uuid)
   end
 
+  test "don't reuse job using older Docker image of same name" do
+    jobspec = {runtime_constraints: {
+        docker_image: "arvados/apitestfixture",
+      }}
+    check_new_job_created_from({job: jobspec},
+                               :previous_ancient_docker_image_job_run)
+  end
+
+  test "reuse job with Docker image that has hash name" do
+    jobspec = {runtime_constraints: {
+        docker_image: "a" * 64,
+      }}
+    check_job_reused_from(jobspec, :previous_docker_job_run)
+  end
+
   ["repository", "script"].each do |skip_key|
     test "missing #{skip_key} filter raises an error" do
       filters = filters_from_hash(BASE_FILTERS.reject { |k| k == skip_key })
@@ -599,35 +619,52 @@ class Arvados::V1::JobReuseControllerTest < ActionController::TestCase
                     jobs(:previous_docker_job_run).uuid)
   end
 
-  def create_foo_hash_job_params(params)
+  JOB_SUBMIT_KEYS = [:script, :script_parameters, :script_version, :repository]
+  DEFAULT_START_JOB = :previous_job_run
+
+  def create_job_params(params, start_from=DEFAULT_START_JOB)
     if not params.has_key?(:find_or_create)
       params[:find_or_create] = true
     end
     job_attrs = params.delete(:job) || {}
-    params[:job] = {
-      script: "hash",
-      script_version: "4fe459abe02d9b365932b8f5dc419439ab4e2577",
-      repository: "active/foo",
-      script_parameters: {
-        input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
-        an_integer: '1',
-      },
-    }.merge(job_attrs)
+    start_job = jobs(start_from)
+    params[:job] = Hash[JOB_SUBMIT_KEYS.map do |key|
+                          [key, start_job.send(key)]
+                        end]
+    params[:job][:runtime_constraints] =
+      job_attrs.delete(:runtime_constraints) || {}
+    { arvados_sdk_version: :arvados_sdk_version,
+      docker_image_locator: :docker_image }.each do |method, constraint_key|
+      if constraint_value = start_job.send(method)
+        params[:job][:runtime_constraints][constraint_key] ||= constraint_value
+      end
+    end
+    params[:job].merge!(job_attrs)
     params
   end
 
-  def check_new_job_created_from(params)
-    start_time = Time.now
-    post(:create, create_foo_hash_job_params(params))
+  def create_job_from(params, start_from)
+    post(:create, create_job_params(params, start_from))
     assert_response :success
     new_job = assigns(:object)
     assert_not_nil new_job
+    new_job
+  end
+
+  def check_new_job_created_from(params, start_from=DEFAULT_START_JOB)
+    start_time = Time.now
+    new_job = create_job_from(params, start_from)
     assert_operator(start_time, :<=, new_job.created_at)
     new_job
   end
 
-  def check_errors_from(params)
-    post(:create, create_foo_hash_job_params(params))
+  def check_job_reused_from(params, start_from)
+    new_job = create_job_from(params, start_from)
+    assert_equal(jobs(start_from).uuid, new_job.uuid)
+  end
+
+  def check_errors_from(params, start_from=DEFAULT_START_JOB)
+    post(:create, create_job_params(params, start_from))
     assert_includes(405..499, @response.code.to_i)
     errors = json_response.fetch("errors", [])
     assert(errors.any?, "no errors assigned from #{params}")
@@ -670,27 +707,40 @@ class Arvados::V1::JobReuseControllerTest < ActionController::TestCase
            "bad refspec not mentioned in error message")
   end
 
-  test "can't reuse job with older Arvados SDK version" do
-    params = {
-      script_version: "31ce37fe365b3dc204300a3e4c396ad333ed0556",
-      runtime_constraints: {
-        "arvados_sdk_version" => "master",
-        "docker_image" => links(:docker_image_collection_tag).name,
-      },
-    }
-    check_new_job_created_from(job: params)
+  test "don't reuse job with older Arvados SDK version specified by branch" do
+    jobspec = {runtime_constraints: {
+        arvados_sdk_version: "master",
+      }}
+    check_new_job_created_from({job: jobspec},
+                               :previous_job_run_with_arvados_sdk_version)
+  end
+
+  test "don't reuse job with older Arvados SDK version specified by commit" do
+    jobspec = {runtime_constraints: {
+        arvados_sdk_version: "ca68b24e51992e790f29df5cc4bc54ce1da4a1c2",
+      }}
+    check_new_job_created_from({job: jobspec},
+                               :previous_job_run_with_arvados_sdk_version)
+  end
+
+  test "don't reuse job with newer Arvados SDK version specified by commit" do
+    jobspec = {runtime_constraints: {
+        arvados_sdk_version: "436637c87a1d2bdbf4b624008304064b6cf0e30c",
+      }}
+    check_new_job_created_from({job: jobspec},
+                               :previous_job_run_with_arvados_sdk_version)
   end
 
   test "reuse job from arvados_sdk_version git filters" do
+    prev_job = jobs(:previous_job_run_with_arvados_sdk_version)
     filters_hash = BASE_FILTERS.
-      merge("arvados_sdk_version" => ["in git", "commit2"])
+      merge("arvados_sdk_version" => ["in git", "commit2"],
+            "docker_image_locator" => ["=", prev_job.docker_image_locator])
     filters_hash.delete("script_version")
-    params = create_foo_hash_job_params(filters:
-                                        filters_from_hash(filters_hash))
+    params = create_job_params(filters: filters_from_hash(filters_hash))
     post(:create, params)
     assert_response :success
-    assert_equal(jobs(:previous_job_run_with_arvados_sdk_version).uuid,
-                 assigns(:object).uuid)
+    assert_equal(prev_job.uuid, assigns(:object).uuid)
   end
 
   test "create new job because of arvados_sdk_version 'not in git' filters" do
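
The refactored helpers make the reuse expectation explicit in each test: check_job_reused_from asserts the POST resolves to the starting fixture's UUID, while check_new_job_created_from asserts a fresh record with a later created_at. Typical call patterns, taken from the tests above:

  # Expect reuse: a hash-named Docker image resolves to the same job.
  check_job_reused_from({runtime_constraints: {docker_image: 'a' * 64}},
                        :previous_docker_job_run)

  # Expect a new job: the older image of the same name defeats reuse.
  check_new_job_created_from(
    {job: {runtime_constraints: {docker_image: 'arvados/apitestfixture'}}},
    :previous_ancient_docker_image_job_run)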
index b8b061f69b774fb7f2b330517d37d00f1da4e64a..1e1425e92b7d27057e89a335c2480b8024b0c444 100644 (file)
@@ -392,4 +392,45 @@ class Arvados::V1::JobsControllerTest < ActionController::TestCase
     post :lock, {id: jobs(:running).uuid}
     assert_response 403 # forbidden
   end
+
+  test 'reject invalid commit in remote repository' do
+    authorize_with :active
+    url = "http://localhost:1/fake/fake.git"
+    fetch_remote_from_local_repo url, :foo
+    post :create, job: {
+      script: "hash",
+      script_version: "abc123",
+      repository: url,
+      script_parameters: {}
+    }
+    assert_response 422
+  end
+
+  test 'tag remote commit in internal repository' do
+    authorize_with :active
+    url = "http://localhost:1/fake/fake.git"
+    fetch_remote_from_local_repo url, :foo
+    post :create, job: {
+      script: "hash",
+      script_version: "master",
+      repository: url,
+      script_parameters: {}
+    }
+    assert_response :success
+    assert_equal('077ba2ad3ea24a929091a9e6ce545c93199b8e57',
+                 internal_tag(json_response['uuid']))
+  end
+
+  test 'tag local commit in internal repository' do
+    authorize_with :active
+    post :create, job: {
+      script: "hash",
+      script_version: "master",
+      repository: "active/foo",
+      script_parameters: {}
+    }
+    assert_response :success
+    assert_equal('077ba2ad3ea24a929091a9e6ce545c93199b8e57',
+                 internal_tag(json_response['uuid']))
+  end
 end
index fe5bb1cd09f92fdb390ce215c338f6ada750d534..7f4ed8e4f16745a4146fe563b0af5e9b8dd880ab 100644 (file)
@@ -97,26 +97,45 @@ class Arvados::V1::RepositoriesControllerTest < ActionController::TestCase
   end
 
   [
-    {config: "example.com", host: "example.com"},
-    {config: false, host: "git.zzzzz.arvadosapi.com"}
-  ].each do |set_git_host|
-    test "setting git_host to #{set_git_host[:host]} changes fetch/push_url to #{set_git_host[:config]}" do
-      Rails.configuration.git_host = set_git_host[:config]
+    {cfg: :git_repo_ssh_base, cfgval: "git@example.com:", match: %r"^git@example.com:/"},
+    {cfg: :git_repo_ssh_base, cfgval: true, match: %r"^git@git.zzzzz.arvadosapi.com:/"},
+    {cfg: :git_repo_ssh_base, cfgval: false, refute: /^git@/ },
+    {cfg: :git_repo_https_base, cfgval: "https://example.com/", match: %r"https://example.com/"},
+    {cfg: :git_repo_https_base, cfgval: true, match: %r"^https://git.zzzzz.arvadosapi.com/"},
+    {cfg: :git_repo_https_base, cfgval: false, refute: /^http/ },
+  ].each do |expect|
+    test "set #{expect[:cfg]} to #{expect[:cfgval]}" do
+      Rails.configuration.send expect[:cfg].to_s+"=", expect[:cfgval]
       authorize_with :active
-      get(:index)
+      get :index
       assert_response :success
-      assert_includes(json_response["items"].map { |r| r["fetch_url"] },
-                      "git@#{set_git_host[:host]}:active/foo.git")
-      assert_includes(json_response["items"].map { |r| r["push_url"] },
-                      "git@#{set_git_host[:host]}:active/foo.git")
+      json_response['items'].each do |r|
+        if expect[:refute]
+          r['clone_urls'].each do |u|
+            refute_match expect[:refute], u
+          end
+        else
+          assert r['clone_urls'].any? { |u|
+            expect[:match].match u
+          }
+        end
+      end
     end
   end
 
-  test "can select push_url in index" do
+  test "select push_url in index" do
     authorize_with :active
     get(:index, {select: ["uuid", "push_url"]})
     assert_response :success
     assert_includes(json_response["items"].map { |r| r["push_url"] },
                     "git@git.zzzzz.arvadosapi.com:active/foo.git")
   end
+
+  test "select clone_urls in index" do
+    authorize_with :active
+    get(:index, {select: ["uuid", "clone_urls"]})
+    assert_response :success
+    assert_includes(json_response["items"].map { |r| r["clone_urls"] }.flatten,
+                    "git@git.zzzzz.arvadosapi.com:active/foo.git")
+  end
 end
index f776ad2e56289727a577d8895ac730d27cc0f6a7..bf27d735cc6b98cb962d9c09e86a01ddc4b4142d 100644 (file)
@@ -745,17 +745,17 @@ class Arvados::V1::UsersControllerTest < ActionController::TestCase
     authorize_with :admin
 
     user = {}
-    user[:prefs] = users(:active_no_prefs_profile).prefs
+    user[:prefs] = users(:active_no_prefs_profile_no_getting_started_shown).prefs
     user[:prefs][:profile] = {:profile => {'organization' => 'example.com'}}
     put :update, {
-      id: users(:active_no_prefs_profile).uuid,
+      id: users(:active_no_prefs_profile_no_getting_started_shown).uuid,
       user: user
     }
     assert_response :success
 
     found_email = false
     ActionMailer::Base.deliveries.andand.each do |email|
-      if email.subject == "Profile created by #{users(:active_no_prefs_profile).email}"
+      if email.subject == "Profile created by #{users(:active_no_prefs_profile_no_getting_started_shown).email}"
         found_email = true
         break
       end
index 67e99c18dcfb25340153ace9e4d29674af17bb16..6fce321953d3add8138056306629500927771f65 100644 (file)
@@ -14,15 +14,47 @@ require 'tmpdir'
 module GitTestHelper
   def self.included base
     base.setup do
-      @tmpdir = Dir.mktmpdir()
-      system("tar", "-xC", @tmpdir, "-f", "test/test.git.tar")
+      # Extract the test repository data into the default test
+      # environment's Rails.configuration.git_repositories_dir. (We
+      # don't use that config setting here, though: it doesn't seem
+      # worth the risk of stepping on a real git repo root.)
+      @tmpdir = Rails.root.join 'tmp', 'git'
+      FileUtils.mkdir_p @tmpdir
+      system("tar", "-xC", @tmpdir.to_s, "-f", "test/test.git.tar")
       Rails.configuration.git_repositories_dir = "#{@tmpdir}/test"
-      Commit.refresh_repositories
+
+      intdir = Rails.configuration.git_internal_dir
+      if not File.exist? intdir
+        FileUtils.mkdir_p intdir
+        IO.read("|git --git-dir #{intdir.to_s.shellescape} init")
+        assert $?.success?
+      end
     end
 
     base.teardown do
       FileUtils.remove_entry @tmpdir, true
-      Commit.refresh_repositories
+      FileUtils.remove_entry Commit.cache_dir_base, true
+    end
+  end
+
+  def internal_tag tag
+    IO.read "|git --git-dir #{Rails.configuration.git_internal_dir.shellescape} log --format=format:%H -n1 #{tag.shellescape}"
+  end
+
+  # Intercept fetch_remote_repository and fetch from a specified url
+  # or local fixture instead of the remote url requested. fakeurl can
+  # be a url (probably starting with file:///) or the name of a
+  # fixture (as a symbol)
+  def fetch_remote_from_local_repo url, fakeurl
+    if fakeurl.is_a? Symbol
+      fakeurl = 'file://' + repositories(fakeurl).server_path
+    end
+    Commit.expects(:fetch_remote_repository).once.with do |gitdir, giturl|
+      if giturl == url
+        Commit.unstub(:fetch_remote_repository)
+        Commit.fetch_remote_repository gitdir, fakeurl
+        true
+      end
     end
   end
 end
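
fetch_remote_from_local_repo lets a functional test exercise the remote-repository code path without touching the network: the Mocha expectation redirects the requested URL to a local fixture repo, then unstubs itself so the real fetch logic runs against it. Typical use, as in the jobs controller tests earlier in this commit:

  url = 'http://localhost:1/fake/fake.git'
  fetch_remote_from_local_repo url, :foo    # serve the :foo fixture for this URL
  post :create, job: {script: 'hash', script_version: 'master',
                      repository: url, script_parameters: {}}
  assert_response :success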
diff --git a/services/api/test/tasks/delete_old_job_logs_test.rb b/services/api/test/tasks/delete_old_job_logs_test.rb
new file mode 100644 (file)
index 0000000..b922fb3
--- /dev/null
@@ -0,0 +1,50 @@
+require 'test_helper'
+require 'rake'
+
+Rake.application.rake_require "tasks/delete_old_job_logs"
+Rake::Task.define_task(:environment)
+
+class DeleteOldJobLogsTaskTest < ActiveSupport::TestCase
+  TASK_NAME = "db:delete_old_job_logs"
+
+  def log_uuids(*fixture_names)
+    fixture_names.map { |name| logs(name).uuid }
+  end
+
+  def run_with_expiry(clean_after)
+    Rails.configuration.clean_job_log_rows_after = clean_after
+    Rake::Task[TASK_NAME].reenable
+    Rake.application.invoke_task TASK_NAME
+  end
+
+  def job_stderr_logs
+    Log.where("object_uuid LIKE :pattern AND event_type = :etype",
+              pattern: "_____-8i9sb-_______________",
+              etype: "stderr")
+  end
+
+  def check_existence(test_method, fixture_uuids)
+    uuids_now = job_stderr_logs.map(&:uuid)
+    fixture_uuids.each do |expect_uuid|
+      send(test_method, uuids_now, expect_uuid)
+    end
+  end
+
+  test "delete all logs" do
+    uuids_to_keep = log_uuids(:crunchstat_for_running_job)
+    uuids_to_clean = log_uuids(:crunchstat_for_previous_job,
+                               :crunchstat_for_ancient_job)
+    run_with_expiry(1)
+    check_existence(:assert_includes, uuids_to_keep)
+    check_existence(:refute_includes, uuids_to_clean)
+  end
+
+  test "delete only old logs" do
+    uuids_to_keep = log_uuids(:crunchstat_for_running_job,
+                              :crunchstat_for_previous_job)
+    uuids_to_clean = log_uuids(:crunchstat_for_ancient_job)
+    run_with_expiry(360.days)
+    check_existence(:assert_includes, uuids_to_keep)
+    check_existence(:refute_includes, uuids_to_clean)
+  end
+end
index 9b8e4d5a1671321630e342c6ced56d2fc31e0d15..faa0d656d392c1862349c69234ae408ee8dbe738 100644 (file)
Binary files a/services/api/test/test.git.tar and b/services/api/test/test.git.tar differ
index bf5afea1e2dd6b44fa68332df4c1f1512855a472..68d4bbf5af4b03349b11259f82357e917dd52cf7 100644 (file)
@@ -22,6 +22,7 @@ end
 
 require File.expand_path('../../config/environment', __FILE__)
 require 'rails/test_help'
+require 'mocha/mini_test'
 
 module ArvadosTestSupport
   def json_response
index 0418a94510d9e046564cd5b49db5f659bd194330..6918aa0d00058b4d6183e92c89506aacb45a3f85 100644 (file)
@@ -131,7 +131,7 @@ class ArvadosModelTest < ActiveSupport::TestCase
         search_index_columns = table_class.searchable_columns('ilike')
         # Disappointing, but text columns aren't indexed yet.
         search_index_columns -= table_class.columns.select { |c|
-          c.type == :text or c.name == 'description'
+          c.type == :text or c.name == 'description' or c.name == 'file_names'
         }.collect(&:name)
 
         indexes = ActiveRecord::Base.connection.indexes(table)
index d8b8365efa212f3447aceddec6decd2154520584..93f472af60bbcb7fe5b38897b9acc28baf29a50d 100644 (file)
@@ -253,4 +253,11 @@ class CollectionTest < ActiveSupport::TestCase
       assert c.valid?
     end
   end
+
+  test "find_all_for_docker_image resolves names that look like hashes" do
+    coll_list = Collection.
+      find_all_for_docker_image('a' * 64, nil, [users(:active)])
+    coll_uuids = coll_list.map(&:uuid)
+    assert_includes(coll_uuids, collections(:docker_image).uuid)
+  end
 end
index 2424af32755cf08c4eefbb96cecc081d35f68ef0..b57c23b4538dee4339a0a27630a1ad36e7e575a6 100644 (file)
@@ -1,7 +1,217 @@
 require 'test_helper'
+require 'helpers/git_test_helper'
+
+# NOTE: calling Commit.find_commit_range(nil, nil, 'rev')
+# produces an error message "fatal: bad object 'rev'" on stderr if
+# 'rev' does not exist in a given repository.  Many of these tests
+# report such errors; their presence does not represent a fatal
+# condition.
 
 class CommitTest < ActiveSupport::TestCase
-  # test "the truth" do
-  #   assert true
-  # end
+  # See git_setup.rb for the commit log for test.git.tar
+  include GitTestHelper
+
+  setup do
+    authorize_with :active
+  end
+
+  test 'find_commit_range does not bypass permissions' do
+    authorize_with :inactive
+    assert_raises ArgumentError do
+      c = Commit.find_commit_range 'foo', nil, 'master', []
+    end
+  end
+
+  [
+   'https://github.com/curoverse/arvados.git',
+   'http://github.com/curoverse/arvados.git',
+   'git://github.com/curoverse/arvados.git',
+  ].each do |url|
+    test "find_commit_range uses fetch_remote_repository to get #{url}" do
+      fake_gitdir = repositories(:foo).server_path
+      Commit.expects(:cache_dir_for).once.with(url).returns fake_gitdir
+      Commit.expects(:fetch_remote_repository).once.with(fake_gitdir, url).returns true
+      c = Commit.find_commit_range url, nil, 'master', []
+      refute_empty c
+    end
+  end
+
+  [
+   'bogus/repo',
+   '/bogus/repo',
+   '/not/allowed/.git',
+   'file:///not/allowed.git',
+   'git.curoverse.com/arvados.git',
+   'github.com/curoverse/arvados.git',
+  ].each do |url|
+    test "find_commit_range skips fetch_remote_repository for #{url}" do
+      Commit.expects(:fetch_remote_repository).never
+      assert_raises ArgumentError do
+        Commit.find_commit_range url, nil, 'master', []
+      end
+    end
+  end
+
+  test 'fetch_remote_repository does not leak commits across repositories' do
+    url = "http://localhost:1/fake/fake.git"
+    fetch_remote_from_local_repo url, :foo
+    c = Commit.find_commit_range url, nil, 'master', []
+    assert_equal ['077ba2ad3ea24a929091a9e6ce545c93199b8e57'], c
+
+    url = "http://localhost:2/fake/fake.git"
+    fetch_remote_from_local_repo url, 'file://' + File.expand_path('../../.git', Rails.root)
+    c = Commit.find_commit_range url, nil, '077ba2ad3ea24a929091a9e6ce545c93199b8e57', []
+    assert_equal [], c
+  end
+
+  test 'tag_in_internal_repository creates and updates tags in internal.git' do
+    authorize_with :active
+    gitint = "git --git-dir #{Rails.configuration.git_internal_dir}"
+    IO.read("|#{gitint} tag -d testtag 2>/dev/null") # "no such tag", fine
+    assert_match /^fatal: /, IO.read("|#{gitint} show testtag 2>&1")
+    refute $?.success?
+    Commit.tag_in_internal_repository 'active/foo', '31ce37fe365b3dc204300a3e4c396ad333ed0556', 'testtag'
+    assert_match /^commit 31ce37f/, IO.read("|#{gitint} show testtag")
+    assert $?.success?
+  end
+
+  # In active/shabranchnames, "7387838c69a21827834586cc42b467ff6c63293b" is
+  # both a commit hash, and the name of a branch that begins from that same
+  # commit.
+  COMMIT_BRANCH_NAME = "7387838c69a21827834586cc42b467ff6c63293b"
+  # A commit that appears in the branch after 7387838c.
+  COMMIT_BRANCH_COMMIT_2 = "abec49829bf1758413509b7ffcab32a771b71e81"
+  # "738783" is another branch that starts from the above commit.
+  SHORT_COMMIT_BRANCH_NAME = COMMIT_BRANCH_NAME[0, 6]
+  # A commit that appears in branch 738783 after 7387838c.
+  SHORT_BRANCH_COMMIT_2 = "77e1a93093663705a63bb4d505698047e109dedd"
+
+  test "find_commit_range min_version prefers commits over branch names" do
+    assert_equal([COMMIT_BRANCH_NAME],
+                 Commit.find_commit_range("active/shabranchnames",
+                                          COMMIT_BRANCH_NAME, nil, nil))
+  end
+
+  test "find_commit_range max_version prefers commits over branch names" do
+    assert_equal([COMMIT_BRANCH_NAME],
+                 Commit.find_commit_range("active/shabranchnames",
+                                          nil, COMMIT_BRANCH_NAME, nil))
+  end
+
+  test "find_commit_range min_version with short branch name" do
+    assert_equal([SHORT_BRANCH_COMMIT_2],
+                 Commit.find_commit_range("active/shabranchnames",
+                                          SHORT_COMMIT_BRANCH_NAME, nil, nil))
+  end
+
+  test "find_commit_range max_version with short branch name" do
+    assert_equal([SHORT_BRANCH_COMMIT_2],
+                 Commit.find_commit_range("active/shabranchnames",
+                                          nil, SHORT_COMMIT_BRANCH_NAME, nil))
+  end
+
+  test "find_commit_range min_version with disambiguated branch name" do
+    assert_equal([COMMIT_BRANCH_COMMIT_2],
+                 Commit.find_commit_range("active/shabranchnames",
+                                          "heads/#{COMMIT_BRANCH_NAME}",
+                                          nil, nil))
+  end
+
+  test "find_commit_range max_version with disambiguated branch name" do
+    assert_equal([COMMIT_BRANCH_COMMIT_2],
+                 Commit.find_commit_range("active/shabranchnames", nil,
+                                          "heads/#{COMMIT_BRANCH_NAME}", nil))
+  end
+
+  test "find_commit_range min_version with unambiguous short name" do
+    assert_equal([COMMIT_BRANCH_NAME],
+                 Commit.find_commit_range("active/shabranchnames",
+                                          COMMIT_BRANCH_NAME[0..-2], nil, nil))
+  end
+
+  test "find_commit_range max_version with unambiguous short name" do
+    assert_equal([COMMIT_BRANCH_NAME],
+                 Commit.find_commit_range("active/shabranchnames", nil,
+                                          COMMIT_BRANCH_NAME[0..-2], nil))
+  end
+
+  test "find_commit_range laundry list" do
+    authorize_with :active
+
+    # single
+    a = Commit.find_commit_range('active/foo', nil, '31ce37fe365b3dc204300a3e4c396ad333ed0556', nil)
+    assert_equal ['31ce37fe365b3dc204300a3e4c396ad333ed0556'], a
+
+    #test "test_branch1" do
+    a = Commit.find_commit_range('active/foo', nil, 'master', nil)
+    assert_includes(a, '077ba2ad3ea24a929091a9e6ce545c93199b8e57')
+
+    #test "test_branch2" do
+    a = Commit.find_commit_range('active/foo', nil, 'b1', nil)
+    assert_equal ['1de84a854e2b440dc53bf42f8548afa4c17da332'], a
+
+    #test "test_branch3" do
+    a = Commit.find_commit_range('active/foo', nil, 'HEAD', nil)
+    assert_equal ['1de84a854e2b440dc53bf42f8548afa4c17da332'], a
+
+    #test "test_single_revision_repo" do
+    a = Commit.find_commit_range('active/foo', nil, '31ce37fe365b3dc204300a3e4c396ad333ed0556', nil)
+    assert_equal ['31ce37fe365b3dc204300a3e4c396ad333ed0556'], a
+    a = Commit.find_commit_range('arvados', nil, '31ce37fe365b3dc204300a3e4c396ad333ed0556', nil)
+    assert_equal [], a
+
+    #test "test_multi_revision" do
+    # complains "fatal: bad object 077ba2ad3ea24a929091a9e6ce545c93199b8e57"
+    a = Commit.find_commit_range('active/foo', '31ce37fe365b3dc204300a3e4c396ad333ed0556', '077ba2ad3ea24a929091a9e6ce545c93199b8e57', nil)
+    assert_equal ['077ba2ad3ea24a929091a9e6ce545c93199b8e57', '4fe459abe02d9b365932b8f5dc419439ab4e2577', '31ce37fe365b3dc204300a3e4c396ad333ed0556'], a
+
+    #test "test_tag" do
+    # complains "fatal: ambiguous argument 'tag1': unknown revision or path
+    # not in the working tree."
+    a = Commit.find_commit_range('active/foo', 'tag1', 'master', nil)
+    assert_equal ['077ba2ad3ea24a929091a9e6ce545c93199b8e57', '4fe459abe02d9b365932b8f5dc419439ab4e2577'], a
+
+    #test "test_multi_revision_exclude" do
+    a = Commit.find_commit_range('active/foo', '31ce37fe365b3dc204300a3e4c396ad333ed0556', '077ba2ad3ea24a929091a9e6ce545c93199b8e57', ['4fe459abe02d9b365932b8f5dc419439ab4e2577'])
+    assert_equal ['077ba2ad3ea24a929091a9e6ce545c93199b8e57', '31ce37fe365b3dc204300a3e4c396ad333ed0556'], a
+
+    #test "test_multi_revision_tagged_exclude" do
+    # complains "fatal: bad object 077ba2ad3ea24a929091a9e6ce545c93199b8e57"
+    a = Commit.find_commit_range('active/foo', '31ce37fe365b3dc204300a3e4c396ad333ed0556', '077ba2ad3ea24a929091a9e6ce545c93199b8e57', ['tag1'])
+    assert_equal ['077ba2ad3ea24a929091a9e6ce545c93199b8e57', '31ce37fe365b3dc204300a3e4c396ad333ed0556'], a
+
+    Dir.mktmpdir do |touchdir|
+      # invalid input to maximum
+      a = Commit.find_commit_range('active/foo', nil, "31ce37fe365b3dc204300a3e4c396ad333ed0556 ; touch #{touchdir}/uh_oh", nil)
+      assert !File.exists?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'maximum' parameter of find_commit_range is exploitable"
+      assert_equal [], a
+
+      # invalid input to maximum
+      a = Commit.find_commit_range('active/foo', nil, "$(uname>#{touchdir}/uh_oh)", nil)
+      assert !File.exists?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'maximum' parameter of find_commit_range is exploitable"
+      assert_equal [], a
+
+      # invalid input to minimum
+      a = Commit.find_commit_range('active/foo', "31ce37fe365b3dc204300a3e4c396ad333ed0556 ; touch #{touchdir}/uh_oh", "31ce37fe365b3dc204300a3e4c396ad333ed0556", nil)
+      assert !File.exists?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'minimum' parameter of find_commit_range is exploitable"
+      assert_equal [], a
+
+      # invalid input to minimum
+      a = Commit.find_commit_range('active/foo', "$(uname>#{touchdir}/uh_oh)", "31ce37fe365b3dc204300a3e4c396ad333ed0556", nil)
+      assert !File.exists?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'minimum' parameter of find_commit_range is exploitable"
+      assert_equal [], a
+
+      # invalid input to 'excludes'
+      # complains "fatal: bad object 077ba2ad3ea24a929091a9e6ce545c93199b8e57"
+      a = Commit.find_commit_range('active/foo', "31ce37fe365b3dc204300a3e4c396ad333ed0556", "077ba2ad3ea24a929091a9e6ce545c93199b8e57", ["4fe459abe02d9b365932b8f5dc419439ab4e2577 ; touch #{touchdir}/uh_oh"])
+      assert !File.exists?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'excludes' parameter of find_commit_range is exploitable"
+      assert_equal [], a
+
+      # invalid input to 'excludes'
+      # complains "fatal: bad object 077ba2ad3ea24a929091a9e6ce545c93199b8e57"
+      a = Commit.find_commit_range('active/foo', "31ce37fe365b3dc204300a3e4c396ad333ed0556", "077ba2ad3ea24a929091a9e6ce545c93199b8e57", ["$(uname>#{touchdir}/uh_oh)"])
+      assert !File.exists?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'excludes' parameter of find_commit_range is exploitable"
+      assert_equal [], a
+    end
+  end
 end
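The injection tests above pass because revision arguments are never interpolated into a shell command line. A minimal Go sketch of the same defense, for illustration only (the allowlist pattern and helper name are assumptions; the implementation under test is Ruby):

package main

import (
	"fmt"
	"os/exec"
	"regexp"
)

// revOK is a conservative allowlist: hex SHA-1s, branch-ish names,
// and "heads/..." disambiguations. Anything else is rejected before
// git ever sees it.
var revOK = regexp.MustCompile(`^[A-Za-z0-9_./-]+$`)

func revList(gitDir, min, max string) ([]byte, error) {
	for _, rev := range []string{min, max} {
		if rev != "" && !revOK.MatchString(rev) {
			return nil, fmt.Errorf("invalid revision %q", rev)
		}
	}
	// exec.Command passes each argument verbatim to execve(); no
	// shell runs, so "; touch ..." or "$(uname ...)" stay literal.
	args := []string{"--git-dir", gitDir, "rev-list", max}
	if min != "" {
		args = append(args, "^"+min)
	}
	return exec.Command("git", args...).Output()
}

func main() {
	out, err := revList("/tmp/foo.git", "",
		"31ce37fe365b3dc204300a3e4c396ad333ed0556 ; touch /tmp/uh_oh")
	fmt.Println(string(out), err) // err: invalid revision; /tmp/uh_oh untouched
}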
index 1c8573eb190c3d07ef957c8c5c0ff1e37844ae40..641488986217f7db39165bcb9b2d69ae652a52e9 100644 (file)
@@ -78,6 +78,16 @@ class JobTest < ActiveSupport::TestCase
     assert(job.invalid?, "Job with bad Docker tag valid")
   end
 
+  test "create a job with a disambiguated script_version branch name" do
+    job = Job.
+      new(script: "testscript",
+          script_version: "heads/7387838c69a21827834586cc42b467ff6c63293b",
+          repository: "active/shabranchnames",
+          script_parameters: {})
+    assert(job.save)
+    assert_equal("abec49829bf1758413509b7ffcab32a771b71e81", job.script_version)
+  end
+
   test "locate a Docker image with a partial hash" do
     image_hash = links(:docker_image_collection_hash).name[0..24]
     job = Job.new job_attrs(runtime_constraints:
@@ -400,4 +410,15 @@ class JobTest < ActiveSupport::TestCase
     job = Job.create!(job_attrs(good_params))
     assert job.valid?
   end
+
+  test 'update job uuid tag in internal.git when version changes' do
+    authorize_with :active
+    j = jobs :queued
+    j.update_attributes repository: 'active/foo', script_version: 'b1'
+    assert_equal('1de84a854e2b440dc53bf42f8548afa4c17da332',
+                 internal_tag(j.uuid))
+    j.update_attributes repository: 'active/foo', script_version: 'master'
+    assert_equal('077ba2ad3ea24a929091a9e6ce545c93199b8e57',
+                 internal_tag(j.uuid))
+  end
 end
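The internal-tag test relies on the API server moving a tag named after the job UUID whenever script_version resolves to a different commit. A hedged sketch of that operation in Go (the repository path and function name are illustrative assumptions; the shipped code is Ruby):

package main

import (
	"log"
	"os/exec"
)

// tagJob points refs/tags/<uuid> at commitSHA in the internal bare
// repository. --force moves an existing tag, which is exactly the
// "script_version changes" case the test exercises.
func tagJob(internalGitDir, uuid, commitSHA string) error {
	return exec.Command("git", "--git-dir", internalGitDir,
		"tag", "--force", uuid, commitSHA).Run()
}

func main() {
	err := tagJob("/var/lib/arvados/internal.git",
		"zzzzz-8i9sb-0123456789abcde",
		"1de84a854e2b440dc53bf42f8548afa4c17da332")
	log.Println(err)
}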
index 5acef1bb1e769ca0d1cbc7f6a80d124286b33aa8..288e1184fa2be2cd2ab955edeff6356f5d6e5cd3 100644 (file)
@@ -108,6 +108,7 @@ class RepositoryTest < ActiveSupport::TestCase
 
   test "fetch_url" do
     repo = new_repo(:active, name: "active/fetchtest")
+    repo.save
     assert_equal(default_git_url("fetchtest", "active"), repo.fetch_url)
   end
 
@@ -115,11 +116,13 @@ class RepositoryTest < ActiveSupport::TestCase
     set_user_from_auth :admin
     repo = Repository.new(owner_uuid: users(:system_user).uuid,
                           name: "fetchtest")
+    repo.save
     assert_equal(default_git_url("fetchtest"), repo.fetch_url)
   end
 
   test "push_url" do
     repo = new_repo(:active, name: "active/pushtest")
+    repo.save
     assert_equal(default_git_url("pushtest", "active"), repo.push_url)
   end
 
@@ -127,6 +130,7 @@ class RepositoryTest < ActiveSupport::TestCase
     set_user_from_auth :admin
     repo = Repository.new(owner_uuid: users(:system_user).uuid,
                           name: "pushtest")
+    repo.save
     assert_equal(default_git_url("pushtest"), repo.push_url)
   end
 
index 45dd186dfd1b47f41a98441218976c11bb2dcef7..b96645ce263d2cb97b6028693b88e2334df51819 100644 (file)
@@ -457,6 +457,7 @@ class UserTest < ActiveSupport::TestCase
 
     vm_perm = find_obj_in_resp response, 'Link', 'arvados#virtualMachine'
     verify_link vm_perm, 'permission', 'can_login', resp_user[:uuid], vm.uuid
+    assert_equal("foo", vm_perm.properties["username"])
   end
 
   test "setup new user with junk in database" do
@@ -499,6 +500,7 @@ class UserTest < ActiveSupport::TestCase
 
     vm_perm = find_obj_in_resp response, 'Link', 'arvados#virtualMachine'
     verify_link vm_perm, 'permission', 'can_login', resp_user[:uuid], vm.uuid
+    assert_equal("foo", vm_perm.properties["username"])
   end
 
   test "setup new user in multiple steps" do
@@ -552,6 +554,7 @@ class UserTest < ActiveSupport::TestCase
 
     vm_perm = find_obj_in_resp response, 'Link', 'arvados#virtualMachine'
     verify_link vm_perm, 'permission', 'can_login', resp_user[:uuid], vm.uuid
+    assert_equal("foo", vm_perm.properties["username"])
   end
 
   def find_obj_in_resp (response_items, object_type, head_kind=nil)
index ef16acbd6f20975896f3cba1dd819be200a3e4ab..6313d50d685e7a17c4854cbb5c62305c905bcb72 100644 (file)
@@ -52,7 +52,7 @@ func (h *authHandler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
                        w.WriteHeader(statusCode)
                        w.Write([]byte(statusText))
                }
-               log.Println(quoteStrings(r.RemoteAddr, username, password, wroteStatus, statusText, repoName, r.URL.Path)...)
+               log.Println(quoteStrings(r.RemoteAddr, username, password, wroteStatus, statusText, repoName, r.Method, r.URL.Path)...)
        }()
 
        // HTTP request username is logged, but unused. Password is an
@@ -60,7 +60,7 @@ func (h *authHandler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
        username, password, ok := BasicAuth(r)
        if !ok || username == "" || password == "" {
                statusCode, statusText = http.StatusUnauthorized, "no credentials provided"
-               w.Header().Add("WWW-Authenticate", "basic")
+               w.Header().Add("WWW-Authenticate", "Basic realm=\"git\"")
                return
        }
 
@@ -87,7 +87,7 @@ func (h *authHandler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
        arv.ApiToken = password
        reposFound := arvadosclient.Dict{}
        if err := arv.List("repositories", arvadosclient.Dict{
-               "filters": [][]string{[]string{"name", "=", repoName}},
+               "filters": [][]string{{"name", "=", repoName}},
        }, &reposFound); err != nil {
                statusCode, statusText = http.StatusInternalServerError, err.Error()
                return
index 6bc88cb1fbe8cb27d0b4ae43d22bfaf5c8ff09d3..2bd84dc82e99e5ce28e44596d9531d9d1c995396 100644 (file)
@@ -14,15 +14,15 @@ type basicAuthTestCase struct {
 
 func TestBasicAuth(t *testing.T) {
        tests := []basicAuthTestCase{
-               basicAuthTestCase{"Basic Zm9vOmJhcg==", "foo", "bar", true},
-               basicAuthTestCase{"Bogus Zm9vOmJhcg==", "", "", false},
-               basicAuthTestCase{"Zm9vOmJhcg==", "", "", false},
-               basicAuthTestCase{"Basic", "", "", false},
-               basicAuthTestCase{"", "", "", false},
+               {"Basic Zm9vOmJhcg==", "foo", "bar", true},
+               {"Bogus Zm9vOmJhcg==", "", "", false},
+               {"Zm9vOmJhcg==", "", "", false},
+               {"Basic", "", "", false},
+               {"", "", "", false},
        }
        for _, test := range tests {
                if u, p, ok := BasicAuth(&http.Request{Header: map[string][]string{
-                       "Authorization": []string{test.hdr},
+                       "Authorization": {test.hdr},
                }}); u != test.user || p != test.pass || ok != test.ok {
                        t.Error("got:", u, p, ok, "expected:", test.user, test.pass, test.ok, "from:", test.hdr)
                }
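For reference, a minimal parser equivalent to the BasicAuth function this table exercises (a sketch, not the shipped implementation):

package main

import (
	"encoding/base64"
	"fmt"
	"strings"
)

// basicAuth decodes an Authorization header of the form
// "Basic base64(user:pass)". It returns ok=false for any other
// shape, matching the negative cases in the test table above.
func basicAuth(hdr string) (user, pass string, ok bool) {
	fields := strings.SplitN(hdr, " ", 2)
	if len(fields) != 2 || fields[0] != "Basic" {
		return "", "", false
	}
	decoded, err := base64.StdEncoding.DecodeString(fields[1])
	if err != nil {
		return "", "", false
	}
	userPass := strings.SplitN(string(decoded), ":", 2)
	if len(userPass) != 2 {
		return "", "", false
	}
	return userPass[0], userPass[1], true
}

func main() {
	fmt.Println(basicAuth("Basic Zm9vOmJhcg==")) // foo bar true
}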
index 82d71ae76bf4d23993e706f314aebdb8cc2b0ae9..e5ddc29dec93b2a9a2f08fef9d7f31b9706da2dc 100644 (file)
@@ -14,6 +14,12 @@ import (
 
 var _ = check.Suite(&IntegrationSuite{})
 
+const (
+       spectatorToken = "zw2f4gwx8hw8cjre7yp6v1zylhrhn3m5gvjq73rtpwhmknrybu"
+       activeToken    = "3kg6k6lzmp9kj5cpkcoxie963cmvjahbt2fod9zru30k1jqdmi"
+       anonymousToken = "4kg6k6lzmp9kj4cpkcoxie964cmvjahbt4fod9zru44k4jqdmi"
+)
+
 // IntegrationSuite tests need an API server and an arv-git-httpd server
 type IntegrationSuite struct {
        tmpRepoRoot string
@@ -23,55 +29,43 @@ type IntegrationSuite struct {
 
 func (s *IntegrationSuite) TestPathVariants(c *check.C) {
        s.makeArvadosRepo(c)
-       // Spectator token
-       os.Setenv("ARVADOS_API_TOKEN", "zw2f4gwx8hw8cjre7yp6v1zylhrhn3m5gvjq73rtpwhmknrybu")
        for _, repo := range []string{"active/foo.git", "active/foo/.git", "arvados.git", "arvados/.git"} {
-               err := s.runGit(c, "fetch", repo)
+               err := s.runGit(c, spectatorToken, "fetch", repo)
                c.Assert(err, check.Equals, nil)
        }
 }
 
 func (s *IntegrationSuite) TestReadonly(c *check.C) {
-       // Spectator token
-       os.Setenv("ARVADOS_API_TOKEN", "zw2f4gwx8hw8cjre7yp6v1zylhrhn3m5gvjq73rtpwhmknrybu")
-       err := s.runGit(c, "fetch", "active/foo.git")
+       err := s.runGit(c, spectatorToken, "fetch", "active/foo.git")
        c.Assert(err, check.Equals, nil)
-       err = s.runGit(c, "push", "active/foo.git", "master:newbranchfail")
+       err = s.runGit(c, spectatorToken, "push", "active/foo.git", "master:newbranchfail")
        c.Assert(err, check.ErrorMatches, `.*HTTP code = 403.*`)
        _, err = os.Stat(s.tmpRepoRoot + "/zzzzz-s0uqq-382brsig8rp3666/.git/refs/heads/newbranchfail")
        c.Assert(err, check.FitsTypeOf, &os.PathError{})
 }
 
 func (s *IntegrationSuite) TestReadwrite(c *check.C) {
-       // Active user token
-       os.Setenv("ARVADOS_API_TOKEN", "3kg6k6lzmp9kj5cpkcoxie963cmvjahbt2fod9zru30k1jqdmi")
-       err := s.runGit(c, "fetch", "active/foo.git")
+       err := s.runGit(c, activeToken, "fetch", "active/foo.git")
        c.Assert(err, check.Equals, nil)
-       err = s.runGit(c, "push", "active/foo.git", "master:newbranch")
+       err = s.runGit(c, activeToken, "push", "active/foo.git", "master:newbranch")
        c.Assert(err, check.Equals, nil)
        _, err = os.Stat(s.tmpRepoRoot + "/zzzzz-s0uqq-382brsig8rp3666/.git/refs/heads/newbranch")
        c.Assert(err, check.Equals, nil)
 }
 
 func (s *IntegrationSuite) TestNonexistent(c *check.C) {
-       // Spectator token
-       os.Setenv("ARVADOS_API_TOKEN", "zw2f4gwx8hw8cjre7yp6v1zylhrhn3m5gvjq73rtpwhmknrybu")
-       err := s.runGit(c, "fetch", "thisrepodoesnotexist.git")
+       err := s.runGit(c, spectatorToken, "fetch", "thisrepodoesnotexist.git")
        c.Assert(err, check.ErrorMatches, `.* not found.*`)
 }
 
 func (s *IntegrationSuite) TestMissingGitdirReadableRepository(c *check.C) {
-       // Active user token
-       os.Setenv("ARVADOS_API_TOKEN", "3kg6k6lzmp9kj5cpkcoxie963cmvjahbt2fod9zru30k1jqdmi")
-       err := s.runGit(c, "fetch", "active/foo2.git")
+       err := s.runGit(c, activeToken, "fetch", "active/foo2.git")
        c.Assert(err, check.ErrorMatches, `.* not found.*`)
 }
 
 func (s *IntegrationSuite) TestNoPermission(c *check.C) {
-       // Anonymous token
-       os.Setenv("ARVADOS_API_TOKEN", "4kg6k6lzmp9kj4cpkcoxie964cmvjahbt4fod9zru44k4jqdmi")
        for _, repo := range []string{"active/foo.git", "active/foo/.git"} {
-               err := s.runGit(c, "fetch", repo)
+               err := s.runGit(c, anonymousToken, "fetch", repo)
                c.Assert(err, check.ErrorMatches, `.* not found.*`)
        }
 }
@@ -97,6 +91,17 @@ func (s *IntegrationSuite) SetUpTest(c *check.C) {
        _, err = exec.Command("sh", "-c", "cd "+s.tmpWorkdir+" && echo work >work && git add work && git -c user.name=Foo -c user.email=Foo commit -am 'workdir: test'").CombinedOutput()
        c.Assert(err, check.Equals, nil)
 
+       _, err = exec.Command("git", "config",
+               "--file", s.tmpWorkdir+"/.git/config",
+               "credential.http://"+s.testServer.Addr+"/.helper",
+               "!cred(){ cat >/dev/null; if [ \"$1\" = get ]; then echo password=$ARVADOS_API_TOKEN; fi; };cred").Output()
+       c.Assert(err, check.Equals, nil)
+       _, err = exec.Command("git", "config",
+               "--file", s.tmpWorkdir+"/.git/config",
+               "credential.http://"+s.testServer.Addr+"/.username",
+               "none").Output()
+       c.Assert(err, check.Equals, nil)
+
        theConfig = &config{
                Addr:       ":",
                GitCommand: "/usr/bin/git",
@@ -107,18 +112,7 @@ func (s *IntegrationSuite) SetUpTest(c *check.C) {
 
        // Clear ARVADOS_API_TOKEN after starting up the server, to
        // make sure arv-git-httpd doesn't use it.
-       os.Setenv("ARVADOS_API_TOKEN", "")
-
-       _, err = exec.Command("git", "config",
-               "--file", s.tmpWorkdir+"/.git/config",
-               "credential.http://"+s.testServer.Addr+"/.helper",
-               "!cred(){ echo password=$ARVADOS_API_TOKEN; };cred").Output()
-       c.Assert(err, check.Equals, nil)
-       _, err = exec.Command("git", "config",
-               "--file", s.tmpWorkdir+"/.git/config",
-               "credential.http://"+s.testServer.Addr+"/.username",
-               "none").Output()
-       c.Assert(err, check.Equals, nil)
+       os.Setenv("ARVADOS_API_TOKEN", "unused-token-placates-client-library")
 }
 
 func (s *IntegrationSuite) TearDownTest(c *check.C) {
@@ -137,7 +131,7 @@ func (s *IntegrationSuite) TearDownTest(c *check.C) {
        }
 }
 
-func (s *IntegrationSuite) runGit(c *check.C, gitCmd, repo string, args ...string) error {
+func (s *IntegrationSuite) runGit(c *check.C, token, gitCmd, repo string, args ...string) error {
        cwd, err := os.Getwd()
        c.Assert(err, check.Equals, nil)
        defer os.Chdir(cwd)
@@ -147,6 +141,7 @@ func (s *IntegrationSuite) runGit(c *check.C, gitCmd, repo string, args ...strin
                gitCmd, "http://" + s.testServer.Addr + "/" + repo,
        }, args...)
        cmd := exec.Command("git", gitargs...)
+       cmd.Env = append(os.Environ(), "ARVADOS_API_TOKEN="+token)
        w, err := cmd.StdinPipe()
        c.Assert(err, check.Equals, nil)
        w.Close()
@@ -165,9 +160,11 @@ func (s *IntegrationSuite) runGit(c *check.C, gitCmd, repo string, args ...strin
 
 // Make a bare arvados repo at {tmpRepoRoot}/arvados.git
 func (s *IntegrationSuite) makeArvadosRepo(c *check.C) {
-       _, err := exec.Command("git", "init", "--bare", s.tmpRepoRoot+"/zzzzz-s0uqq-arvadosrepo0123.git").Output()
+       msg, err := exec.Command("git", "init", "--bare", s.tmpRepoRoot+"/zzzzz-s0uqq-arvadosrepo0123.git").CombinedOutput()
+       c.Log(string(msg))
        c.Assert(err, check.Equals, nil)
-       _, err = exec.Command("git", "--git-dir", s.tmpRepoRoot+"/zzzzz-s0uqq-arvadosrepo0123.git", "fetch", "../../.git", "master:master").Output()
+       msg, err = exec.Command("git", "--git-dir", s.tmpRepoRoot+"/zzzzz-s0uqq-arvadosrepo0123.git", "fetch", "../../.git", "HEAD:master").CombinedOutput()
+       c.Log(string(msg))
        c.Assert(err, check.Equals, nil)
 }
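The reworked setup threads the token through each git subprocess's environment instead of mutating the test process's own environment, and the configured credential helper echoes it back to git on "get". A condensed sketch of the pattern (the URL and token below are placeholders):

package main

import (
	"fmt"
	"os"
	"os/exec"
)

// runGitWithToken shows the per-command pattern the suite adopted:
// the token travels only in the child's environment, so the server
// process (which must not see it) and other tests are unaffected.
func runGitWithToken(token string, args ...string) error {
	cmd := exec.Command("git", args...)
	// The credential helper in .git/config reads this variable and
	// hands git "password=$ARVADOS_API_TOKEN" when asked.
	cmd.Env = append(os.Environ(), "ARVADOS_API_TOKEN="+token)
	cmd.Stdout, cmd.Stderr = os.Stdout, os.Stderr
	return cmd.Run()
}

func main() {
	fmt.Println(runGitWithToken("placeholder-token",
		"fetch", "http://localhost:9001/active/foo.git"))
}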
 
diff --git a/services/crunchstat/.gitignore b/services/crunchstat/.gitignore
new file mode 100644 (file)
index 0000000..c26270a
--- /dev/null
@@ -0,0 +1 @@
+crunchstat
index 1a22e264f1ab734dafe052a589f86bfc2be6597e..e14912423db73483ef2623149e23d3ca63b3dabb 100644 (file)
@@ -86,11 +86,19 @@ var reportedStatFile = map[string]string{}
 // cgroup root for the given statgroup. (This will avoid falling back
 // to host-level stats during container setup and teardown.)
 func OpenStatFile(cgroup Cgroup, statgroup string, stat string) (*os.File, error) {
-       var paths = []string{
-               fmt.Sprintf("%s/%s/%s/%s/%s", cgroup.root, statgroup, cgroup.parent, cgroup.cid, stat),
-               fmt.Sprintf("%s/%s/%s/%s", cgroup.root, cgroup.parent, cgroup.cid, stat),
-               fmt.Sprintf("%s/%s/%s", cgroup.root, statgroup, stat),
-               fmt.Sprintf("%s/%s", cgroup.root, stat),
+       var paths []string
+       if cgroup.cid != "" {
+               // Collect container's stats
+               paths = []string{
+                       fmt.Sprintf("%s/%s/%s/%s/%s", cgroup.root, statgroup, cgroup.parent, cgroup.cid, stat),
+                       fmt.Sprintf("%s/%s/%s/%s", cgroup.root, cgroup.parent, cgroup.cid, stat),
+               }
+       } else {
+               // Collect this host's stats
+               paths = []string{
+                       fmt.Sprintf("%s/%s/%s", cgroup.root, statgroup, stat),
+                       fmt.Sprintf("%s/%s", cgroup.root, stat),
+               }
        }
        var path string
        var file *os.File
@@ -110,12 +118,14 @@ func OpenStatFile(cgroup Cgroup, statgroup string, stat string) (*os.File, error
                // whether we happen to collect stats [a] before any
                // processes have been created in the container and
                // [b] after all contained processes have exited.
-               reportedStatFile[stat] = path
                if path == "" {
-                       statLog.Printf("error finding stats file: stat %s, statgroup %s, cid %s, parent %s, root %s\n", stat, statgroup, cgroup.cid, cgroup.parent, cgroup.root)
+                       statLog.Printf("notice: stats not available: stat %s, statgroup %s, cid %s, parent %s, root %s\n", stat, statgroup, cgroup.cid, cgroup.parent, cgroup.root)
+               } else if ok {
+                       statLog.Printf("notice: stats moved from %s to %s\n", reportedStatFile[stat], path)
                } else {
-                       statLog.Printf("error reading stats from %s\n", path)
+                       statLog.Printf("notice: reading stats from %s\n", path)
                }
+               reportedStatFile[stat] = path
        }
        return file, err
 }
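The essence of the OpenStatFile change is choosing a candidate path list up front, then opening the first one that exists. A reduced sketch (the cgroup paths shown are examples):

package main

import (
	"fmt"
	"os"
)

// openFirst returns the first path that opens successfully: the
// container-scoped candidates come first when a container ID is
// known, with host-level paths as the fallback.
func openFirst(paths []string) (*os.File, string, error) {
	var err error
	for _, p := range paths {
		var f *os.File
		if f, err = os.Open(p); err == nil {
			return f, p, nil
		}
	}
	return nil, "", err
}

func main() {
	f, path, err := openFirst([]string{
		"/sys/fs/cgroup/memory/docker/abc123/memory.stat", // container
		"/sys/fs/cgroup/memory/memory.stat",               // host fallback
	})
	fmt.Println(path, err)
	if f != nil {
		f.Close()
	}
}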
diff --git a/services/keepproxy/.gitignore b/services/keepproxy/.gitignore
new file mode 100644 (file)
index 0000000..a4c8ad9
--- /dev/null
@@ -0,0 +1 @@
+keepproxy
index 581f7f48739fd2fef826ef6b6dfae6c9a8baefec..ebdb6eb376535060c72600c4144fad1cee097a85 100644 (file)
@@ -1,6 +1,7 @@
 package main
 
 import (
+       "errors"
        "flag"
        "fmt"
        "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
@@ -13,6 +14,7 @@ import (
        "net/http"
        "os"
        "os/signal"
+       "reflect"
        "sync"
        "syscall"
        "time"
@@ -105,7 +107,7 @@ func main() {
                log.Fatalf("Could not listen on %v", listen)
        }
 
-       go RefreshServicesList(&kc)
+       go RefreshServicesList(kc)
 
        // Shut down the server gracefully (by closing the listener)
        // if SIGTERM is received.
@@ -118,10 +120,10 @@ func main() {
        signal.Notify(term, syscall.SIGTERM)
        signal.Notify(term, syscall.SIGINT)
 
-       log.Printf("Arvados Keep proxy started listening on %v with server list %v", listener.Addr(), kc.ServiceRoots())
+       log.Printf("Arvados Keep proxy started listening on %v", listener.Addr())
 
        // Start listening for requests.
-       http.Serve(listener, MakeRESTRouter(!no_get, !no_put, &kc))
+       http.Serve(listener, MakeRESTRouter(!no_get, !no_put, kc))
 
        log.Println("shutting down")
 }
@@ -134,27 +136,25 @@ type ApiTokenCache struct {
 
 // Refresh the keep service list every five minutes.
 func RefreshServicesList(kc *keepclient.KeepClient) {
-       var sleeptime time.Duration
+       var previousRoots = []map[string]string{}
+       var delay time.Duration = 0
        for {
-               oldservices := kc.ServiceRoots()
-               newservices, err := kc.DiscoverKeepServers()
-               if err == nil && len(newservices) > 0 {
-                       s1 := fmt.Sprint(oldservices)
-                       s2 := fmt.Sprint(newservices)
-                       if s1 != s2 {
-                               log.Printf("Updated server list to %v", s2)
-                       }
-                       sleeptime = 300 * time.Second
-               } else {
-                       // There was an error, or the list is empty, so wait 3 seconds and try again.
-                       if err != nil {
-                               log.Printf("Error retrieving server list: %v", err)
-                       } else {
-                               log.Printf("Retrieved an empty server list")
-                       }
-                       sleeptime = 3 * time.Second
+               time.Sleep(delay * time.Second)
+               delay = 300
+               if err := kc.DiscoverKeepServers(); err != nil {
+                       log.Println("Error retrieving services list:", err)
+                       delay = 3
+                       continue
+               }
+               newRoots := []map[string]string{kc.LocalRoots(), kc.GatewayRoots()}
+               if !reflect.DeepEqual(previousRoots, newRoots) {
+                       log.Printf("Updated services list: locals %v gateways %v", newRoots[0], newRoots[1])
+               }
+               if len(newRoots[0]) == 0 {
+                       log.Print("WARNING: No local services. Retrying in 3 seconds.")
+                       delay = 3
                }
-               time.Sleep(sleeptime)
+               previousRoots = newRoots
        }
 }
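The rewritten loop sleeps at the top of each pass and resets the delay every iteration, shortening it on failure. The same shape in isolation (interval values mirror the code above; the stop channel is an addition so the sketch terminates):

package main

import (
	"errors"
	"log"
	"time"
)

// pollWithBackoff: zero delay on the first pass, a long default
// interval thereafter, and a short retry interval after any failure.
func pollWithBackoff(discover func() error, stop <-chan struct{}) {
	var delay time.Duration
	for {
		select {
		case <-stop:
			return
		case <-time.After(delay):
		}
		delay = 5 * time.Minute
		if err := discover(); err != nil {
			log.Println("discovery failed, retrying soon:", err)
			delay = 3 * time.Second
		}
	}
}

func main() {
	stop := make(chan struct{})
	go func() { time.Sleep(50 * time.Millisecond); close(stop) }()
	pollWithBackoff(func() error { return errors.New("transient") }, stop)
}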
 
@@ -258,14 +258,14 @@ func MakeRESTRouter(
        rest := mux.NewRouter()
 
        if enable_get {
-               rest.Handle(`/{hash:[0-9a-f]{32}}+{hints}`,
+               rest.Handle(`/{locator:[0-9a-f]{32}\+.*}`,
                        GetBlockHandler{kc, t}).Methods("GET", "HEAD")
-               rest.Handle(`/{hash:[0-9a-f]{32}}`, GetBlockHandler{kc, t}).Methods("GET", "HEAD")
+               rest.Handle(`/{locator:[0-9a-f]{32}}`, GetBlockHandler{kc, t}).Methods("GET", "HEAD")
        }
 
        if enable_put {
-               rest.Handle(`/{hash:[0-9a-f]{32}}+{hints}`, PutBlockHandler{kc, t}).Methods("PUT")
-               rest.Handle(`/{hash:[0-9a-f]{32}}`, PutBlockHandler{kc, t}).Methods("PUT")
+               rest.Handle(`/{locator:[0-9a-f]{32}\+.*}`, PutBlockHandler{kc, t}).Methods("PUT")
+               rest.Handle(`/{locator:[0-9a-f]{32}}`, PutBlockHandler{kc, t}).Methods("PUT")
                rest.Handle(`/`, PutBlockHandler{kc, t}).Methods("POST")
                rest.Handle(`/{any}`, OptionsHandler{}).Methods("OPTIONS")
                rest.Handle(`/`, OptionsHandler{}).Methods("OPTIONS")
@@ -293,22 +293,32 @@ func (this OptionsHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request
        SetCorsHeaders(resp)
 }
 
+var BadAuthorizationHeader = errors.New("Missing or invalid Authorization header")
+var ContentLengthMismatch = errors.New("Actual length != expected content length")
+var MethodNotSupported = errors.New("Method not supported")
+
 func (this GetBlockHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
        SetCorsHeaders(resp)
 
-       kc := *this.KeepClient
-
-       hash := mux.Vars(req)["hash"]
-       hints := mux.Vars(req)["hints"]
-
-       locator := keepclient.MakeLocator2(hash, hints)
+       locator := mux.Vars(req)["locator"]
+       var err error
+       var status int
+       var expectLength, responseLength int64
+       var proxiedURI = "-"
+
+       defer func() {
+               log.Println(GetRemoteAddress(req), req.Method, req.URL.Path, status, expectLength, responseLength, proxiedURI, err)
+               if status != http.StatusOK {
+                       http.Error(resp, err.Error(), status)
+               }
+       }()
 
-       log.Printf("%s: %s %s begin", GetRemoteAddress(req), req.Method, hash)
+       kc := *this.KeepClient
 
        var pass bool
        var tok string
        if pass, tok = CheckAuthorizationHeader(kc, this.ApiTokenCache, req); !pass {
-               http.Error(resp, "Missing or invalid Authorization header", http.StatusForbidden)
+               status, err = http.StatusForbidden, BadAuthorizationHeader
                return
        }
 
@@ -318,92 +328,97 @@ func (this GetBlockHandler) ServeHTTP(resp http.ResponseWriter, req *http.Reques
        kc.Arvados = &arvclient
 
        var reader io.ReadCloser
-       var err error
-       var blocklen int64
 
-       if req.Method == "GET" {
-               reader, blocklen, _, err = kc.AuthorizedGet(hash, locator.Signature, locator.Timestamp)
+       switch req.Method {
+       case "HEAD":
+               expectLength, proxiedURI, err = kc.Ask(locator)
+       case "GET":
+               reader, expectLength, proxiedURI, err = kc.Get(locator)
                if reader != nil {
                        defer reader.Close()
                }
-       } else if req.Method == "HEAD" {
-               blocklen, _, err = kc.AuthorizedAsk(hash, locator.Signature, locator.Timestamp)
+       default:
+               status, err = http.StatusNotImplemented, MethodNotSupported
+               return
        }
 
-       if blocklen == -1 {
-               log.Printf("%s: %s %s Keep server did not return Content-Length",
-                       GetRemoteAddress(req), req.Method, hash)
+       if expectLength == -1 {
+               log.Println("Warning:", GetRemoteAddress(req), req.Method, proxiedURI, "Content-Length not provided")
        }
 
-       var status = 0
        switch err {
        case nil:
                status = http.StatusOK
-               resp.Header().Set("Content-Length", fmt.Sprint(blocklen))
-               if reader != nil {
-                       n, err2 := io.Copy(resp, reader)
-                       if blocklen > -1 && n != blocklen {
-                               log.Printf("%s: %s %s %v %v mismatched copy size expected Content-Length: %v",
-                                       GetRemoteAddress(req), req.Method, hash, status, n, blocklen)
-                       } else if err2 == nil {
-                               log.Printf("%s: %s %s %v %v",
-                                       GetRemoteAddress(req), req.Method, hash, status, n)
-                       } else {
-                               log.Printf("%s: %s %s %v %v copy error: %v",
-                                       GetRemoteAddress(req), req.Method, hash, status, n, err2.Error())
+               resp.Header().Set("Content-Length", fmt.Sprint(expectLength))
+               switch req.Method {
+               case "HEAD":
+                       responseLength = 0
+               case "GET":
+                       responseLength, err = io.Copy(resp, reader)
+                       if err == nil && expectLength > -1 && responseLength != expectLength {
+                               err = ContentLengthMismatch
                        }
-               } else {
-                       log.Printf("%s: %s %s %v 0", GetRemoteAddress(req), req.Method, hash, status)
                }
        case keepclient.BlockNotFound:
                status = http.StatusNotFound
-               http.Error(resp, "Not Found", http.StatusNotFound)
        default:
                status = http.StatusBadGateway
-               http.Error(resp, err.Error(), http.StatusBadGateway)
-       }
-
-       if err != nil {
-               log.Printf("%s: %s %s %v error: %v",
-                       GetRemoteAddress(req), req.Method, hash, status, err.Error())
        }
 }
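The rewritten handler funnels every exit through a single deferred function that logs the outcome and writes the error response. The pattern in miniature:

package main

import (
	"errors"
	"log"
	"net/http"
	"net/http/httptest"
)

// handler: set status/err and return; the deferred function does all
// logging and error responses in one place, so no exit path can
// forget either.
func handler(w http.ResponseWriter, r *http.Request) {
	var err error
	status := http.StatusOK
	defer func() {
		log.Println(r.Method, r.URL.Path, status, err)
		if status != http.StatusOK {
			http.Error(w, err.Error(), status)
		}
	}()
	if r.Header.Get("Authorization") == "" {
		status, err = http.StatusForbidden, errors.New("Missing or invalid Authorization header")
		return
	}
	w.Write([]byte("ok\n"))
}

func main() {
	rec := httptest.NewRecorder()
	handler(rec, httptest.NewRequest("GET", "/x", nil))
	log.Println(rec.Code) // 403
}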
 
+var LengthRequiredError = errors.New(http.StatusText(http.StatusLengthRequired))
+var LengthMismatchError = errors.New("Locator size hint does not match Content-Length header")
+
 func (this PutBlockHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
        SetCorsHeaders(resp)
 
        kc := *this.KeepClient
+       var err error
+       var expectLength int64 = -1
+       var status = http.StatusInternalServerError
+       var wroteReplicas int
+       var locatorOut string = "-"
+
+       defer func() {
+               log.Println(GetRemoteAddress(req), req.Method, req.URL.Path, status, expectLength, kc.Want_replicas, wroteReplicas, locatorOut, err)
+               if status != http.StatusOK {
+                       http.Error(resp, err.Error(), status)
+               }
+       }()
 
-       hash := mux.Vars(req)["hash"]
-       hints := mux.Vars(req)["hints"]
-
-       locator := keepclient.MakeLocator2(hash, hints)
+       locatorIn := mux.Vars(req)["locator"]
 
-       var contentLength int64 = -1
        if req.Header.Get("Content-Length") != "" {
-               _, err := fmt.Sscanf(req.Header.Get("Content-Length"), "%d", &contentLength)
+               _, err := fmt.Sscanf(req.Header.Get("Content-Length"), "%d", &expectLength)
                if err != nil {
-                       resp.Header().Set("Content-Length", fmt.Sprintf("%d", contentLength))
+                       resp.Header().Set("Content-Length", fmt.Sprintf("%d", expectLength))
                }
 
        }
 
-       log.Printf("%s: %s %s Content-Length %v", GetRemoteAddress(req), req.Method, hash, contentLength)
-
-       if contentLength < 0 {
-               http.Error(resp, "Must include Content-Length header", http.StatusLengthRequired)
+       if expectLength < 0 {
+               err = LengthRequiredError
+               status = http.StatusLengthRequired
                return
        }
 
-       if locator.Size > 0 && int64(locator.Size) != contentLength {
-               http.Error(resp, "Locator size hint does not match Content-Length header", http.StatusBadRequest)
-               return
+       if locatorIn != "" {
+               var loc *keepclient.Locator
+               if loc, err = keepclient.MakeLocator(locatorIn); err != nil {
+                       status = http.StatusBadRequest
+                       return
+               } else if loc.Size > 0 && int64(loc.Size) != expectLength {
+                       err = LengthMismatchError
+                       status = http.StatusBadRequest
+                       return
+               }
        }
 
        var pass bool
        var tok string
        if pass, tok = CheckAuthorizationHeader(kc, this.ApiTokenCache, req); !pass {
-               http.Error(resp, "Missing or invalid Authorization header", http.StatusForbidden)
+               err = BadAuthorizationHeader
+               status = http.StatusForbidden
                return
        }
 
@@ -422,57 +437,42 @@ func (this PutBlockHandler) ServeHTTP(resp http.ResponseWriter, req *http.Reques
        }
 
        // Now try to put the block through
-       var replicas int
-       var put_err error
-       if hash == "" {
+       if locatorIn == "" {
-               if bytes, err := ioutil.ReadAll(req.Body); err != nil {
-                       msg := fmt.Sprintf("Error reading request body: %s", err)
-                       log.Printf(msg)
-                       http.Error(resp, msg, http.StatusInternalServerError)
+               // Assign to the outer err (declared above) instead of
+               // shadowing it with :=, so the deferred logger and error
+               // response see the read failure.
+               var body []byte
+               if body, err = ioutil.ReadAll(req.Body); err != nil {
+                       err = fmt.Errorf("Error reading request body: %s", err)
+                       status = http.StatusInternalServerError
                        return
                } else {
-                       hash, replicas, put_err = kc.PutB(bytes)
+                       locatorOut, wroteReplicas, err = kc.PutB(body)
                }
        } else {
-               hash, replicas, put_err = kc.PutHR(hash, req.Body, contentLength)
+               locatorOut, wroteReplicas, err = kc.PutHR(locatorIn, req.Body, expectLength)
        }
 
        // Tell the client how many successful PUTs we accomplished
-       resp.Header().Set(keepclient.X_Keep_Replicas_Stored, fmt.Sprintf("%d", replicas))
+       resp.Header().Set(keepclient.X_Keep_Replicas_Stored, fmt.Sprintf("%d", wroteReplicas))
 
-       switch put_err {
+       switch err {
        case nil:
-               // Default will return http.StatusOK
-               log.Printf("%s: %s %s finished, stored %v replicas (desired %v)", GetRemoteAddress(req), req.Method, hash, replicas, kc.Want_replicas)
-               n, err2 := io.WriteString(resp, hash)
-               if err2 != nil {
-                       log.Printf("%s: wrote %v bytes to response body and got error %v", n, err2.Error())
-               }
+               status = http.StatusOK
+               _, err = io.WriteString(resp, locatorOut)
 
        case keepclient.OversizeBlockError:
                // Too much data
-               http.Error(resp, fmt.Sprintf("Exceeded maximum blocksize %d", keepclient.BLOCKSIZE), http.StatusRequestEntityTooLarge)
+               status = http.StatusRequestEntityTooLarge
 
        case keepclient.InsufficientReplicasError:
-               if replicas > 0 {
+               if wroteReplicas > 0 {
                        // At least one write is considered success.  The
                        // client can decide if getting less than the number of
                        // replications it asked for is a fatal error.
-                       // Default will return http.StatusOK
-                       n, err2 := io.WriteString(resp, hash)
-                       if err2 != nil {
-                               log.Printf("%s: wrote %v bytes to response body and got error %v", n, err2.Error())
-                       }
+                       status = http.StatusOK
+                       _, err = io.WriteString(resp, locatorOut)
                } else {
-                       http.Error(resp, put_err.Error(), http.StatusServiceUnavailable)
+                       status = http.StatusServiceUnavailable
                }
 
        default:
-               http.Error(resp, put_err.Error(), http.StatusBadGateway)
-       }
-
-       if put_err != nil {
-               log.Printf("%s: %s %s stored %v replicas (desired %v) got error %v", GetRemoteAddress(req), req.Method, hash, replicas, kc.Want_replicas, put_err.Error())
+               status = http.StatusBadGateway
        }
-
 }
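Because a PUT that achieves at least one replica still returns 200 along with X-Keep-Replicas-Stored, deciding whether partial replication is acceptable is left to the caller. A hedged sketch of that client-side check (the function name is illustrative):

package main

import (
	"fmt"
	"net/http"
	"strconv"
)

// checkReplicas inspects the proxy's X-Keep-Replicas-Stored header.
// A 200 with fewer replicas than requested is a success at the HTTP
// level; whether it is acceptable is the caller's policy.
func checkReplicas(resp *http.Response, want int) (int, error) {
	got, err := strconv.Atoi(resp.Header.Get("X-Keep-Replicas-Stored"))
	if err != nil {
		return 0, fmt.Errorf("missing or malformed replica header: %v", err)
	}
	if got < want {
		return got, fmt.Errorf("stored %d replicas, wanted %d", got, want)
	}
	return got, nil
}

func main() {
	resp := &http.Response{Header: http.Header{}}
	resp.Header.Set("X-Keep-Replicas-Stored", "1")
	fmt.Println(checkReplicas(resp, 2))
}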
index e3b4e36b63de23dee806a35a9c9d55958cbc3afd..5f6e2b9bc8eda0ef2d958071301e949a277b57c0 100644 (file)
@@ -117,10 +117,10 @@ func runProxy(c *C, args []string, port int, bogusClientToken bool) keepclient.K
        }
        kc.SetServiceRoots(map[string]string{
                "proxy": fmt.Sprintf("http://localhost:%v", port),
-       })
+       }, nil)
        c.Check(kc.Using_proxy, Equals, true)
-       c.Check(len(kc.ServiceRoots()), Equals, 1)
-       for _, root := range kc.ServiceRoots() {
+       c.Check(len(kc.LocalRoots()), Equals, 1)
+       for _, root := range kc.LocalRoots() {
                c.Check(root, Equals, fmt.Sprintf("http://localhost:%v", port))
        }
        log.Print("keepclient created")
@@ -154,8 +154,8 @@ func (s *ServerRequiredSuite) TestPutAskGet(c *C) {
        c.Assert(err, Equals, nil)
        c.Check(kc.Arvados.External, Equals, true)
        c.Check(kc.Using_proxy, Equals, true)
-       c.Check(len(kc.ServiceRoots()), Equals, 1)
-       for _, root := range kc.ServiceRoots() {
+       c.Check(len(kc.LocalRoots()), Equals, 1)
+       for _, root := range kc.LocalRoots() {
                c.Check(root, Equals, "http://localhost:29950")
        }
        os.Setenv("ARVADOS_EXTERNAL_CLIENT", "")
diff --git a/services/keepstore/.gitignore b/services/keepstore/.gitignore
new file mode 100644 (file)
index 0000000..c195c4a
--- /dev/null
@@ -0,0 +1 @@
+keepstore
diff --git a/services/keepstore/bufferpool.go b/services/keepstore/bufferpool.go
new file mode 100644 (file)
index 0000000..373bfc7
--- /dev/null
@@ -0,0 +1,44 @@
+package main
+
+import (
+       "log"
+       "sync"
+       "time"
+)
+
+type bufferPool struct {
+       // limiter has a "true" placeholder for each in-use buffer.
+       limiter chan bool
+       // Pool has unused buffers.
+       sync.Pool
+}
+
+func newBufferPool(count int, bufSize int) *bufferPool {
+       p := bufferPool{}
+       p.New = func() interface{} {
+               return make([]byte, bufSize)
+       }
+       p.limiter = make(chan bool, count)
+       return &p
+}
+
+func (p *bufferPool) Get(size int) []byte {
+       select {
+       case p.limiter <- true:
+       default:
+               t0 := time.Now()
+               log.Printf("reached max buffers (%d), waiting", cap(p.limiter))
+               p.limiter <- true
+               log.Printf("waited %v for a buffer", time.Since(t0))
+       }
+       buf := p.Pool.Get().([]byte)
+       if cap(buf) < size {
+               log.Fatalf("bufferPool Get(size=%d) but max=%d", size, cap(buf))
+       }
+       return buf[:size]
+}
+
+func (p *bufferPool) Put(buf []byte) {
+       p.Pool.Put(buf)
+       <-p.limiter
+}
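Typical use of the pool brackets each request: Get blocks once count buffers are in flight, and Put both recycles the slice and frees the slot. A self-contained usage sketch (a minimal restatement of the type above, not the shipped code):

package main

import (
	"fmt"
	"sync"
)

// pool: a buffered channel caps how many buffers are outstanding;
// sync.Pool recycles the allocations themselves.
type pool struct {
	limiter chan bool
	sync.Pool
}

func newPool(count, bufSize int) *pool {
	p := &pool{limiter: make(chan bool, count)}
	p.New = func() interface{} { return make([]byte, bufSize) }
	return p
}

func (p *pool) Get(size int) []byte {
	p.limiter <- true // blocks when count buffers are in flight
	return p.Pool.Get().([]byte)[:size]
}

func (p *pool) Put(b []byte) {
	p.Pool.Put(b)
	<-p.limiter
}

func main() {
	bufs := newPool(2, 1024)
	b := bufs.Get(512) // len 512, cap 1024
	defer bufs.Put(b)
	fmt.Println(len(b), cap(b))
}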
diff --git a/services/keepstore/bufferpool_test.go b/services/keepstore/bufferpool_test.go
new file mode 100644 (file)
index 0000000..b2f63b1
--- /dev/null
@@ -0,0 +1,85 @@
+package main
+
+import (
+       . "gopkg.in/check.v1"
+       "testing"
+       "time"
+)
+
+// Gocheck boilerplate
+func TestBufferPool(t *testing.T) {
+       TestingT(t)
+}
+
+var _ = Suite(&BufferPoolSuite{})
+
+type BufferPoolSuite struct{}
+
+// Initialize a default-sized buffer pool for the benefit of test
+// suites that don't run main().
+func init() {
+       bufs = newBufferPool(maxBuffers, BLOCKSIZE)
+}
+
+func (s *BufferPoolSuite) TestBufferPoolBufSize(c *C) {
+       bufs := newBufferPool(2, 10)
+       b1 := bufs.Get(1)
+       bufs.Get(2)
+       bufs.Put(b1)
+       b3 := bufs.Get(3)
+       c.Check(len(b3), Equals, 3)
+}
+
+func (s *BufferPoolSuite) TestBufferPoolUnderLimit(c *C) {
+       bufs := newBufferPool(3, 10)
+       b1 := bufs.Get(10)
+       bufs.Get(10)
+       testBufferPoolRace(c, bufs, b1, "Get")
+}
+
+func (s *BufferPoolSuite) TestBufferPoolAtLimit(c *C) {
+       bufs := newBufferPool(2, 10)
+       b1 := bufs.Get(10)
+       bufs.Get(10)
+       testBufferPoolRace(c, bufs, b1, "Put")
+}
+
+func testBufferPoolRace(c *C, bufs *bufferPool, unused []byte, expectWin string) {
+       race := make(chan string)
+       go func() {
+               bufs.Get(10)
+               time.Sleep(time.Millisecond)
+               race <- "Get"
+       }()
+       go func() {
+               time.Sleep(10 * time.Millisecond)
+               bufs.Put(unused)
+               race <- "Put"
+       }()
+       c.Check(<-race, Equals, expectWin)
+       c.Check(<-race, Not(Equals), expectWin)
+       close(race)
+}
+
+func (s *BufferPoolSuite) TestBufferPoolReuse(c *C) {
+       bufs := newBufferPool(2, 10)
+       bufs.Get(10)
+       last := bufs.Get(10)
+       // The buffer pool is allowed to throw away unused buffers
+       // (e.g., during sync.Pool's garbage collection hook, in the
+       // current implementation). However, if unused buffers are
+       // getting thrown away and reallocated more than {arbitrary
+       // frequency threshold} during a busy loop, it's not acting
+       // much like a buffer pool.
+       allocs := 1000
+       reuses := 0
+       for i := 0; i < allocs; i++ {
+               bufs.Put(last)
+               next := bufs.Get(10)
+               copy(last, []byte("last"))
+               copy(next, []byte("next"))
+               if last[0] == 'n' {
+                       reuses++
+               }
+               last = next
+       }
+       c.Check(reuses > allocs*95/100, Equals, true)
+}
index 9ad77bfba7d66aab717dc1fbfd1dc37bc57a6edc..6823ad0fc68f9b86b78a298648d204086aa3b95c 100644 (file)
@@ -43,9 +43,9 @@ func TestGetHandler(t *testing.T) {
 
        // Prepare two test Keep volumes. Our block is stored on the second volume.
        KeepVM = MakeTestVolumeManager(2)
-       defer KeepVM.Quit()
+       defer KeepVM.Close()
 
-       vols := KeepVM.Volumes()
+       vols := KeepVM.AllWritable()
        if err := vols[0].Put(TEST_HASH, TEST_BLOCK); err != nil {
                t.Error(err)
        }
@@ -54,11 +54,11 @@ func TestGetHandler(t *testing.T) {
        // Turn on permission settings so we can generate signed locators.
        enforce_permissions = true
        PermissionSecret = []byte(known_key)
-       permission_ttl = time.Duration(300) * time.Second
+       blob_signature_ttl = 300 * time.Second
 
        var (
                unsigned_locator  = "/" + TEST_HASH
-               valid_timestamp   = time.Now().Add(permission_ttl)
+               valid_timestamp   = time.Now().Add(blob_signature_ttl)
                expired_timestamp = time.Now().Add(-time.Hour)
                signed_locator    = "/" + SignLocator(TEST_HASH, known_token, valid_timestamp)
                expired_locator   = "/" + SignLocator(TEST_HASH, known_token, expired_timestamp)
@@ -151,7 +151,7 @@ func TestPutHandler(t *testing.T) {
 
        // Prepare two test Keep volumes.
        KeepVM = MakeTestVolumeManager(2)
-       defer KeepVM.Quit()
+       defer KeepVM.Close()
 
        // --------------
        // No server key.
@@ -176,7 +176,7 @@ func TestPutHandler(t *testing.T) {
        // With a server key.
 
        PermissionSecret = []byte(known_key)
-       permission_ttl = time.Duration(300) * time.Second
+       blob_signature_ttl = 300 * time.Second
 
        // When a permission key is available, the locator returned
        // from an authenticated PUT request will be signed.
@@ -218,6 +218,46 @@ func TestPutHandler(t *testing.T) {
                TEST_HASH_PUT_RESPONSE, response)
 }
 
+func TestPutAndDeleteSkipReadonlyVolumes(t *testing.T) {
+       defer teardown()
+       data_manager_token = "fake-data-manager-token"
+       vols := []*MockVolume{CreateMockVolume(), CreateMockVolume()}
+       vols[0].Readonly = true
+       KeepVM = MakeRRVolumeManager([]Volume{vols[0], vols[1]})
+       defer KeepVM.Close()
+       IssueRequest(
+               &RequestTester{
+                       method:       "PUT",
+                       uri:          "/" + TEST_HASH,
+                       request_body: TEST_BLOCK,
+               })
+       IssueRequest(
+               &RequestTester{
+                       method:       "DELETE",
+                       uri:          "/" + TEST_HASH,
+                       request_body: TEST_BLOCK,
+                       api_token:    data_manager_token,
+               })
+       type expect struct {
+               volnum    int
+               method    string
+               callcount int
+       }
+       for _, e := range []expect{
+               {0, "Get", 0},
+               {0, "Touch", 0},
+               {0, "Put", 0},
+               {0, "Delete", 0},
+               {1, "Get", 1},
+               {1, "Put", 1},
+               {1, "Delete", 1},
+       } {
+               if calls := vols[e.volnum].CallCount(e.method); calls != e.callcount {
+                       t.Errorf("Got %d %s() on vol %d, expect %d", calls, e.method, e.volnum, e.callcount)
+               }
+       }
+}
+
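The test above pins down the split between readable and writable volume sets: writes and deletes must skip read-only volumes entirely. A reduced sketch of a volume manager with that split (type and field names are illustrative):

package main

import "fmt"

type Volume struct {
	Name     string
	Readonly bool
}

// volumeManager: reads go everywhere, writes and deletes only to
// writable volumes.
type volumeManager struct{ vols []Volume }

func (vm *volumeManager) AllReadable() []Volume { return vm.vols }

func (vm *volumeManager) AllWritable() []Volume {
	var w []Volume
	for _, v := range vm.vols {
		if !v.Readonly {
			w = append(w, v)
		}
	}
	return w
}

func main() {
	vm := &volumeManager{[]Volume{{"vol0", true}, {"vol1", false}}}
	fmt.Println(len(vm.AllReadable()), len(vm.AllWritable())) // 2 1
}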
 // Test /index requests:
 //   - unauthenticated /index request
 //   - unauthenticated /index/prefix request
@@ -236,9 +276,9 @@ func TestIndexHandler(t *testing.T) {
        // Include multiple blocks on different volumes, and
        // some metadata files (which should be omitted from index listings)
        KeepVM = MakeTestVolumeManager(2)
-       defer KeepVM.Quit()
+       defer KeepVM.Close()
 
-       vols := KeepVM.Volumes()
+       vols := KeepVM.AllWritable()
        vols[0].Put(TEST_HASH, TEST_BLOCK)
        vols[1].Put(TEST_HASH_2, TEST_BLOCK_2)
        vols[0].Put(TEST_HASH+".meta", []byte("metadata"))
@@ -395,15 +435,15 @@ func TestDeleteHandler(t *testing.T) {
        // Include multiple blocks on different volumes, and
        // some metadata files (which should be omitted from index listings)
        KeepVM = MakeTestVolumeManager(2)
-       defer KeepVM.Quit()
+       defer KeepVM.Close()
 
-       vols := KeepVM.Volumes()
+       vols := KeepVM.AllWritable()
        vols[0].Put(TEST_HASH, TEST_BLOCK)
 
-       // Explicitly set the permission_ttl to 0 for these
+       // Explicitly set the blob_signature_ttl to 0 for these
        // tests, to ensure the MockVolume deletes the blocks
        // even though they have just been created.
-       permission_ttl = time.Duration(0)
+       blob_signature_ttl = time.Duration(0)
 
        var user_token = "NOT DATA MANAGER TOKEN"
        data_manager_token = "DATA MANAGER TOKEN"
@@ -488,10 +528,10 @@ func TestDeleteHandler(t *testing.T) {
                t.Error("superuser_existing_block_req: block not deleted")
        }
 
-       // A DELETE request on a block newer than permission_ttl should return
-       // success but leave the block on the volume.
+       // A DELETE request on a block newer than blob_signature_ttl
+       // should return success but leave the block on the volume.
        vols[0].Put(TEST_HASH, TEST_BLOCK)
-       permission_ttl = time.Duration(1) * time.Hour
+       blob_signature_ttl = time.Hour
 
        response = IssueRequest(superuser_existing_block_req)
        ExpectStatusCode(t,
@@ -596,7 +636,7 @@ func TestPullHandler(t *testing.T) {
                        "Invalid pull request from the data manager",
                        RequestTester{"/pull", data_manager_token, "PUT", bad_json},
                        http.StatusBadRequest,
-                       "Bad Request\n",
+                       "",
                },
        }
 
@@ -700,7 +740,7 @@ func TestTrashHandler(t *testing.T) {
                        "Invalid trash list from the data manager",
                        RequestTester{"/trash", data_manager_token, "PUT", bad_json},
                        http.StatusBadRequest,
-                       "Bad Request\n",
+                       "",
                },
        }
 
@@ -758,7 +798,7 @@ func ExpectBody(
        testname string,
        expected_body string,
        response *httptest.ResponseRecorder) {
-       if response.Body.String() != expected_body {
+       if expected_body != "" && response.Body.String() != expected_body {
                t.Errorf("%s: expected response body '%s', got %+v",
                        testname, expected_body, response)
        }
index f120f05dc454086580c96df6d9960e9cf97491c3..cf5dfcac57a2e93b1bde9fa8a2bfcc60d01a186c 100644 (file)
@@ -8,7 +8,6 @@ package main
 // StatusHandler   (GET /status.json)
 
 import (
-       "bufio"
        "bytes"
        "container/list"
        "crypto/md5"
@@ -41,35 +40,19 @@ func MakeRESTRouter() *mux.Router {
 
        rest.HandleFunc(`/{hash:[0-9a-f]{32}}`, PutBlockHandler).Methods("PUT")
        rest.HandleFunc(`/{hash:[0-9a-f]{32}}`, DeleteHandler).Methods("DELETE")
-
-       // For IndexHandler we support:
-       //   /index           - returns all locators
-       //   /index/{prefix}  - returns all locators that begin with {prefix}
-       //      {prefix} is a string of hexadecimal digits between 0 and 32 digits.
-       //      If {prefix} is the empty string, return an index of all locators
-       //      (so /index and /index/ behave identically)
-       //      A client may supply a full 32-digit locator string, in which
-       //      case the server will return an index with either zero or one
-       //      entries. This usage allows a client to check whether a block is
-       //      present, and its size and upload time, without retrieving the
-       //      entire block.
-       //
+       // List all blocks stored here. Privileged client only.
        rest.HandleFunc(`/index`, IndexHandler).Methods("GET", "HEAD")
-       rest.HandleFunc(
-               `/index/{prefix:[0-9a-f]{0,32}}`, IndexHandler).Methods("GET", "HEAD")
+       // List blocks stored here whose hash has the given prefix.
+       // Privileged client only.
+       rest.HandleFunc(`/index/{prefix:[0-9a-f]{0,32}}`, IndexHandler).Methods("GET", "HEAD")
+
+       // List volumes: path, device number, bytes used/avail.
        rest.HandleFunc(`/status.json`, StatusHandler).Methods("GET", "HEAD")
 
-       // The PullHandler and TrashHandler process "PUT /pull" and "PUT
-       // /trash" requests from Data Manager.  These requests instruct
-       // Keep to replicate or delete blocks; see
-       // https://arvados.org/projects/arvados/wiki/Keep_Design_Doc
-       // for more details.
-       //
-       // Each handler parses the JSON list of block management requests
-       // in the message body, and replaces any existing pull queue or
-       // trash queue with their contentes.
-       //
+       // Replace the current pull queue.
        rest.HandleFunc(`/pull`, PullHandler).Methods("PUT")
+
+       // Replace the current trash queue.
        rest.HandleFunc(`/trash`, TrashHandler).Methods("PUT")
 
        // Any request which does not match any of these routes gets
@@ -83,38 +66,6 @@ func BadRequestHandler(w http.ResponseWriter, r *http.Request) {
        http.Error(w, BadRequestError.Error(), BadRequestError.HTTPCode)
 }
 
-// FindKeepVolumes scans all mounted volumes on the system for Keep
-// volumes, and returns a list of matching paths.
-//
-// A device is assumed to be a Keep volume if it is a normal or tmpfs
-// volume and has a "/keep" directory directly underneath the mount
-// point.
-//
-func FindKeepVolumes() []string {
-       vols := make([]string, 0)
-
-       if f, err := os.Open(PROC_MOUNTS); err != nil {
-               log.Fatalf("opening %s: %s\n", PROC_MOUNTS, err)
-       } else {
-               scanner := bufio.NewScanner(f)
-               for scanner.Scan() {
-                       args := strings.Fields(scanner.Text())
-                       dev, mount := args[0], args[1]
-                       if mount != "/" &&
-                               (dev == "tmpfs" || strings.HasPrefix(dev, "/dev/")) {
-                               keep := mount + "/keep"
-                               if st, err := os.Stat(keep); err == nil && st.IsDir() {
-                                       vols = append(vols, keep)
-                               }
-                       }
-               }
-               if err := scanner.Err(); err != nil {
-                       log.Fatal(err)
-               }
-       }
-       return vols
-}
-
 func GetBlockHandler(resp http.ResponseWriter, req *http.Request) {
        hash := mux.Vars(req)["hash"]
 
@@ -161,25 +112,17 @@ func GetBlockHandler(resp http.ResponseWriter, req *http.Request) {
        }
 
        block, err := GetBlock(hash, false)
-
-       // Garbage collect after each GET. Fixes #2865.
-       // TODO(twp): review Keep memory usage and see if there's
-       // a better way to do this than blindly garbage collecting
-       // after every block.
-       defer runtime.GC()
-
        if err != nil {
                // This type assertion is safe because the only errors
                // GetBlock can return are DiskHashError or NotFoundError.
                http.Error(resp, err.Error(), err.(*KeepError).HTTPCode)
                return
        }
+       defer bufs.Put(block)
 
-       resp.Header().Set("Content-Length", fmt.Sprintf("%d", len(block)))
-
-       _, err = resp.Write(block)
-
-       return
+       resp.Header().Set("Content-Length", strconv.Itoa(len(block)))
+       resp.Header().Set("Content-Type", "application/octet-stream")
+       resp.Write(block)
 }
 
 func PutBlockHandler(resp http.ResponseWriter, req *http.Request) {
@@ -189,37 +132,51 @@ func PutBlockHandler(resp http.ResponseWriter, req *http.Request) {
 
        hash := mux.Vars(req)["hash"]
 
-       // Read the block data to be stored.
-       // If the request exceeds BLOCKSIZE bytes, issue a HTTP 500 error.
-       //
+       // Detect as many error conditions as possible before reading
+       // the body: avoid transmitting data that will not end up
+       // being written anyway.
+
+       if req.ContentLength == -1 {
+               http.Error(resp, SizeRequiredError.Error(), SizeRequiredError.HTTPCode)
+               return
+       }
+
        if req.ContentLength > BLOCKSIZE {
                http.Error(resp, TooLongError.Error(), TooLongError.HTTPCode)
                return
        }
 
-       buf := make([]byte, req.ContentLength)
-       nread, err := io.ReadFull(req.Body, buf)
+       if len(KeepVM.AllWritable()) == 0 {
+               http.Error(resp, FullError.Error(), FullError.HTTPCode)
+               return
+       }
+
+       buf := bufs.Get(int(req.ContentLength))
+       _, err := io.ReadFull(req.Body, buf)
        if err != nil {
                http.Error(resp, err.Error(), 500)
-       } else if int64(nread) < req.ContentLength {
-               http.Error(resp, "request truncated", 500)
-       } else {
-               if err := PutBlock(buf, hash); err == nil {
-                       // Success; add a size hint, sign the locator if
-                       // possible, and return it to the client.
-                       return_hash := fmt.Sprintf("%s+%d", hash, len(buf))
-                       api_token := GetApiToken(req)
-                       if PermissionSecret != nil && api_token != "" {
-                               expiry := time.Now().Add(permission_ttl)
-                               return_hash = SignLocator(return_hash, api_token, expiry)
-                       }
-                       resp.Write([]byte(return_hash + "\n"))
-               } else {
-                       ke := err.(*KeepError)
-                       http.Error(resp, ke.Error(), ke.HTTPCode)
-               }
+               bufs.Put(buf)
+               return
        }
-       return
+
+       err = PutBlock(buf, hash)
+       bufs.Put(buf)
+
+       if err != nil {
+               ke := err.(*KeepError)
+               http.Error(resp, ke.Error(), ke.HTTPCode)
+               return
+       }
+
+       // Success; add a size hint, sign the locator if possible, and
+       // return it to the client.
+       return_hash := fmt.Sprintf("%s+%d", hash, req.ContentLength)
+       api_token := GetApiToken(req)
+       if PermissionSecret != nil && api_token != "" {
+               expiry := time.Now().Add(blob_signature_ttl)
+               return_hash = SignLocator(return_hash, api_token, expiry)
+       }
+       resp.Write([]byte(return_hash + "\n"))
 }
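The reordered handler validates everything it can from headers before reading the body or reserving a pooled buffer. The precheck in isolation (the BLOCKSIZE value mirrors Keep's 64 MiB block limit):

package main

import (
	"fmt"
	"net/http"
)

const BLOCKSIZE = 64 * 1024 * 1024 // Keep's maximum block size

// putPrecheck rejects a request on its Content-Length alone, before
// any body bytes are read or a buffer is taken from the pool.
func putPrecheck(req *http.Request) (int, error) {
	if req.ContentLength == -1 {
		return http.StatusLengthRequired, fmt.Errorf("Content-Length required")
	}
	if req.ContentLength > BLOCKSIZE {
		return http.StatusRequestEntityTooLarge, fmt.Errorf("block exceeds %d bytes", BLOCKSIZE)
	}
	return http.StatusOK, nil
}

func main() {
	fmt.Println(putPrecheck(&http.Request{ContentLength: -1}))
	fmt.Println(putPrecheck(&http.Request{ContentLength: BLOCKSIZE + 1}))
}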
 
 // IndexHandler
@@ -234,11 +191,18 @@ func IndexHandler(resp http.ResponseWriter, req *http.Request) {
 
        prefix := mux.Vars(req)["prefix"]
 
-       var index string
-       for _, vol := range KeepVM.Volumes() {
-               index = index + vol.Index(prefix)
+       for _, vol := range KeepVM.AllReadable() {
+               if err := vol.IndexTo(prefix, resp); err != nil {
+                       // The only errors returned by IndexTo are
+                       // write errors returned by resp.Write(),
+                       // which probably means the client has
+                       // disconnected and this error will never be
+                       // reported to the client -- but it will
+                       // appear in our own error log.
+                       http.Error(resp, err.Error(), http.StatusInternalServerError)
+                       return
+               }
        }
-       resp.Write([]byte(index))
 }
 
 // StatusHandler
@@ -282,8 +246,8 @@ func StatusHandler(resp http.ResponseWriter, req *http.Request) {
 func GetNodeStatus() *NodeStatus {
        st := new(NodeStatus)
 
-       st.Volumes = make([]*VolumeStatus, len(KeepVM.Volumes()))
-       for i, vol := range KeepVM.Volumes() {
+       st.Volumes = make([]*VolumeStatus, len(KeepVM.AllReadable()))
+       for i, vol := range KeepVM.AllReadable() {
                st.Volumes[i] = vol.Status()
        }
        return st
@@ -358,14 +322,14 @@ func DeleteHandler(resp http.ResponseWriter, req *http.Request) {
                return
        }
 
-       // Delete copies of this block from all available volumes.  Report
-       // how many blocks were successfully and unsuccessfully
-       // deleted.
+       // Delete copies of this block from all available volumes.
+       // Report how many blocks were successfully deleted, and how
+       // many were found on writable volumes but not deleted.
        var result struct {
                Deleted int `json:"copies_deleted"`
                Failed  int `json:"copies_failed"`
        }
-       for _, vol := range KeepVM.Volumes() {
+       for _, vol := range KeepVM.AllWritable() {
                if err := vol.Delete(hash); err == nil {
                        result.Deleted++
                } else if os.IsNotExist(err) {
@@ -445,7 +409,7 @@ func PullHandler(resp http.ResponseWriter, req *http.Request) {
        var pr []PullRequest
        r := json.NewDecoder(req.Body)
        if err := r.Decode(&pr); err != nil {
-               http.Error(resp, BadRequestError.Error(), BadRequestError.HTTPCode)
+               http.Error(resp, err.Error(), BadRequestError.HTTPCode)
                return
        }
 
@@ -479,7 +443,7 @@ func TrashHandler(resp http.ResponseWriter, req *http.Request) {
        var trash []TrashRequest
        r := json.NewDecoder(req.Body)
        if err := r.Decode(&trash); err != nil {
-               http.Error(resp, BadRequestError.Error(), BadRequestError.HTTPCode)
+               http.Error(resp, err.Error(), BadRequestError.HTTPCode)
                return
        }
 
@@ -528,52 +492,54 @@ func GetBlock(hash string, update_timestamp bool) ([]byte, error) {
        // Attempt to read the requested hash from a keep volume.
        error_to_caller := NotFoundError
 
-       for _, vol := range KeepVM.Volumes() {
-               if buf, err := vol.Get(hash); err != nil {
-                       // IsNotExist is an expected error and may be ignored.
-                       // (If all volumes report IsNotExist, we return a NotFoundError)
-                       // All other errors should be logged but we continue trying to
-                       // read.
-                       switch {
-                       case os.IsNotExist(err):
-                               continue
-                       default:
+       var vols []Volume
+       if update_timestamp {
+               // Pointless to find the block on an unwritable volume
+               // because Touch() will fail -- this is as good as
+               // "not found" for purposes of callers who need to
+               // update_timestamp.
+               vols = KeepVM.AllWritable()
+       } else {
+               vols = KeepVM.AllReadable()
+       }
+
+       for _, vol := range vols {
+               buf, err := vol.Get(hash)
+               if err != nil {
+                       // IsNotExist is an expected error and may be
+                       // ignored. All other errors are logged. In
+                       // any case we continue trying to read other
+                       // volumes. If all volumes report IsNotExist,
+                       // we return a NotFoundError.
+                       if !os.IsNotExist(err) {
                                log.Printf("GetBlock: reading %s: %s\n", hash, err)
                        }
-               } else {
-                       // Double check the file checksum.
-                       //
-                       filehash := fmt.Sprintf("%x", md5.Sum(buf))
-                       if filehash != hash {
-                               // TODO(twp): this condition probably represents a bad disk and
-                               // should raise major alarm bells for an administrator: e.g.
-                               // they should be sent directly to an event manager at high
-                               // priority or logged as urgent problems.
-                               //
-                               log.Printf("%s: checksum mismatch for request %s (actual %s)\n",
-                                       vol, hash, filehash)
-                               error_to_caller = DiskHashError
-                       } else {
-                               // Success!
-                               if error_to_caller != NotFoundError {
-                                       log.Printf("%s: checksum mismatch for request %s but a good copy was found on another volume and returned\n",
-                                               vol, hash)
-                               }
-                               // Update the timestamp if the caller requested.
-                               // If we could not update the timestamp, continue looking on
-                               // other volumes.
-                               if update_timestamp {
-                                       if vol.Touch(hash) != nil {
-                                               continue
-                                       }
-                               }
-                               return buf, nil
+                       continue
+               }
+               // Check the file checksum.
+               //
+               filehash := fmt.Sprintf("%x", md5.Sum(buf))
+               if filehash != hash {
+                       // TODO: Try harder to tell a sysadmin about
+                       // this.
+                       log.Printf("%s: checksum mismatch for request %s (actual %s)\n",
+                               vol, hash, filehash)
+                       error_to_caller = DiskHashError
+                       continue
+               }
+               if error_to_caller == DiskHashError {
+                       log.Printf("%s: checksum mismatch for request %s but a good copy was found on another volume and returned",
+                               vol, hash)
+               }
+               if update_timestamp {
+                       if err := vol.Touch(hash); err != nil {
+                               error_to_caller = GenericError
+                               log.Printf("%s: Touch %s failed: %s",
+                                       vol, hash, err)
+                               continue
                        }
                }
-       }
-
-       if error_to_caller != NotFoundError {
-               log.Printf("%s: checksum mismatch, no good copy found\n", hash)
+               return buf, nil
        }
        return nil, error_to_caller
 }
@@ -630,31 +596,39 @@ func PutBlock(block []byte, hash string) error {
 
        // Choose a Keep volume to write to.
        // If this volume fails, try all of the volumes in order.
-       vol := KeepVM.Choose()
-       if err := vol.Put(hash, block); err == nil {
-               return nil // success!
-       } else {
-               allFull := true
-               for _, vol := range KeepVM.Volumes() {
-                       err := vol.Put(hash, block)
-                       if err == nil {
-                               return nil // success!
-                       }
-                       if err != FullError {
-                               // The volume is not full but the write did not succeed.
-                               // Report the error and continue trying.
-                               allFull = false
-                               log.Printf("%s: Write(%s): %s\n", vol, hash, err)
-                       }
+       if vol := KeepVM.NextWritable(); vol != nil {
+               if err := vol.Put(hash, block); err == nil {
+                       return nil // success!
                }
+       }
 
-               if allFull {
-                       log.Printf("all Keep volumes full")
-                       return FullError
-               } else {
-                       log.Printf("all Keep volumes failed")
-                       return GenericError
+       writables := KeepVM.AllWritable()
+       if len(writables) == 0 {
+               log.Print("No writable volumes.")
+               return FullError
+       }
+
+       allFull := true
+       for _, vol := range writables {
+               err := vol.Put(hash, block)
+               if err == nil {
+                       return nil // success!
                }
+               if err != FullError {
+                       // The volume is not full but the
+                       // write did not succeed.  Report the
+                       // error and continue trying.
+                       allFull = false
+                       log.Printf("%s: Write(%s): %s\n", vol, hash, err)
+               }
+       }
+
+       if allFull {
+               log.Print("All volumes are full.")
+               return FullError
+       } else {
+               // Already logged the non-full errors.
+               return GenericError
        }
 }
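
PutBlockHandler now borrows its buffers from the shared bufferPool created in main() with newBufferPool(maxBuffers, BLOCKSIZE). The pool implementation is in a file not shown in this diff; a minimal sketch, assuming a channel-based design in which Get blocks until a buffer is free (matching the -max-buffers help text):

    // bufferPool sketch: at most `count` buffers of `size` bytes exist
    // at once; Get blocks until one is available, Put releases it.
    type bufferPool struct {
            bufs chan []byte
    }

    func newBufferPool(count int, size int) *bufferPool {
            p := &bufferPool{bufs: make(chan []byte, count)}
            for i := 0; i < count; i++ {
                    p.bufs <- make([]byte, size)
            }
            return p
    }

    // Get hands out a buffer trimmed to the requested length.
    func (p *bufferPool) Get(size int) []byte {
            return (<-p.bufs)[:size]
    }

    // Put returns the buffer to the pool at its full capacity.
    func (p *bufferPool) Put(buf []byte) {
            p.bufs <- buf[:cap(buf)]
    }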
 
index a363bac2553998e6356216f77472bcbf537b78d3..06b2f6fa28a3f6af0f4e9fef1861a5b65787d58d 100644 (file)
@@ -1,7 +1,9 @@
 package main
 
 import (
+       "bufio"
        "bytes"
+       "errors"
        "flag"
        "fmt"
        "git.curoverse.com/arvados.git/sdk/go/keepclient"
@@ -37,23 +39,26 @@ var PROC_MOUNTS = "/proc/mounts"
 
 // enforce_permissions controls whether permission signatures
 // should be enforced (affecting GET and DELETE requests).
-// Initialized by the --enforce-permissions flag.
+// Initialized by the -enforce-permissions flag.
 var enforce_permissions bool
 
-// permission_ttl is the time duration for which new permission
+// blob_signature_ttl is the time duration for which new permission
 // signatures (returned by PUT requests) will be valid.
-// Initialized by the --permission-ttl flag.
-var permission_ttl time.Duration
+// Initialized by the -permission-ttl flag.
+var blob_signature_ttl time.Duration
 
 // data_manager_token represents the API token used by the
 // Data Manager, and is required on certain privileged operations.
-// Initialized by the --data-manager-token-file flag.
+// Initialized by the -data-manager-token-file flag.
 var data_manager_token string
 
 // never_delete can be used to prevent the DELETE handler from
 // actually deleting anything.
 var never_delete = false
 
+var maxBuffers = 128
+var bufs *bufferPool
+
 // ==========
 // Error types.
 //
@@ -73,7 +78,8 @@ var (
        NotFoundError       = &KeepError{404, "Not Found"}
        GenericError        = &KeepError{500, "Fail"}
        FullError           = &KeepError{503, "Full"}
-       TooLongError        = &KeepError{504, "Timeout"}
+       SizeRequiredError   = &KeepError{411, "Missing Content-Length"}
+       TooLongError        = &KeepError{413, "Block is too large"}
        MethodDisabledError = &KeepError{405, "Method disabled"}
 )
 
@@ -103,40 +109,108 @@ var KeepVM VolumeManager
 var pullq *WorkQueue
 var trashq *WorkQueue
 
+var (
+       flagSerializeIO bool
+       flagReadonly    bool
+)
+
+type volumeSet []Volume
+
+func (vs *volumeSet) Set(value string) error {
+       if dirs := strings.Split(value, ","); len(dirs) > 1 {
+               log.Print("DEPRECATED: using comma-separated volume list.")
+               for _, dir := range dirs {
+                       if err := vs.Set(dir); err != nil {
+                               return err
+                       }
+               }
+               return nil
+       }
+       if len(value) == 0 || value[0] != '/' {
+               return errors.New("Invalid volume: must begin with '/'.")
+       }
+       if _, err := os.Stat(value); err != nil {
+               return err
+       }
+       *vs = append(*vs, &UnixVolume{
+               root:      value,
+               serialize: flagSerializeIO,
+               readonly:  flagReadonly,
+       })
+       return nil
+}
+
+func (vs *volumeSet) String() string {
+       s := "["
+       for i, v := range *vs {
+               if i > 0 {
+                       s = s + " "
+               }
+               s = s + v.String()
+       }
+       return s + "]"
+}
+
+// Discover adds a volume for every directory named "keep" that is
+// located at the top level of a device- or tmpfs-backed mount point
+// other than "/". It returns the number of volumes added.
+func (vs *volumeSet) Discover() int {
+       added := 0
+       f, err := os.Open(PROC_MOUNTS)
+       if err != nil {
+               log.Fatalf("opening %s: %s", PROC_MOUNTS, err)
+       }
+       scanner := bufio.NewScanner(f)
+       for scanner.Scan() {
+               args := strings.Fields(scanner.Text())
+               if err := scanner.Err(); err != nil {
+                       log.Fatalf("reading %s: %s", PROC_MOUNTS, err)
+               }
+               dev, mount := args[0], args[1]
+               if mount == "/" {
+                       continue
+               }
+               if dev != "tmpfs" && !strings.HasPrefix(dev, "/dev/") {
+                       continue
+               }
+               keepdir := mount + "/keep"
+               if st, err := os.Stat(keepdir); err != nil || !st.IsDir() {
+                       continue
+               }
+               // Set the -readonly flag (but only for this volume)
+               // if the filesystem is mounted readonly.
+               flagReadonlyWas := flagReadonly
+               for _, fsopt := range strings.Split(args[3], ",") {
+                       if fsopt == "ro" {
+                               flagReadonly = true
+                               break
+                       }
+                       if fsopt == "rw" {
+                               break
+                       }
+               }
+               vs.Set(keepdir)
+               flagReadonly = flagReadonlyWas
+               added++
+       }
+       return added
+}
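+
+// For illustration, given a /proc/mounts containing the lines below
+// (device and mount paths invented, and assuming a keep/ directory
+// exists at the top of each mount), Discover would add
+// /mnt/disk1/keep as writable, add /mnt/disk2/keep as read-only
+// because of the "ro" mount option, and skip the rootfs entry:
+//
+//     /dev/sdb1 /mnt/disk1 ext4 rw,relatime 0 0
+//     tmpfs /mnt/disk2 tmpfs nosuid,nodev,ro 0 0
+//     rootfs / rootfs rw 0 0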
+
 // TODO(twp): continue moving as much code as possible out of main
 // so it can be effectively tested. Esp. handling and postprocessing
 // of command line flags (identifying Keep volumes and initializing
 // permission arguments).
 
 func main() {
-       log.Println("Keep started: pid", os.Getpid())
-
-       // Parse command-line flags:
-       //
-       // -listen=ipaddr:port
-       //    Interface on which to listen for requests. Use :port without
-       //    an ipaddr to listen on all network interfaces.
-       //    Examples:
-       //      -listen=127.0.0.1:4949
-       //      -listen=10.0.1.24:8000
-       //      -listen=:25107 (to listen to port 25107 on all interfaces)
-       //
-       // -volumes
-       //    A comma-separated list of directories to use as Keep volumes.
-       //    Example:
-       //      -volumes=/var/keep01,/var/keep02,/var/keep03/subdir
-       //
-       //    If -volumes is empty or is not present, Keep will select volumes
-       //    by looking at currently mounted filesystems for /keep top-level
-       //    directories.
+       log.Println("keepstore starting, pid", os.Getpid())
+       defer log.Println("keepstore exiting, pid", os.Getpid())
 
        var (
                data_manager_token_file string
                listen                  string
-               permission_key_file     string
+               blob_signing_key_file   string
                permission_ttl_sec      int
-               serialize_io            bool
-               volumearg               string
+               volumes                 volumeSet
                pidfile                 string
        )
        flag.StringVar(
@@ -154,9 +228,7 @@ func main() {
                &listen,
                "listen",
                DEFAULT_ADDR,
-               "Interface on which to listen for requests, in the format "+
-                       "ipaddr:port. e.g. -listen=10.0.1.24:8000. Use -listen=:port "+
-                       "to listen on all network interfaces.")
+               "Listening address, in the form \"host:port\". e.g., 10.0.1.24:8000. Omit the host part to listen on all interfaces.")
        flag.BoolVar(
                &never_delete,
                "never-delete",
@@ -164,65 +236,96 @@ func main() {
                "If set, nothing will be deleted. HTTP 405 will be returned "+
                        "for valid DELETE requests.")
        flag.StringVar(
-               &permission_key_file,
+               &blob_signing_key_file,
                "permission-key-file",
                "",
+               "Synonym for -blob-signing-key-file.")
+       flag.StringVar(
+               &blob_signing_key_file,
+               "blob-signing-key-file",
+               "",
                "File containing the secret key for generating and verifying "+
-                       "permission signatures.")
+                       "blob permission signatures.")
        flag.IntVar(
                &permission_ttl_sec,
                "permission-ttl",
-               1209600,
-               "Expiration time (in seconds) for newly generated permission "+
-                       "signatures.")
+               0,
+               "Synonym for -blob-signature-ttl.")
+       flag.IntVar(
+               &permission_ttl_sec,
+               "blob-signature-ttl",
+               int(time.Duration(2*7*24*time.Hour).Seconds()),
+               "Lifetime of blob permission signatures. "+
+                       "See services/api/config/application.default.yml.")
        flag.BoolVar(
-               &serialize_io,
+               &flagSerializeIO,
                "serialize",
                false,
-               "If set, all read and write operations on local Keep volumes will "+
-                       "be serialized.")
-       flag.StringVar(
-               &volumearg,
+               "Serialize read and write operations on the following volumes.")
+       flag.BoolVar(
+               &flagReadonly,
+               "readonly",
+               false,
+               "Do not write, delete, or touch anything on the following volumes.")
+       flag.Var(
+               &volumes,
                "volumes",
-               "",
-               "Comma-separated list of directories to use for Keep volumes, "+
-                       "e.g. -volumes=/var/keep1,/var/keep2. If empty or not "+
-                       "supplied, Keep will scan mounted filesystems for volumes "+
-                       "with a /keep top-level directory.")
-
+               "Deprecated synonym for -volume.")
+       flag.Var(
+               &volumes,
+               "volume",
+               "Local storage directory. Can be given more than once to add multiple directories. If none are supplied, the default is to use all directories named \"keep\" that exist in the top level directory of a mount point at startup time. Can be a comma-separated list, but this is deprecated: use multiple -volume arguments instead.")
        flag.StringVar(
                &pidfile,
                "pid",
                "",
-               "Path to write pid file")
+               "Path to write pid file during startup. This file is kept open and locked with LOCK_EX until keepstore exits, so `fuser -k pidfile` is one way to shut down. Exit immediately if there is an error opening, locking, or writing the pid file.")
+       flag.IntVar(
+               &maxBuffers,
+               "max-buffers",
+               maxBuffers,
+               fmt.Sprintf("Maximum RAM to use for data buffers, given in multiples of block size (%d MiB). When this limit is reached, HTTP requests requiring buffers (like GET and PUT) will wait for buffer space to be released.", BLOCKSIZE>>20))
 
        flag.Parse()
 
-       // Look for local keep volumes.
-       var keepvols []string
-       if volumearg == "" {
-               // TODO(twp): decide whether this is desirable default behavior.
-               // In production we may want to require the admin to specify
-               // Keep volumes explicitly.
-               keepvols = FindKeepVolumes()
-       } else {
-               keepvols = strings.Split(volumearg, ",")
+       if maxBuffers <= 0 {
+               log.Fatal("-max-buffers must be greater than zero.")
        }
+       bufs = newBufferPool(maxBuffers, BLOCKSIZE)
 
-       // Check that the specified volumes actually exist.
-       var goodvols []Volume = nil
-       for _, v := range keepvols {
-               if _, err := os.Stat(v); err == nil {
-                       log.Println("adding Keep volume:", v)
-                       newvol := MakeUnixVolume(v, serialize_io)
-                       goodvols = append(goodvols, &newvol)
-               } else {
-                       log.Printf("bad Keep volume: %s\n", err)
+       if pidfile != "" {
+               f, err := os.OpenFile(pidfile, os.O_RDWR|os.O_CREATE, 0777)
+               if err != nil {
+                       log.Fatalf("open pidfile (%s): %s", pidfile, err)
+               }
+               err = syscall.Flock(int(f.Fd()), syscall.LOCK_EX|syscall.LOCK_NB)
+               if err != nil {
+                       log.Fatalf("flock pidfile (%s): %s", pidfile, err)
+               }
+               err = f.Truncate(0)
+               if err != nil {
+                       log.Fatalf("truncate pidfile (%s): %s", pidfile, err)
+               }
+               _, err = fmt.Fprint(f, os.Getpid())
+               if err != nil {
+                       log.Fatalf("write pidfile (%s): %s", pidfile, err)
+               }
+               err = f.Sync()
+               if err != nil {
+                       log.Fatalf("sync pidfile (%s): %s", pidfile, err)
+               }
+               defer f.Close()
+               defer os.Remove(pidfile)
+       }
+
+       if len(volumes) == 0 {
+               if volumes.Discover() == 0 {
+                       log.Fatal("No volumes found.")
                }
        }
 
-       if len(goodvols) == 0 {
-               log.Fatal("could not find any keep volumes")
+       for _, v := range volumes {
+               log.Printf("Using volume %v (writable=%v)", v, v.Writable())
        }
 
        // Initialize data manager token and permission key.
@@ -235,33 +338,30 @@ func main() {
                        log.Fatalf("reading data manager token: %s\n", err)
                }
        }
-       if permission_key_file != "" {
-               if buf, err := ioutil.ReadFile(permission_key_file); err == nil {
+       if blob_signing_key_file != "" {
+               if buf, err := ioutil.ReadFile(blob_signing_key_file); err == nil {
                        PermissionSecret = bytes.TrimSpace(buf)
                } else {
                        log.Fatalf("reading permission key: %s\n", err)
                }
        }
 
-       // Initialize permission TTL
-       permission_ttl = time.Duration(permission_ttl_sec) * time.Second
+       blob_signature_ttl = time.Duration(permission_ttl_sec) * time.Second
 
-       // If --enforce-permissions is true, we must have a permission key
-       // to continue.
        if PermissionSecret == nil {
                if enforce_permissions {
-                       log.Fatal("--enforce-permissions requires a permission key")
+                       log.Fatal("-enforce-permissions requires a permission key")
                } else {
                        log.Println("Running without a PermissionSecret. Block locators " +
                                "returned by this server will not be signed, and will be rejected " +
                                "by a server that enforces permissions.")
-                       log.Println("To fix this, run Keep with --permission-key-file=<path> " +
-                               "to define the location of a file containing the permission key.")
+                       log.Println("To fix this, use the -blob-signing-key-file flag " +
+                               "to specify the file containing the permission key.")
                }
        }
 
        // Start a round-robin VolumeManager with the volumes we have found.
-       KeepVM = MakeRRVolumeManager(goodvols)
+       KeepVM = MakeRRVolumeManager(volumes)
 
        // Tell the built-in HTTP server to direct all requests to the REST router.
        loggingRouter := MakeLoggingRESTRouter()
@@ -276,7 +376,7 @@ func main() {
        }
 
        // Initialize Pull queue and worker
-       keepClient := keepclient.KeepClient{
+       keepClient := &keepclient.KeepClient{
                Arvados:       nil,
                Want_replicas: 1,
                Using_proxy:   true,
@@ -300,24 +400,9 @@ func main() {
                listener.Close()
        }(term)
        signal.Notify(term, syscall.SIGTERM)
+       signal.Notify(term, syscall.SIGINT)
 
-       if pidfile != "" {
-               f, err := os.Create(pidfile)
-               if err == nil {
-                       fmt.Fprint(f, os.Getpid())
-                       f.Close()
-               } else {
-                       log.Printf("Error writing pid file (%s): %s", pidfile, err.Error())
-               }
-       }
-
-       // Start listening for requests.
+       log.Println("listening at", listen)
        srv := &http.Server{Addr: listen}
        srv.Serve(listener)
-
-       log.Println("shutting down")
-
-       if pidfile != "" {
-               os.Remove(pidfile)
-       }
 }
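
Taken together, the reworked flags allow an invocation along these lines (every path here is invented for illustration):

    keepstore -listen=:25107 \
        -volume /mnt/disk1/keep -volume /mnt/disk2/keep \
        -blob-signing-key-file /etc/keepstore/blob-signing.key \
        -max-buffers 100 \
        -pid /var/run/keepstore.pid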
index 686f502500cec1e6e7ad3508f2978ef55522a6dc..811cc70d3f75460642a0af60e8138301764b21ad 100644 (file)
@@ -52,9 +52,9 @@ func TestGetBlock(t *testing.T) {
 
        // Prepare two test Keep volumes. Our block is stored on the second volume.
        KeepVM = MakeTestVolumeManager(2)
-       defer func() { KeepVM.Quit() }()
+       defer KeepVM.Close()
 
-       vols := KeepVM.Volumes()
+       vols := KeepVM.AllReadable()
        if err := vols[1].Put(TEST_HASH, TEST_BLOCK); err != nil {
                t.Error(err)
        }
@@ -77,7 +77,7 @@ func TestGetBlockMissing(t *testing.T) {
 
        // Create two empty test Keep volumes.
        KeepVM = MakeTestVolumeManager(2)
-       defer func() { KeepVM.Quit() }()
+       defer KeepVM.Close()
 
        // Check that GetBlock returns failure.
        result, err := GetBlock(TEST_HASH, false)
@@ -95,9 +95,9 @@ func TestGetBlockCorrupt(t *testing.T) {
 
        // Create two test Keep volumes and store a corrupt block in one.
        KeepVM = MakeTestVolumeManager(2)
-       defer func() { KeepVM.Quit() }()
+       defer KeepVM.Close()
 
-       vols := KeepVM.Volumes()
+       vols := KeepVM.AllReadable()
        vols[0].Put(TEST_HASH, BAD_BLOCK)
 
        // Check that GetBlock returns failure.
@@ -119,15 +119,15 @@ func TestPutBlockOK(t *testing.T) {
 
        // Create two test Keep volumes.
        KeepVM = MakeTestVolumeManager(2)
-       defer func() { KeepVM.Quit() }()
+       defer KeepVM.Close()
 
        // Check that PutBlock stores the data as expected.
        if err := PutBlock(TEST_BLOCK, TEST_HASH); err != nil {
                t.Fatalf("PutBlock: %v", err)
        }
 
-       vols := KeepVM.Volumes()
-       result, err := vols[0].Get(TEST_HASH)
+       vols := KeepVM.AllReadable()
+       result, err := vols[1].Get(TEST_HASH)
        if err != nil {
-               t.Fatalf("Volume #0 Get returned error: %v", err)
+               t.Fatalf("Volume #1 Get returned error: %v", err)
        }
@@ -146,9 +146,9 @@ func TestPutBlockOneVol(t *testing.T) {
 
        // Create two test Keep volumes, but cripple one of them.
        KeepVM = MakeTestVolumeManager(2)
-       defer func() { KeepVM.Quit() }()
+       defer KeepVM.Close()
 
-       vols := KeepVM.Volumes()
+       vols := KeepVM.AllWritable()
        vols[0].(*MockVolume).Bad = true
 
        // Check that PutBlock stores the data as expected.
@@ -176,7 +176,7 @@ func TestPutBlockMD5Fail(t *testing.T) {
 
        // Create two test Keep volumes.
        KeepVM = MakeTestVolumeManager(2)
-       defer func() { KeepVM.Quit() }()
+       defer KeepVM.Close()
 
        // Check that PutBlock returns the expected error when the hash does
        // not match the block.
@@ -200,10 +200,10 @@ func TestPutBlockCorrupt(t *testing.T) {
 
        // Create two test Keep volumes.
        KeepVM = MakeTestVolumeManager(2)
-       defer func() { KeepVM.Quit() }()
+       defer KeepVM.Close()
 
        // Store a corrupted block under TEST_HASH.
-       vols := KeepVM.Volumes()
+       vols := KeepVM.AllWritable()
        vols[0].Put(TEST_HASH, BAD_BLOCK)
        if err := PutBlock(TEST_BLOCK, TEST_HASH); err != nil {
                t.Errorf("PutBlock: %v", err)
@@ -231,7 +231,7 @@ func TestPutBlockCollision(t *testing.T) {
 
        // Prepare two test Keep volumes.
        KeepVM = MakeTestVolumeManager(2)
-       defer func() { KeepVM.Quit() }()
+       defer KeepVM.Close()
 
        // Store one block, then attempt to store the other. Confirm that
        // PutBlock reported a CollisionError.
@@ -254,8 +254,8 @@ func TestPutBlockTouchFails(t *testing.T) {
 
        // Prepare two test Keep volumes.
        KeepVM = MakeTestVolumeManager(2)
-       defer func() { KeepVM.Quit() }()
-       vols := KeepVM.Volumes()
+       defer KeepVM.Close()
+       vols := KeepVM.AllWritable()
 
        // Store a block and then make the underlying volume bad,
        // so a subsequent attempt to update the file timestamp
@@ -293,29 +293,16 @@ func TestPutBlockTouchFails(t *testing.T) {
        }
 }
 
-// ========================================
-// FindKeepVolumes tests.
-// ========================================
-
-// TestFindKeepVolumes
-//     Confirms that FindKeepVolumes finds tmpfs volumes with "/keep"
-//     directories at the top level.
-//
-func TestFindKeepVolumes(t *testing.T) {
-       var tempVols [2]string
+func TestDiscoverTmpfs(t *testing.T) {
+       var tempVols [4]string
        var err error
 
-       defer func() {
-               for _, path := range tempVols {
-                       os.RemoveAll(path)
-               }
-       }()
-
-       // Create two directories suitable for using as keep volumes.
+       // Create some directories suitable for using as keep volumes.
        for i := range tempVols {
                if tempVols[i], err = ioutil.TempDir("", "findvol"); err != nil {
                        t.Fatal(err)
                }
+               defer os.RemoveAll(tempVols[i])
                tempVols[i] = tempVols[i] + "/keep"
                if err = os.Mkdir(tempVols[i], 0755); err != nil {
                        t.Fatal(err)
@@ -323,53 +310,69 @@ func TestFindKeepVolumes(t *testing.T) {
        }
 
        // Set up a bogus PROC_MOUNTS file.
-       if f, err := ioutil.TempFile("", "keeptest"); err == nil {
-               for _, vol := range tempVols {
-                       fmt.Fprintf(f, "tmpfs %s tmpfs opts\n", path.Dir(vol))
+       f, err := ioutil.TempFile("", "keeptest")
+       if err != nil {
+               t.Fatal(err)
+       }
+       defer os.Remove(f.Name())
+       for i, vol := range tempVols {
+               // Add readonly mount points at odd indexes.
+               var opts string
+               switch i % 2 {
+               case 0:
+                       opts = "rw,nosuid,nodev,noexec"
+               case 1:
+                       opts = "nosuid,nodev,noexec,ro"
                }
-               f.Close()
-               PROC_MOUNTS = f.Name()
-
-               // Check that FindKeepVolumes finds the temp volumes.
-               resultVols := FindKeepVolumes()
-               if len(tempVols) != len(resultVols) {
-                       t.Fatalf("set up %d volumes, FindKeepVolumes found %d\n",
-                               len(tempVols), len(resultVols))
+               fmt.Fprintf(f, "tmpfs %s tmpfs %s 0 0\n", path.Dir(vol), opts)
+       }
+       f.Close()
+       PROC_MOUNTS = f.Name()
+
+       var resultVols volumeSet
+       added := resultVols.Discover()
+
+       if added != len(resultVols) {
+               t.Errorf("Discover returned %d, but added %d volumes",
+                       added, len(resultVols))
+       }
+       if added != len(tempVols) {
+               t.Errorf("Discover returned %d but we set up %d volumes",
+                       added, len(tempVols))
+       }
+       for i, tmpdir := range tempVols {
+               if tmpdir != resultVols[i].(*UnixVolume).root {
+                       t.Errorf("Discover returned %s, expected %s\n",
+                               resultVols[i].(*UnixVolume).root, tmpdir)
                }
-               for i := range tempVols {
-                       if tempVols[i] != resultVols[i] {
-                               t.Errorf("FindKeepVolumes returned %s, expected %s\n",
-                                       resultVols[i], tempVols[i])
-                       }
+               if expectReadonly := i%2 == 1; expectReadonly != resultVols[i].(*UnixVolume).readonly {
+                       t.Errorf("Discover added %s with readonly=%v, should be %v",
+                               tmpdir, !expectReadonly, expectReadonly)
                }
-
-               os.Remove(f.Name())
        }
 }
 
-// TestFindKeepVolumesFail
-//     When no Keep volumes are present, FindKeepVolumes returns an empty slice.
-//
-func TestFindKeepVolumesFail(t *testing.T) {
+func TestDiscoverNone(t *testing.T) {
        defer teardown()
 
        // Set up a bogus PROC_MOUNTS file with no Keep vols.
-       if f, err := ioutil.TempFile("", "keeptest"); err == nil {
-               fmt.Fprintln(f, "rootfs / rootfs opts 0 0")
-               fmt.Fprintln(f, "sysfs /sys sysfs opts 0 0")
-               fmt.Fprintln(f, "proc /proc proc opts 0 0")
-               fmt.Fprintln(f, "udev /dev devtmpfs opts 0 0")
-               fmt.Fprintln(f, "devpts /dev/pts devpts opts 0 0")
-               f.Close()
-               PROC_MOUNTS = f.Name()
-
-               // Check that FindKeepVolumes returns an empty array.
-               resultVols := FindKeepVolumes()
-               if len(resultVols) != 0 {
-                       t.Fatalf("FindKeepVolumes returned %v", resultVols)
-               }
-
-               os.Remove(PROC_MOUNTS)
+       f, err := ioutil.TempFile("", "keeptest")
+       if err != nil {
+               t.Fatal(err)
+       }
+       defer os.Remove(f.Name())
+       fmt.Fprintln(f, "rootfs / rootfs opts 0 0")
+       fmt.Fprintln(f, "sysfs /sys sysfs opts 0 0")
+       fmt.Fprintln(f, "proc /proc proc opts 0 0")
+       fmt.Fprintln(f, "udev /dev devtmpfs opts 0 0")
+       fmt.Fprintln(f, "devpts /dev/pts devpts opts 0 0")
+       f.Close()
+       PROC_MOUNTS = f.Name()
+
+       var resultVols volumeSet
+       added := resultVols.Discover()
+       if added != 0 || len(resultVols) != 0 {
+               t.Fatalf("got %d, %v; expected 0, []", added, resultVols)
        }
 }
 
@@ -382,17 +385,19 @@ func TestIndex(t *testing.T) {
        // Include multiple blocks on different volumes, and
        // some metadata files.
        KeepVM = MakeTestVolumeManager(2)
-       defer func() { KeepVM.Quit() }()
+       defer KeepVM.Close()
 
-       vols := KeepVM.Volumes()
+       vols := KeepVM.AllReadable()
        vols[0].Put(TEST_HASH, TEST_BLOCK)
        vols[1].Put(TEST_HASH_2, TEST_BLOCK_2)
        vols[0].Put(TEST_HASH_3, TEST_BLOCK_3)
        vols[0].Put(TEST_HASH+".meta", []byte("metadata"))
        vols[1].Put(TEST_HASH_2+".meta", []byte("metadata"))
 
-       index := vols[0].Index("") + vols[1].Index("")
-       index_rows := strings.Split(index, "\n")
+       buf := new(bytes.Buffer)
+       vols[0].IndexTo("", buf)
+       vols[1].IndexTo("", buf)
+       index_rows := strings.Split(buf.String(), "\n")
        sort.Strings(index_rows)
        sorted_index := strings.Join(index_rows, "\n")
        expected := `^\n` + TEST_HASH + `\+\d+ \d+\n` +
@@ -402,7 +407,7 @@ func TestIndex(t *testing.T) {
        match, err := regexp.MatchString(expected, sorted_index)
        if err == nil {
                if !match {
-                       t.Errorf("IndexLocators returned:\n%s", index)
+                       t.Errorf("IndexLocators returned:\n%s", buf.String())
                }
        } else {
                t.Errorf("regexp.MatchString: %s", err)
@@ -420,9 +425,9 @@ func TestNodeStatus(t *testing.T) {
 
        // Set up test Keep volumes with some blocks.
        KeepVM = MakeTestVolumeManager(2)
-       defer func() { KeepVM.Quit() }()
+       defer KeepVM.Close()
 
-       vols := KeepVM.Volumes()
+       vols := KeepVM.AllReadable()
        vols[0].Put(TEST_HASH, TEST_BLOCK)
        vols[1].Put(TEST_HASH_2, TEST_BLOCK_2)
 
@@ -450,10 +455,8 @@ func TestNodeStatus(t *testing.T) {
 // Helper functions for unit tests.
 // ========================================
 
-// MakeTestVolumeManager
-//     Creates and returns a RRVolumeManager with the specified number
-//     of MockVolumes.
-//
+// MakeTestVolumeManager returns a RRVolumeManager with the specified
+// number of MockVolumes.
 func MakeTestVolumeManager(num_volumes int) VolumeManager {
        vols := make([]Volume, num_volumes)
        for i := range vols {
@@ -462,9 +465,7 @@ func MakeTestVolumeManager(num_volumes int) VolumeManager {
        return MakeRRVolumeManager(vols)
 }
 
-// teardown
-//     Cleanup to perform after each test.
-//
+// teardown cleans up after each test.
 func teardown() {
        data_manager_token = ""
        enforce_permissions = false
index e30df876322ab9c09acdbab110f851ea0a954e20..b622d1d3eefd2eb3bedfb5e4260976cb2075f85a 100644 (file)
@@ -8,6 +8,7 @@ import (
        "log"
        "net/http"
        "strings"
+       "time"
 )
 
 type LoggingResponseWriter struct {
@@ -40,12 +41,13 @@ func MakeLoggingRESTRouter() *LoggingRESTRouter {
 }
 
 func (loggingRouter *LoggingRESTRouter) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
-       loggingWriter := LoggingResponseWriter{200, 0, resp, ""}
+       t0 := time.Now()
+       loggingWriter := LoggingResponseWriter{http.StatusOK, 0, resp, ""}
        loggingRouter.router.ServeHTTP(&loggingWriter, req)
-       statusText := "OK"
+       statusText := http.StatusText(loggingWriter.Status)
        if loggingWriter.Status >= 400 {
                statusText = strings.Replace(loggingWriter.ResponseBody, "\n", "", -1)
        }
-       log.Printf("[%s] %s %s %d %d \"%s\"", req.RemoteAddr, req.Method, req.URL.Path[1:], loggingWriter.Status, loggingWriter.Length, statusText)
+       log.Printf("[%s] %s %s %.6fs %d %d \"%s\"", req.RemoteAddr, req.Method, req.URL.Path[1:], time.Since(t0).Seconds(), loggingWriter.Status, loggingWriter.Length, statusText)
 
 }
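
With the new timing field and real status text, a successful GET now logs a line shaped like this (all values invented):

    [10.0.1.24:53420] GET acbd18db4cc2f85cedef654fccc4a4d8+3 0.002345s 200 3 "OK"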
index d0081cd01014b69abf9910ffad059077d19549ee..7367dbf0adc33d47b74e34fda37d898c4e34242d 100644 (file)
@@ -48,15 +48,15 @@ func TestVerifySignatureExtraHints(t *testing.T) {
        PermissionSecret = []byte(known_key)
        defer func() { PermissionSecret = nil }()
 
-       if !VerifySignature(known_locator + "+K@xyzzy" + known_sig_hint, known_token) {
+       if !VerifySignature(known_locator+"+K@xyzzy"+known_sig_hint, known_token) {
                t.Fatal("Verify cannot handle hint before permission signature")
        }
 
-       if !VerifySignature(known_locator + known_sig_hint + "+Zfoo", known_token) {
+       if !VerifySignature(known_locator+known_sig_hint+"+Zfoo", known_token) {
                t.Fatal("Verify cannot handle hint after permission signature")
        }
 
-       if !VerifySignature(known_locator + "+K@xyzzy" + known_sig_hint + "+Zfoo", known_token) {
+       if !VerifySignature(known_locator+"+K@xyzzy"+known_sig_hint+"+Zfoo", known_token) {
                t.Fatal("Verify cannot handle hints around permission signature")
        }
 }
@@ -66,11 +66,11 @@ func TestVerifySignatureWrongSize(t *testing.T) {
        PermissionSecret = []byte(known_key)
        defer func() { PermissionSecret = nil }()
 
-       if !VerifySignature(known_hash + "+999999" + known_sig_hint, known_token) {
+       if !VerifySignature(known_hash+"+999999"+known_sig_hint, known_token) {
                t.Fatal("Verify cannot handle incorrect size hint")
        }
 
-       if !VerifySignature(known_hash + known_sig_hint, known_token) {
+       if !VerifySignature(known_hash+known_sig_hint, known_token) {
                t.Fatal("Verify cannot handle missing size hint")
        }
 }
index fac4bb15030eaaa8334bf375dc2a9baa4695fbb0..d85458a325a1c44e2e53d177da6bd12f8adbe07b 100644 (file)
@@ -19,7 +19,7 @@ import (
                        Skip the rest of the servers if no errors
                Repeat
 */
-func RunPullWorker(pullq *WorkQueue, keepClient keepclient.KeepClient) {
+func RunPullWorker(pullq *WorkQueue, keepClient *keepclient.KeepClient) {
        nextItem := pullq.NextItem
        for item := range nextItem {
                pullRequest := item.(PullRequest)
@@ -39,14 +39,14 @@ func RunPullWorker(pullq *WorkQueue, keepClient keepclient.KeepClient) {
                Using this token & signature, retrieve the given block.
                Write to storage
 */
-func PullItemAndProcess(pullRequest PullRequest, token string, keepClient keepclient.KeepClient) (err error) {
+func PullItemAndProcess(pullRequest PullRequest, token string, keepClient *keepclient.KeepClient) (err error) {
        keepClient.Arvados.ApiToken = token
 
        service_roots := make(map[string]string)
        for _, addr := range pullRequest.Servers {
                service_roots[addr] = addr
        }
-       keepClient.SetServiceRoots(service_roots)
+       keepClient.SetServiceRoots(service_roots, nil)
 
        // Generate signature with a random token
        expires_at := time.Now().Add(60 * time.Second)
@@ -75,7 +75,7 @@ func PullItemAndProcess(pullRequest PullRequest, token string, keepClient keepcl
 }
 
 // Fetch the content for the given locator using keepclient.
-var GetContent = func(signedLocator string, keepClient keepclient.KeepClient) (
+var GetContent = func(signedLocator string, keepClient *keepclient.KeepClient) (
        reader io.ReadCloser, contentLength int64, url string, err error) {
        reader, blocklen, url, err := keepClient.Get(signedLocator)
        return reader, blocklen, url, err
index b293cf92ea87260dd487e5e9d190a85aca779708..762abff533213747b16562b5a6127b0efa4203f2 100644 (file)
@@ -1,16 +1,19 @@
 package main
 
 import (
+       "bytes"
+       "errors"
        "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
        "git.curoverse.com/arvados.git/sdk/go/arvadostest"
        "git.curoverse.com/arvados.git/sdk/go/keepclient"
+       "io"
        "net/http"
        "os"
        "strings"
        "testing"
 )
 
-var keepClient keepclient.KeepClient
+var keepClient *keepclient.KeepClient
 
 type PullWorkIntegrationTestData struct {
        Name     string
@@ -33,7 +36,7 @@ func SetupPullWorkerIntegrationTest(t *testing.T, testData PullWorkIntegrationTe
        }
 
        // keep client
-       keepClient = keepclient.KeepClient{
+       keepClient = &keepclient.KeepClient{
                Arvados:       &arv,
                Want_replicas: 1,
                Using_proxy:   true,
@@ -42,17 +45,15 @@ func SetupPullWorkerIntegrationTest(t *testing.T, testData PullWorkIntegrationTe
 
        // discover keep services
        var servers []string
-       service_roots, err := keepClient.DiscoverKeepServers()
-       if err != nil {
+       if err := keepClient.DiscoverKeepServers(); err != nil {
                t.Error("Error discovering keep services")
        }
-       for _, host := range service_roots {
+       for _, host := range keepClient.LocalRoots() {
                servers = append(servers, host)
        }
 
        // Put content if the test needs it
        if wantData {
-               keepClient.SetServiceRoots(service_roots)
                locator, _, err := keepClient.PutB([]byte(testData.Content))
                if err != nil {
                        t.Errorf("Error putting test data in setup for %s %s %v", testData.Content, locator, err)
@@ -106,6 +107,7 @@ func TestPullWorkerIntegration_GetExistingLocator(t *testing.T) {
 func performPullWorkerIntegrationTest(testData PullWorkIntegrationTestData, pullRequest PullRequest, t *testing.T) {
 
        // Override PutContent to mock PutBlock functionality
+       defer func(orig func([]byte, string) error) { PutContent = orig }(PutContent)
        PutContent = func(content []byte, locator string) (err error) {
                if string(content) != testData.Content {
                        t.Errorf("PutContent invoked with unexpected data. Expected: %s; Found: %s", testData.Content, content)
@@ -113,16 +115,27 @@ func performPullWorkerIntegrationTest(testData PullWorkIntegrationTestData, pull
                return
        }
 
+       // Override GetContent to mock keepclient Get functionality
+       defer func(orig func(string, *keepclient.KeepClient) (io.ReadCloser, int64, string, error)) { GetContent = orig }(GetContent)
+       GetContent = func(signedLocator string, keepClient *keepclient.KeepClient) (
+               reader io.ReadCloser, contentLength int64, url string, err error) {
+               if testData.GetError != "" {
+                       return nil, 0, "", errors.New(testData.GetError)
+               }
+               rdr := &ClosingBuffer{bytes.NewBufferString(testData.Content)}
+               return rdr, int64(len(testData.Content)), "", nil
+       }
+
        keepClient.Arvados.ApiToken = GenerateRandomApiToken()
        err := PullItemAndProcess(pullRequest, keepClient.Arvados.ApiToken, keepClient)
 
        if len(testData.GetError) > 0 {
                if (err == nil) || (!strings.Contains(err.Error(), testData.GetError)) {
-                       t.Errorf("Got error %v", err)
+                       t.Errorf("Got error %v, expected %v", err, testData.GetError)
                }
        } else {
                if err != nil {
-                       t.Errorf("Got error %v", err)
+                       t.Errorf("Got error %v, expected nil", err)
                }
        }
 }
index f0e9e65f1ee1015a57c2bd87e8d9c926978f21c4..0833bc696763a867bfab287a75805b57ce933ae6 100644 (file)
@@ -56,14 +56,13 @@ func RunTestPullWorker(c *C) {
 
 var first_pull_list = []byte(`[
                {
-                       "locator":"locator1",
+                       "locator":"acbd18db4cc2f85cedef654fccc4a4d8+3",
                        "servers":[
                                "server_1",
                                "server_2"
                        ]
-               },
-    {
-                       "locator":"locator2",
+               },{
+                       "locator":"37b51d194a7513e45b56f6524f2d51f2+3",
                        "servers":[
                                "server_3"
                        ]
@@ -72,10 +71,10 @@ var first_pull_list = []byte(`[
 
 var second_pull_list = []byte(`[
                {
-                       "locator":"locator3",
+                       "locator":"73feffa4b7f6bb68e44cf984c85f6e88+3",
                        "servers":[
                                "server_1",
-        "server_2"
+                               "server_2"
                        ]
                }
        ]`)
@@ -244,7 +243,8 @@ func performTest(testData PullWorkerTestData, c *C) {
        testPullLists[testData.name] = testData.response_body
 
        // Override GetContent to mock keepclient Get functionality
-       GetContent = func(signedLocator string, keepClient keepclient.KeepClient) (
+       defer func(orig func(string, *keepclient.KeepClient) (io.ReadCloser, int64, string, error)) { GetContent = orig }(GetContent)
+       GetContent = func(signedLocator string, keepClient *keepclient.KeepClient) (
                reader io.ReadCloser, contentLength int64, url string, err error) {
 
                processedPullLists[testData.name] = testData.response_body
@@ -262,6 +262,7 @@ func performTest(testData PullWorkerTestData, c *C) {
        }
 
        // Override PutContent to mock PutBlock functionality
+       defer func(orig func([]byte, string) error) { PutContent = orig }(PutContent)
        PutContent = func(content []byte, locator string) (err error) {
                if testData.put_error {
                        err = errors.New("Error putting data")
@@ -274,8 +275,8 @@ func performTest(testData PullWorkerTestData, c *C) {
        }
 
        response := IssueRequest(&testData.req)
-       c.Assert(testData.response_code, Equals, response.Code)
-       c.Assert(testData.response_body, Equals, response.Body.String())
+       c.Assert(response.Code, Equals, testData.response_code)
+       c.Assert(response.Body.String(), Equals, testData.response_body)
 
        expectWorkerChannelEmpty(c, pullq.NextItem)
 
index 72b9a4663adb0099a7068cfd2e0f0f9ac4228f77..bc1775f97eb3e0c700c8197fa5b21b77ecc019e0 100644 (file)
@@ -14,31 +14,34 @@ import (
 */
 
 func RunTrashWorker(trashq *WorkQueue) {
-       nextItem := trashq.NextItem
-       for item := range nextItem {
+       for item := range trashq.NextItem {
                trashRequest := item.(TrashRequest)
-               err := TrashItem(trashRequest)
-               if err != nil {
-                       log.Printf("Trash request error for %s: %s", trashRequest, err)
-               }
+               TrashItem(trashRequest)
        }
 }
 
-/*
-       Delete the block indicated by the Locator in TrashRequest.
-*/
-func TrashItem(trashRequest TrashRequest) (err error) {
-       // Verify if the block is to be deleted based on its Mtime
-       for _, volume := range KeepVM.Volumes() {
+// TrashItem deletes the indicated block from every writable volume.
+func TrashItem(trashRequest TrashRequest) {
+       reqMtime := time.Unix(trashRequest.BlockMtime, 0)
+       if time.Since(reqMtime) < blob_signature_ttl {
+               log.Printf("WARNING: data manager asked to delete a %v old block %v (BlockMtime %d = %v), but my blob_signature_ttl is %v! Skipping.",
+                       time.Since(reqMtime),
+                       trashRequest.Locator,
+                       trashRequest.BlockMtime,
+                       reqMtime,
+                       blob_signature_ttl)
+               return
+       }
+       for _, volume := range KeepVM.AllWritable() {
                mtime, err := volume.Mtime(trashRequest.Locator)
-               if err == nil {
-                       if trashRequest.BlockMtime == mtime.Unix() {
-                               currentTime := time.Now().Unix()
-                               if time.Duration(currentTime-trashRequest.BlockMtime)*time.Second >= permission_ttl {
-                                       err = volume.Delete(trashRequest.Locator)
-                               }
-                       }
+               if err != nil || trashRequest.BlockMtime != mtime.Unix() {
+                       continue
+               }
+               err = volume.Delete(trashRequest.Locator)
+               if err != nil {
+                       log.Printf("%v Delete(%v): %v", volume, trashRequest.Locator, err)
+                       continue
                }
+               log.Printf("%v Delete(%v) OK", volume, trashRequest.Locator)
        }
-       return
 }
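
A request that passes the new TTL guard must carry a BlockMtime at least blob_signature_ttl in the past; the trash worker tests below construct exactly such a back-dated request. A sketch (hash invented):

    req := TrashRequest{
            Locator:    "acbd18db4cc2f85cedef654fccc4a4d8",
            BlockMtime: time.Now().Add(-blob_signature_ttl - time.Minute).Unix(),
    }
    TrashItem(req)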
index 3031c2582d57c1e9afa1a1e3fdd9c1fbed6d8a0d..0511b48d372fc3cc33b5513411512f624f12a199 100644 (file)
@@ -15,11 +15,11 @@ type TrashWorkerTestData struct {
        Block2      []byte
        BlockMtime2 int64
 
-       CreateData       bool
-       CreateInVolume1  bool
-       UseDelayToCreate bool
+       CreateData      bool
+       CreateInVolume1 bool
 
        UseTrashLifeTime bool
+       DifferentMtimes  bool
 
        DeleteLocator string
 
@@ -122,8 +122,8 @@ func TestTrashWorkerIntegration_MtimeMatchesForLocator1ButNotForLocator2(t *test
                Locator2: TEST_HASH,
                Block2:   TEST_BLOCK,
 
-               CreateData:       true,
-               UseDelayToCreate: true,
+               CreateData:      true,
+               DifferentMtimes: true,
 
                DeleteLocator: TEST_HASH,
 
@@ -183,22 +183,16 @@ func TestTrashWorkerIntegration_SameLocatorInTwoVolumesWithDefaultTrashLifeTime(
 
 /* Perform the test */
 func performTrashWorkerTest(testData TrashWorkerTestData, t *testing.T) {
-       actual_permission_ttl := permission_ttl
-
        // Create Keep Volumes
        KeepVM = MakeTestVolumeManager(2)
+       defer KeepVM.Close()
 
        // Put test content
-       vols := KeepVM.Volumes()
+       vols := KeepVM.AllWritable()
        if testData.CreateData {
                vols[0].Put(testData.Locator1, testData.Block1)
                vols[0].Put(testData.Locator1+".meta", []byte("metadata"))
 
-               // One of the tests deletes a locator with different Mtimes in two different volumes
-               if testData.UseDelayToCreate {
-                       time.Sleep(1 * time.Second)
-               }
-
                if testData.CreateInVolume1 {
                        vols[0].Put(testData.Locator2, testData.Block2)
                        vols[0].Put(testData.Locator2+".meta", []byte("metadata"))
@@ -208,24 +202,30 @@ func performTrashWorkerTest(testData TrashWorkerTestData, t *testing.T) {
                }
        }
 
+       oldBlockTime := time.Now().Add(-blob_signature_ttl - time.Minute)
+
        // Create TrashRequest for the test
        trashRequest := TrashRequest{
                Locator:    testData.DeleteLocator,
-               BlockMtime: time.Now().Unix(),
+               BlockMtime: oldBlockTime.Unix(),
        }
 
-       // delay by permission_ttl to allow deletes to work
-       time.Sleep(1 * time.Second)
-
        // Run trash worker and put the trashRequest on trashq
        trashList := list.New()
        trashList.PushBack(trashRequest)
        trashq = NewWorkQueue()
+       defer trashq.Close()
 
-       // Trash worker would not delete block if its Mtime is within trash life time.
-       // Hence, we will have to bypass it to allow the deletion to succeed.
        if !testData.UseTrashLifeTime {
-               permission_ttl = time.Duration(1) * time.Second
+               // Trash worker would not delete block if its Mtime is
+               // within trash life time. Back-date the block to
+               // allow the deletion to succeed.
+               for _, v := range vols {
+                       v.(*MockVolume).Timestamps[testData.DeleteLocator] = oldBlockTime
+                       if testData.DifferentMtimes {
+                               oldBlockTime = oldBlockTime.Add(time.Second)
+                       }
+               }
        }
        go RunTrashWorker(trashq)
 
@@ -258,23 +258,18 @@ func performTrashWorkerTest(testData TrashWorkerTestData, t *testing.T) {
                }
        }
 
-       // One test used the same locator in two different volumes but with different Mtime values
-       // Hence let's verify that only one volume has it and the other is deleted
-       if (testData.ExpectLocator1) &&
-               (testData.Locator1 == testData.Locator2) {
+       // The DifferentMtimes test puts the same locator in two
+       // different volumes, but only one copy has an Mtime matching
+       // the trash request.
+       if testData.DifferentMtimes {
                locatorFoundIn := 0
-               for _, volume := range KeepVM.Volumes() {
+               for _, volume := range KeepVM.AllReadable() {
                        if _, err := volume.Get(testData.Locator1); err == nil {
                                locatorFoundIn = locatorFoundIn + 1
                        }
                }
                if locatorFoundIn != 1 {
-                       t.Errorf("Expected locator to be found in only one volume after deleting. But found: %s", locatorFoundIn)
+                       t.Errorf("Found %d copies of %s, expected 1", locatorFoundIn, testData.Locator1)
                }
        }
-
-       // Done
-       permission_ttl = actual_permission_ttl
-       trashq.Close()
-       KeepVM.Quit()
 }
index e7683ee991a41ac8a8f45cfa9f5f263e2a7a42a6..64fea34bfe1c32ad9b6b6b33a74c82f8b9f0252f 100644 (file)
 package main
 
 import (
-       "errors"
-       "fmt"
-       "os"
-       "strings"
+       "io"
+       "sync/atomic"
        "time"
 )
 
 type Volume interface {
+       // Get a block. If (and only if) the returned error is nil,
+       // the caller must put the returned slice back into the
+       // buffer pool when it's finished with it.
        Get(loc string) ([]byte, error)
        Put(loc string, block []byte) error
        Touch(loc string) error
        Mtime(loc string) (time.Time, error)
-       Index(prefix string) string
+       IndexTo(prefix string, writer io.Writer) error
        Delete(loc string) error
        Status() *VolumeStatus
        String() string
+       Writable() bool
 }
 
-// MockVolumes are Volumes used to test the Keep front end.
-//
-// If the Bad field is true, this volume should return an error
-// on all writes and puts.
-//
-// The Touchable field signifies whether the Touch method will
-// succeed.  Defaults to true.  Note that Bad and Touchable are
-// independent: a MockVolume may be set up so that Put fails but Touch
-// works or vice versa.
-//
-// TODO(twp): rename Bad to something more descriptive, e.g. Writable,
-// and make sure that the tests that rely on it are testing the right
-// thing.  We may need to simulate Writable, Touchable and Corrupt
-// volumes in different ways.
-//
-type MockVolume struct {
-       Store      map[string][]byte
-       Timestamps map[string]time.Time
-       Bad        bool
-       Touchable  bool
-}
-
-func CreateMockVolume() *MockVolume {
-       return &MockVolume{
-               Store:      make(map[string][]byte),
-               Timestamps: make(map[string]time.Time),
-               Bad:        false,
-               Touchable:  true,
-       }
-}
-
-func (v *MockVolume) Get(loc string) ([]byte, error) {
-       if v.Bad {
-               return nil, errors.New("Bad volume")
-       } else if block, ok := v.Store[loc]; ok {
-               return block, nil
-       }
-       return nil, os.ErrNotExist
+// A VolumeManager tells callers which volumes can be read from,
+// which can be written to, and on which volume the next write
+// should be attempted.
+type VolumeManager interface {
+       // AllReadable returns all volumes.
+       AllReadable() []Volume
+       // AllWritable returns all volumes that aren't known to be in
+       // a read-only state. (There is no guarantee that a write to
+       // one will succeed, though.)
+       AllWritable() []Volume
+       // NextWritable returns the volume where the next new block
+       // should be written. A VolumeManager can select a volume in
+       // order to distribute activity across spindles, fill up disks
+       // with more free space, etc.
+       NextWritable() Volume
+       // Close shuts down the volume manager cleanly.
+       Close()
 }
 
-func (v *MockVolume) Put(loc string, block []byte) error {
-       if v.Bad {
-               return errors.New("Bad volume")
-       }
-       v.Store[loc] = block
-       return v.Touch(loc)
+type RRVolumeManager struct {
+       readables []Volume
+       writables []Volume
+       counter   uint32
 }
 
-func (v *MockVolume) Touch(loc string) error {
-       if v.Touchable {
-               v.Timestamps[loc] = time.Now()
-               return nil
+func MakeRRVolumeManager(volumes []Volume) *RRVolumeManager {
+       vm := &RRVolumeManager{}
+       for _, v := range volumes {
+               vm.readables = append(vm.readables, v)
+               if v.Writable() {
+                       vm.writables = append(vm.writables, v)
+               }
        }
-       return errors.New("Touch failed")
+       return vm
 }
 
-func (v *MockVolume) Mtime(loc string) (time.Time, error) {
-       var mtime time.Time
-       var err error
-       if v.Bad {
-               err = errors.New("Bad volume")
-       } else if t, ok := v.Timestamps[loc]; ok {
-               mtime = t
-       } else {
-               err = os.ErrNotExist
-       }
-       return mtime, err
+func (vm *RRVolumeManager) AllReadable() []Volume {
+       return vm.readables
 }
 
-func (v *MockVolume) Index(prefix string) string {
-       var result string
-       for loc, block := range v.Store {
-               if IsValidLocator(loc) && strings.HasPrefix(loc, prefix) {
-                       result = result + fmt.Sprintf("%s+%d %d\n",
-                               loc, len(block), 123456789)
-               }
-       }
-       return result
+func (vm *RRVolumeManager) AllWritable() []Volume {
+       return vm.writables
 }
 
-func (v *MockVolume) Delete(loc string) error {
-       if _, ok := v.Store[loc]; ok {
-               if time.Since(v.Timestamps[loc]) < permission_ttl {
-                       return nil
-               }
-               delete(v.Store, loc)
+func (vm *RRVolumeManager) NextWritable() Volume {
+       if len(vm.writables) == 0 {
                return nil
        }
-       return os.ErrNotExist
-}
-
-func (v *MockVolume) Status() *VolumeStatus {
-       var used uint64
-       for _, block := range v.Store {
-               used = used + uint64(len(block))
-       }
-       return &VolumeStatus{"/bogo", 123, 1000000 - used, used}
-}
-
-func (v *MockVolume) String() string {
-       return "[MockVolume]"
-}
-
-// A VolumeManager manages a collection of volumes.
-//
-// - Volumes is a slice of available Volumes.
-// - Choose() returns a Volume suitable for writing to.
-// - Quit() instructs the VolumeManager to shut down gracefully.
-//
-type VolumeManager interface {
-       Volumes() []Volume
-       Choose() Volume
-       Quit()
-}
-
-type RRVolumeManager struct {
-       volumes   []Volume
-       nextwrite chan Volume
-       quit      chan int
-}
-
-func MakeRRVolumeManager(vols []Volume) *RRVolumeManager {
-       // Create a new VolumeManager struct with the specified volumes,
-       // and with new Nextwrite and Quit channels.
-       // The Quit channel is buffered with a capacity of 1 so that
-       // another routine may write to it without blocking.
-       vm := &RRVolumeManager{vols, make(chan Volume), make(chan int, 1)}
-
-       // This goroutine implements round-robin volume selection.
-       // It sends each available Volume in turn to the Nextwrite
-       // channel, until receiving a notification on the Quit channel
-       // that it should terminate.
-       go func() {
-               var i int = 0
-               for {
-                       select {
-                       case <-vm.quit:
-                               return
-                       case vm.nextwrite <- vm.volumes[i]:
-                               i = (i + 1) % len(vm.volumes)
-                       }
-               }
-       }()
-
-       return vm
-}
-
-func (vm *RRVolumeManager) Volumes() []Volume {
-       return vm.volumes
-}
-
-func (vm *RRVolumeManager) Choose() Volume {
-       return <-vm.nextwrite
+       i := atomic.AddUint32(&vm.counter, 1)
+       return vm.writables[i%uint32(len(vm.writables))]
 }
 
-func (vm *RRVolumeManager) Quit() {
-       vm.quit <- 1
+func (vm *RRVolumeManager) Close() {
 }
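
The switch from a selector goroutine to an atomic counter means NextWritable is safe for concurrent callers with no channel plumbing: atomic.AddUint32 hands each caller a distinct index, and the modulo walks the writable volumes round-robin. A usage sketch against the interface above (storeBlocks and its sample data are illustrative, not from the tree):

package main

import "log"

// storeBlocks spreads writes across writable volumes using the
// round-robin VolumeManager defined above. Each NextWritable call
// advances the atomic counter, so concurrent callers each get the
// next volume in sequence.
func storeBlocks(vm VolumeManager, blocks map[string][]byte) {
	for loc, data := range blocks {
		vol := vm.NextWritable()
		if vol == nil {
			log.Print("no writable volumes available")
			return
		}
		if err := vol.Put(loc, data); err != nil {
			log.Printf("%s: Put(%s): %v", vol, loc, err)
		}
	}
}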
diff --git a/services/keepstore/volume_test.go b/services/keepstore/volume_test.go
new file mode 100644 (file)
index 0000000..2615019
--- /dev/null
@@ -0,0 +1,157 @@
+package main
+
+import (
+       "errors"
+       "fmt"
+       "io"
+       "os"
+       "strings"
+       "sync"
+       "time"
+)
+
+// MockVolumes are test doubles for Volumes, used to test handlers.
+type MockVolume struct {
+       Store      map[string][]byte
+       Timestamps map[string]time.Time
+       // Bad volumes return an error for every operation.
+       Bad bool
+       // Touchable volumes' Touch() method succeeds for a locator
+       // that has been Put().
+       Touchable bool
+       // Readonly volumes return an error for Put, Delete, and
+       // Touch.
+       Readonly bool
+       called   map[string]int
+       mutex    sync.Mutex
+}
+
+// CreateMockVolume returns a non-Bad, non-Readonly, Touchable mock
+// volume.
+func CreateMockVolume() *MockVolume {
+       return &MockVolume{
+               Store:      make(map[string][]byte),
+               Timestamps: make(map[string]time.Time),
+               Bad:        false,
+               Touchable:  true,
+               Readonly:   false,
+               called:     map[string]int{},
+       }
+}
+
+// CallCount returns how many times the named method has been called.
+func (v *MockVolume) CallCount(method string) int {
+       v.mutex.Lock()
+       defer v.mutex.Unlock()
+       if c, ok := v.called[method]; !ok {
+               return 0
+       } else {
+               return c
+       }
+}
+
+func (v *MockVolume) gotCall(method string) {
+       v.mutex.Lock()
+       defer v.mutex.Unlock()
+       if _, ok := v.called[method]; !ok {
+               v.called[method] = 1
+       } else {
+               v.called[method]++
+       }
+}
+
+func (v *MockVolume) Get(loc string) ([]byte, error) {
+       v.gotCall("Get")
+       if v.Bad {
+               return nil, errors.New("Bad volume")
+       } else if block, ok := v.Store[loc]; ok {
+               buf := bufs.Get(len(block))
+               copy(buf, block)
+               return buf, nil
+       }
+       return nil, os.ErrNotExist
+}
+
+func (v *MockVolume) Put(loc string, block []byte) error {
+       v.gotCall("Put")
+       if v.Bad {
+               return errors.New("Bad volume")
+       }
+       if v.Readonly {
+               return MethodDisabledError
+       }
+       v.Store[loc] = block
+       return v.Touch(loc)
+}
+
+func (v *MockVolume) Touch(loc string) error {
+       v.gotCall("Touch")
+       if v.Readonly {
+               return MethodDisabledError
+       }
+       if v.Touchable {
+               v.Timestamps[loc] = time.Now()
+               return nil
+       }
+       return errors.New("Touch failed")
+}
+
+func (v *MockVolume) Mtime(loc string) (time.Time, error) {
+       v.gotCall("Mtime")
+       var mtime time.Time
+       var err error
+       if v.Bad {
+               err = errors.New("Bad volume")
+       } else if t, ok := v.Timestamps[loc]; ok {
+               mtime = t
+       } else {
+               err = os.ErrNotExist
+       }
+       return mtime, err
+}
+
+func (v *MockVolume) IndexTo(prefix string, w io.Writer) error {
+       v.gotCall("IndexTo")
+       for loc, block := range v.Store {
+               if !IsValidLocator(loc) || !strings.HasPrefix(loc, prefix) {
+                       continue
+               }
+               _, err := fmt.Fprintf(w, "%s+%d %d\n",
+                       loc, len(block), 123456789)
+               if err != nil {
+                       return err
+               }
+       }
+       return nil
+}
+
+func (v *MockVolume) Delete(loc string) error {
+       v.gotCall("Delete")
+       if v.Readonly {
+               return MethodDisabledError
+       }
+       if _, ok := v.Store[loc]; ok {
+               if time.Since(v.Timestamps[loc]) < blob_signature_ttl {
+                       return nil
+               }
+               delete(v.Store, loc)
+               return nil
+       }
+       return os.ErrNotExist
+}
+
+func (v *MockVolume) Status() *VolumeStatus {
+       var used uint64
+       for _, block := range v.Store {
+               used = used + uint64(len(block))
+       }
+       return &VolumeStatus{"/bogo", 123, 1000000 - used, used}
+}
+
+func (v *MockVolume) String() string {
+       return "[MockVolume]"
+}
+
+func (v *MockVolume) Writable() bool {
+       return !v.Readonly
+}
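
The new called map gives handler tests a way to assert not just outcomes but traffic: CallCount reports how many times each method ran. A hypothetical test using that bookkeeping, assuming the TEST_HASH and TEST_BLOCK fixtures used elsewhere in these tests:

package main

import "testing"

// TestReadonlyMockRejectsPut is a sketch, not part of this commit: a
// read-only MockVolume rejects Put with MethodDisabledError, but the
// attempt is still recorded by the call counter.
func TestReadonlyMockRejectsPut(t *testing.T) {
	v := CreateMockVolume()
	v.Readonly = true
	if err := v.Put(TEST_HASH, TEST_BLOCK); err != MethodDisabledError {
		t.Errorf("got %v, expected MethodDisabledError", err)
	}
	if n := v.CallCount("Put"); n != 1 {
		t.Errorf("Put recorded %d calls, expected 1", n)
	}
}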
index 84877c003409a5af12df2c6fe154102c528044df..61a98b5c736a8d3821fd0975df0d5dd5a0977a21 100644 (file)
@@ -4,106 +4,40 @@ package main
 
 import (
        "fmt"
+       "io"
        "io/ioutil"
        "log"
        "os"
        "path/filepath"
        "strconv"
        "strings"
+       "sync"
        "syscall"
        "time"
 )
 
-// IORequests are encapsulated Get or Put requests.  They are used to
-// implement serialized I/O (i.e. only one read/write operation per
-// volume). When running in serialized mode, the Keep front end sends
-// IORequests on a channel to an IORunner, which handles them one at a
-// time and returns an IOResponse.
-//
-type IOMethod int
-
-const (
-       KeepGet IOMethod = iota
-       KeepPut
-)
-
-type IORequest struct {
-       method IOMethod
-       loc    string
-       data   []byte
-       reply  chan *IOResponse
-}
-
-type IOResponse struct {
-       data []byte
-       err  error
-}
-
-// A UnixVolume has the following properties:
-//
-//   root
-//       the path to the volume's root directory
-//   queue
-//       A channel of IORequests. If non-nil, all I/O requests for
-//       this volume should be queued on this channel; the result
-//       will be delivered on the IOResponse channel supplied in the
-//       request.
-//
+// A UnixVolume stores and retrieves blocks in a local directory.
 type UnixVolume struct {
-       root  string // path to this volume
-       queue chan *IORequest
-}
-
-func (v *UnixVolume) IOHandler() {
-       for req := range v.queue {
-               var result IOResponse
-               switch req.method {
-               case KeepGet:
-                       result.data, result.err = v.Read(req.loc)
-               case KeepPut:
-                       result.err = v.Write(req.loc, req.data)
-               }
-               req.reply <- &result
-       }
-}
-
-func MakeUnixVolume(root string, serialize bool) (v UnixVolume) {
-       if serialize {
-               v = UnixVolume{root, make(chan *IORequest)}
-               go v.IOHandler()
-       } else {
-               v = UnixVolume{root, nil}
-       }
-       return
-}
-
-func (v *UnixVolume) Get(loc string) ([]byte, error) {
-       if v.queue == nil {
-               return v.Read(loc)
-       }
-       reply := make(chan *IOResponse)
-       v.queue <- &IORequest{KeepGet, loc, nil, reply}
-       response := <-reply
-       return response.data, response.err
-}
-
-func (v *UnixVolume) Put(loc string, block []byte) error {
-       if v.queue == nil {
-               return v.Write(loc, block)
-       }
-       reply := make(chan *IOResponse)
-       v.queue <- &IORequest{KeepPut, loc, block, reply}
-       response := <-reply
-       return response.err
+       root      string // path to the volume's root directory
+       serialize bool
+       readonly  bool
+       mutex     sync.Mutex
 }
 
 func (v *UnixVolume) Touch(loc string) error {
+       if v.readonly {
+               return MethodDisabledError
+       }
        p := v.blockPath(loc)
        f, err := os.OpenFile(p, os.O_RDWR|os.O_APPEND, 0644)
        if err != nil {
                return err
        }
        defer f.Close()
+       if v.serialize {
+               v.mutex.Lock()
+               defer v.mutex.Unlock()
+       }
        if e := lockfile(f); e != nil {
                return e
        }
@@ -122,28 +56,50 @@ func (v *UnixVolume) Mtime(loc string) (time.Time, error) {
        }
 }
 
-// Read retrieves a block identified by the locator string "loc", and
+// Get retrieves a block identified by the locator string "loc", and
 // returns its contents as a byte slice.
 //
-// If the block could not be opened or read, Read returns a nil slice
-// and the os.Error that was generated.
-//
-// If the block is present but its content hash does not match loc,
-// Read returns the block and a CorruptError.  It is the caller's
-// responsibility to decide what (if anything) to do with the
-// corrupted data block.
-//
-func (v *UnixVolume) Read(loc string) ([]byte, error) {
-       buf, err := ioutil.ReadFile(v.blockPath(loc))
-       return buf, err
+// If the block could not be found, opened, or read, Get returns a nil
+// slice and whatever non-nil error was returned by Stat or ReadFile.
+func (v *UnixVolume) Get(loc string) ([]byte, error) {
+       path := v.blockPath(loc)
+       stat, err := os.Stat(path)
+       if err != nil {
+               return nil, err
+       }
+       if stat.Size() < 0 {
+               return nil, os.ErrInvalid
+       } else if stat.Size() == 0 {
+               return bufs.Get(0), nil
+       } else if stat.Size() > BLOCKSIZE {
+               return nil, TooLongError
+       }
+       f, err := os.Open(path)
+       if err != nil {
+               return nil, err
+       }
+       defer f.Close()
+       buf := bufs.Get(int(stat.Size()))
+       if v.serialize {
+               v.mutex.Lock()
+               defer v.mutex.Unlock()
+       }
+       _, err = io.ReadFull(f, buf)
+       if err != nil {
+               bufs.Put(buf)
+               return nil, err
+       }
+       return buf, nil
 }
 
-// Write stores a block of data identified by the locator string
+// Put stores a block of data identified by the locator string
 // "loc".  It returns nil on success.  If the volume is full, it
 // returns a FullError.  If the write fails due to some other error,
 // that error is returned.
-//
-func (v *UnixVolume) Write(loc string, block []byte) error {
+func (v *UnixVolume) Put(loc string, block []byte) error {
+       if v.readonly {
+               return MethodDisabledError
+       }
        if v.IsFull() {
                return FullError
        }
@@ -161,8 +117,14 @@ func (v *UnixVolume) Write(loc string, block []byte) error {
        }
        bpath := v.blockPath(loc)
 
+       if v.serialize {
+               v.mutex.Lock()
+               defer v.mutex.Unlock()
+       }
        if _, err := tmpfile.Write(block); err != nil {
                log.Printf("%s: writing to %s: %s\n", v, bpath, err)
+               tmpfile.Close()
+               os.Remove(tmpfile.Name())
                return err
        }
        if err := tmpfile.Close(); err != nil {
@@ -205,14 +167,13 @@ func (v *UnixVolume) Status() *VolumeStatus {
        return &VolumeStatus{v.root, devnum, free, used}
 }
 
-// Index returns a list of blocks found on this volume which begin with
-// the specified prefix. If the prefix is an empty string, Index returns
-// a complete list of blocks.
+// IndexTo writes (to the given Writer) a list of blocks found on this
+// volume which begin with the specified prefix. If the prefix is an
+// empty string, IndexTo writes a complete list of blocks.
 //
-// The return value is a multiline string (separated by
-// newlines). Each line is in the format
+// Each block is given in the format
 //
-//     locator+size modification-time
+//     locator+size modification-time {newline}
 //
 // e.g.:
 //
@@ -220,38 +181,32 @@ func (v *UnixVolume) Status() *VolumeStatus {
 //     e4d41e6fd68460e0e3fc18cc746959d2+67108864 1377796043
 //     e4de7a2810f5554cd39b36d8ddb132ff+67108864 1388701136
 //
-func (v *UnixVolume) Index(prefix string) (output string) {
-       filepath.Walk(v.root,
+func (v *UnixVolume) IndexTo(prefix string, w io.Writer) error {
+       return filepath.Walk(v.root,
                func(path string, info os.FileInfo, err error) error {
-                       // This WalkFunc inspects each path in the volume
-                       // and prints an index line for all files that begin
-                       // with prefix.
                        if err != nil {
-                               log.Printf("IndexHandler: %s: walking to %s: %s",
+                               log.Printf("%s: IndexTo Walk error at %s: %s",
                                        v, path, err)
                                return nil
                        }
-                       locator := filepath.Base(path)
-                       // Skip directories that do not match prefix.
-                       // We know there is nothing interesting inside.
+                       basename := filepath.Base(path)
                        if info.IsDir() &&
-                               !strings.HasPrefix(locator, prefix) &&
-                               !strings.HasPrefix(prefix, locator) {
+                               !strings.HasPrefix(basename, prefix) &&
+                               !strings.HasPrefix(prefix, basename) {
+                               // Skip directories that do not match
+                               // prefix. We know there is nothing
+                               // interesting inside.
                                return filepath.SkipDir
                        }
-                       // Skip any file that is not apparently a locator, e.g. .meta files
-                       if !IsValidLocator(locator) {
+                       if info.IsDir() ||
+                               !IsValidLocator(basename) ||
+                               !strings.HasPrefix(basename, prefix) {
                                return nil
                        }
-                       // Print filenames beginning with prefix
-                       if !info.IsDir() && strings.HasPrefix(locator, prefix) {
-                               output = output + fmt.Sprintf(
-                                       "%s+%d %d\n", locator, info.Size(), info.ModTime().Unix())
-                       }
-                       return nil
+                       _, err = fmt.Fprintf(w, "%s+%d %d\n",
+                               basename, info.Size(), info.ModTime().Unix())
+                       return err
                })
-
-       return
 }
 
 func (v *UnixVolume) Delete(loc string) error {
@@ -263,6 +218,13 @@ func (v *UnixVolume) Delete(loc string) error {
        // Delete() will read the correct up-to-date timestamp and choose not to
        // delete the file.
 
+       if v.readonly {
+               return MethodDisabledError
+       }
+       if v.serialize {
+               v.mutex.Lock()
+               defer v.mutex.Unlock()
+       }
        p := v.blockPath(loc)
        f, err := os.OpenFile(p, os.O_RDWR|os.O_APPEND, 0644)
        if err != nil {
@@ -274,15 +236,15 @@ func (v *UnixVolume) Delete(loc string) error {
        }
        defer unlockfile(f)
 
-       // If the block has been PUT more recently than -permission_ttl,
-       // return success without removing the block.  This guards against
-       // a race condition where a block is old enough that Data Manager
-       // has added it to the trash list, but the user submitted a PUT
-       // for the block since then.
+       // If the block has been PUT in the last blob_signature_ttl
+       // seconds, return success without removing the block. This
+       // protects data from garbage collection until it is no longer
+       // possible for clients to retrieve the unreferenced blocks
+       // anyway (because the permission signatures have expired).
        if fi, err := os.Stat(p); err != nil {
                return err
        } else {
-               if time.Since(fi.ModTime()) < permission_ttl {
+               if time.Since(fi.ModTime()) < blob_signature_ttl {
                        return nil
                }
        }
@@ -350,6 +312,10 @@ func (v *UnixVolume) String() string {
        return fmt.Sprintf("[UnixVolume %s]", v.root)
 }
 
+func (v *UnixVolume) Writable() bool {
+       return !v.readonly
+}
+
 // lockfile and unlockfile use flock(2) to manage kernel file locks.
 func lockfile(f *os.File) error {
        return syscall.Flock(int(f.Fd()), syscall.LOCK_EX)
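
Serialization now lives behind a plain sync.Mutex held only for the duration of each disk operation, replacing the old IORequest/IOHandler channel machinery. The shape of the pattern as a standalone sketch (diskVolume and serializedOp are placeholders, not names in this tree):

package main

import "sync"

// diskVolume is a stripped-down sketch of the locking pattern the
// UnixVolume methods above now use: when serialize is enabled, hold
// the volume mutex around the disk operation and release it when the
// operation returns.
type diskVolume struct {
	serialize bool
	mutex     sync.Mutex
}

func (v *diskVolume) serializedOp(op func() error) error {
	if v.serialize {
		v.mutex.Lock()
		defer v.mutex.Unlock()
	}
	return op()
}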
index 7a10fc5c60a3db5c22001f9264a77561016a6181..1320d315858d83b7c84064e528a43b792ab5f19e 100644 (file)
@@ -10,25 +10,26 @@ import (
        "time"
 )
 
-func TempUnixVolume(t *testing.T, serialize bool) UnixVolume {
+func TempUnixVolume(t *testing.T, serialize bool, readonly bool) *UnixVolume {
        d, err := ioutil.TempDir("", "volume_test")
        if err != nil {
                t.Fatal(err)
        }
-       return MakeUnixVolume(d, serialize)
+       return &UnixVolume{
+               root:      d,
+               serialize: serialize,
+               readonly:  readonly,
+       }
 }
 
-func _teardown(v UnixVolume) {
-       if v.queue != nil {
-               close(v.queue)
-       }
+func _teardown(v *UnixVolume) {
        os.RemoveAll(v.root)
 }
 
-// store writes a Keep block directly into a UnixVolume, for testing
-// UnixVolume methods.
-//
-func _store(t *testing.T, vol UnixVolume, filename string, block []byte) {
+// _store writes a Keep block directly into a UnixVolume, bypassing
+// the overhead and safeguards of Put(). Useful for storing bogus data
+// and isolating unit tests from Put() behavior.
+func _store(t *testing.T, vol *UnixVolume, filename string, block []byte) {
        blockdir := fmt.Sprintf("%s/%s", vol.root, filename[:3])
        if err := os.MkdirAll(blockdir, 0755); err != nil {
                t.Fatal(err)
@@ -44,7 +45,7 @@ func _store(t *testing.T, vol UnixVolume, filename string, block []byte) {
 }
 
 func TestGet(t *testing.T) {
-       v := TempUnixVolume(t, false)
+       v := TempUnixVolume(t, false, false)
        defer _teardown(v)
        _store(t, v, TEST_HASH, TEST_BLOCK)
 
@@ -58,7 +59,7 @@ func TestGet(t *testing.T) {
 }
 
 func TestGetNotFound(t *testing.T) {
-       v := TempUnixVolume(t, false)
+       v := TempUnixVolume(t, false, false)
        defer _teardown(v)
        _store(t, v, TEST_HASH, TEST_BLOCK)
 
@@ -74,7 +75,7 @@ func TestGetNotFound(t *testing.T) {
 }
 
 func TestPut(t *testing.T) {
-       v := TempUnixVolume(t, false)
+       v := TempUnixVolume(t, false, false)
        defer _teardown(v)
 
        err := v.Put(TEST_HASH, TEST_BLOCK)
@@ -91,7 +92,7 @@ func TestPut(t *testing.T) {
 }
 
 func TestPutBadVolume(t *testing.T) {
-       v := TempUnixVolume(t, false)
+       v := TempUnixVolume(t, false, false)
        defer _teardown(v)
 
        os.Chmod(v.root, 000)
@@ -101,11 +102,44 @@ func TestPutBadVolume(t *testing.T) {
        }
 }
 
+func TestUnixVolumeReadonly(t *testing.T) {
+       v := TempUnixVolume(t, false, false)
+       defer _teardown(v)
+
+       // First write something before marking readonly
+       err := v.Put(TEST_HASH, TEST_BLOCK)
+       if err != nil {
+               t.Error("got err %v, expected nil", err)
+       }
+
+       v.readonly = true
+
+       _, err = v.Get(TEST_HASH)
+       if err != nil {
+               t.Error("got err %v, expected nil", err)
+       }
+
+       err = v.Put(TEST_HASH, TEST_BLOCK)
+       if err != MethodDisabledError {
+               t.Error("got err %v, expected MethodDisabledError", err)
+       }
+
+       err = v.Touch(TEST_HASH)
+       if err != MethodDisabledError {
+               t.Error("got err %v, expected MethodDisabledError", err)
+       }
+
+       err = v.Delete(TEST_HASH)
+       if err != MethodDisabledError {
+               t.Error("got err %v, expected MethodDisabledError", err)
+       }
+}
+
 // TestPutTouch
 //     Test that when applying PUT to a block that already exists,
 //     the block's modification time is updated.
 func TestPutTouch(t *testing.T) {
-       v := TempUnixVolume(t, false)
+       v := TempUnixVolume(t, false, false)
        defer _teardown(v)
 
        if err := v.Put(TEST_HASH, TEST_BLOCK); err != nil {
@@ -165,7 +199,7 @@ func TestPutTouch(t *testing.T) {
 //
 func TestGetSerialized(t *testing.T) {
        // Create a volume with I/O serialization enabled.
-       v := TempUnixVolume(t, true)
+       v := TempUnixVolume(t, true, false)
        defer _teardown(v)
 
        _store(t, v, TEST_HASH, TEST_BLOCK)
@@ -214,7 +248,7 @@ func TestGetSerialized(t *testing.T) {
 
 func TestPutSerialized(t *testing.T) {
        // Create a volume with I/O serialization enabled.
-       v := TempUnixVolume(t, true)
+       v := TempUnixVolume(t, true, false)
        defer _teardown(v)
 
        sem := make(chan int)
@@ -274,7 +308,7 @@ func TestPutSerialized(t *testing.T) {
 }
 
 func TestIsFull(t *testing.T) {
-       v := TempUnixVolume(t, false)
+       v := TempUnixVolume(t, false, false)
        defer _teardown(v)
 
        full_path := v.root + "/full"
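
One contract worth calling out from the new Get implementation: a successful read hands back a slice from the shared buffer pool, and the caller is responsible for returning it. A caller-side sketch (readAndRelease is illustrative; Volume and bufs are as shown in this commit):

package main

import "log"

// readAndRelease fetches a block and returns its buffer to the pool,
// honoring the ownership rule documented on Volume.Get above.
func readAndRelease(v Volume, loc string) {
	buf, err := v.Get(loc)
	if err != nil {
		log.Printf("%s: Get(%s): %v", v, loc, err)
		return
	}
	defer bufs.Put(buf) // required: buf came from the buffer pool
	log.Printf("%s: %s: read %d bytes", v, loc, len(buf))
}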
index 70817627dfe8d3194435a7a31f1df8e330e37ed5..0fab1b0fec5f3e0fd0696460089028ab244cfb66 100644 (file)
@@ -104,12 +104,12 @@ class ComputeNodeSetupActor(ComputeNodeStateChangeBase):
         else:
             self._later.prepare_arvados_node(arvados_node)
 
-    @ComputeNodeStateChangeBase._retry()
+    @ComputeNodeStateChangeBase._retry(config.ARVADOS_ERRORS)
     def create_arvados_node(self):
         self.arvados_node = self._arvados.nodes().create(body={}).execute()
         self._later.create_cloud_node()
 
-    @ComputeNodeStateChangeBase._retry()
+    @ComputeNodeStateChangeBase._retry(config.ARVADOS_ERRORS)
     def prepare_arvados_node(self, node):
         self.arvados_node = self._arvados.nodes().update(
             uuid=node['uuid'],
@@ -139,8 +139,10 @@ class ComputeNodeSetupActor(ComputeNodeStateChangeBase):
         self._finished()
 
     def stop_if_no_cloud_node(self):
-        if self.cloud_node is None:
-            self.stop()
+        if self.cloud_node is not None:
+            return False
+        self.stop()
+        return True
 
 
 class ComputeNodeShutdownActor(ComputeNodeStateChangeBase):
index 836b673e9267818d37bd6bd750465a8e33a2b2b4..af8e6081cb78fec5db210ad57d44f05ee0b3706e 100644 (file)
@@ -202,10 +202,10 @@ class NodeManagerDaemonActor(actor_class):
                    [self.cloud_nodes, self.booted, self.booting])
 
     def _nodes_busy(self):
-        return sum(1 for idle in
-                   pykka.get_all(rec.actor.in_state('idle') for rec in
+        return sum(1 for busy in
+                   pykka.get_all(rec.actor.in_state('busy') for rec in
                                  self.cloud_nodes.nodes.itervalues())
-                   if idle is False)
+                   if busy)
 
     def _nodes_wanted(self):
         up_count = self._nodes_up()
@@ -299,8 +299,7 @@ class NodeManagerDaemonActor(actor_class):
         if (nodes_excess < 1) or not self.booting:
             return None
         for key, node in self.booting.iteritems():
-            node.stop_if_no_cloud_node().get()
-            if not node.actor_ref.is_alive():
+            if node.stop_if_no_cloud_node().get():
                 del self.booting[key]
                 if nodes_excess > 1:
                     self._later.stop_booting_node()
@@ -328,7 +327,7 @@ class NodeManagerDaemonActor(actor_class):
                 break
         else:
             return None
-        if record.arvados_node is None:
+        if not record.actor.in_state('idle', 'busy').get():
             self._begin_node_shutdown(record.actor, cancellable=False)
 
     def node_finished_shutdown(self, shutdown_actor):
@@ -345,12 +344,14 @@ class NodeManagerDaemonActor(actor_class):
     def shutdown(self):
         self._logger.info("Shutting down after signal.")
         self.poll_stale_after = -1  # Inhibit starting/stopping nodes
-        for bootnode in self.booting.itervalues():
-            bootnode.stop_if_no_cloud_node()
+        setup_stops = {key: node.stop_if_no_cloud_node()
+                       for key, node in self.booting.iteritems()}
+        self.booting = {key: self.booting[key]
+                        for key in setup_stops if not setup_stops[key].get()}
         self._later.await_shutdown()
 
     def await_shutdown(self):
-        if any(node.actor_ref.is_alive() for node in self.booting.itervalues()):
+        if self.booting:
             self._timer.schedule(time.time() + 1, self._later.await_shutdown)
         else:
             self.stop()
index b8cf0ee408130f8203ac1d53d332ffb28d9b5659..96a70c6c96c794ebb17d05bf7880defe89ca014c 100644 (file)
@@ -79,14 +79,16 @@ class ComputeNodeSetupActorTestCase(testutil.ActorTestMixin, unittest.TestCase):
         self.make_mocks(
             arverror.ApiError(httplib2.Response({'status': '500'}), ""))
         self.make_actor()
-        self.setup_actor.stop_if_no_cloud_node()
+        self.assertTrue(
+            self.setup_actor.stop_if_no_cloud_node().get(self.TIMEOUT))
         self.assertTrue(
             self.setup_actor.actor_ref.actor_stopped.wait(self.TIMEOUT))
 
     def test_no_stop_when_cloud_node(self):
         self.make_actor()
         self.wait_for_assignment(self.setup_actor, 'cloud_node')
-        self.setup_actor.stop_if_no_cloud_node().get(self.TIMEOUT)
+        self.assertFalse(
+            self.setup_actor.stop_if_no_cloud_node().get(self.TIMEOUT))
         self.assertTrue(self.stop_proxy(self.setup_actor),
                         "actor was stopped by stop_if_no_cloud_node")
 
index dc8fdc3f8496b9d90d43fdabca4b922120875a6f..b406f1357671f0efe85813004ff2ddb0629584a1 100644 (file)
@@ -183,6 +183,19 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
         self.last_setup.arvados_node.get.return_value = arv_node
         return self.last_setup
 
+    def test_no_new_node_when_booted_node_not_usable(self):
+        cloud_node = testutil.cloud_node_mock(4)
+        arv_node = testutil.arvados_node_mock(4, crunch_worker_state='down')
+        setup = self.start_node_boot(cloud_node, arv_node)
+        self.daemon.node_up(setup).get(self.TIMEOUT)
+        self.assertEqual(1, self.alive_monitor_count())
+        self.daemon.update_cloud_nodes([cloud_node])
+        self.daemon.update_arvados_nodes([arv_node])
+        self.daemon.update_server_wishlist(
+            [testutil.MockSize(1)]).get(self.TIMEOUT)
+        self.stop_proxy(self.daemon)
+        self.assertEqual(1, self.node_setup.start.call_count)
+
     def test_no_duplication_when_booting_node_listed_fast(self):
         # Test that we don't start two ComputeNodeMonitorActors when
         # we learn about a booting node through a listing before we
@@ -270,6 +283,18 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
         self.stop_proxy(self.daemon)
         self.assertShutdownCancellable(False)
 
+    def test_booted_node_shut_down_when_never_working(self):
+        cloud_node = testutil.cloud_node_mock(4)
+        arv_node = testutil.arvados_node_mock(4, crunch_worker_state='down')
+        setup = self.start_node_boot(cloud_node, arv_node)
+        self.daemon.node_up(setup).get(self.TIMEOUT)
+        self.assertEqual(1, self.alive_monitor_count())
+        self.daemon.update_cloud_nodes([cloud_node])
+        self.daemon.update_arvados_nodes([arv_node]).get(self.TIMEOUT)
+        self.timer.deliver()
+        self.stop_proxy(self.daemon)
+        self.assertShutdownCancellable(False)
+
     def test_node_that_pairs_not_considered_failed_boot(self):
         cloud_node = testutil.cloud_node_mock(3)
         arv_node = testutil.arvados_node_mock(3)
@@ -282,12 +307,42 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
         self.stop_proxy(self.daemon)
         self.assertFalse(self.node_shutdown.start.called)
 
+    def test_node_that_pairs_busy_not_considered_failed_boot(self):
+        cloud_node = testutil.cloud_node_mock(5)
+        arv_node = testutil.arvados_node_mock(5, job_uuid=True)
+        setup = self.start_node_boot(cloud_node, arv_node)
+        self.daemon.node_up(setup).get(self.TIMEOUT)
+        self.assertEqual(1, self.alive_monitor_count())
+        self.daemon.update_cloud_nodes([cloud_node])
+        self.daemon.update_arvados_nodes([arv_node]).get(self.TIMEOUT)
+        self.timer.deliver()
+        self.stop_proxy(self.daemon)
+        self.assertFalse(self.node_shutdown.start.called)
+
     def test_booting_nodes_shut_down(self):
         self.make_daemon(want_sizes=[testutil.MockSize(1)])
         self.daemon.update_server_wishlist([]).get(self.TIMEOUT)
         self.stop_proxy(self.daemon)
         self.assertTrue(self.last_setup.stop_if_no_cloud_node.called)
 
+    def test_all_booting_nodes_tried_to_shut_down(self):
+        size = testutil.MockSize(2)
+        self.make_daemon(want_sizes=[size])
+        self.daemon.max_nodes.get(self.TIMEOUT)
+        setup1 = self.last_setup
+        setup1.stop_if_no_cloud_node().get.return_value = False
+        setup1.stop_if_no_cloud_node.reset_mock()
+        self.daemon.update_server_wishlist([size, size]).get(self.TIMEOUT)
+        self.daemon.max_nodes.get(self.TIMEOUT)
+        self.assertIsNot(setup1, self.last_setup)
+        self.last_setup.stop_if_no_cloud_node().get.return_value = True
+        self.last_setup.stop_if_no_cloud_node.reset_mock()
+        self.daemon.update_server_wishlist([]).get(self.TIMEOUT)
+        self.daemon.max_nodes.get(self.TIMEOUT)
+        self.stop_proxy(self.daemon)
+        self.assertEqual(1, self.last_setup.stop_if_no_cloud_node.call_count)
+        self.assertTrue(setup1.stop_if_no_cloud_node.called)
+
     def test_shutdown_declined_at_wishlist_capacity(self):
         cloud_node = testutil.cloud_node_mock(1)
         size = testutil.MockSize(1)
@@ -384,6 +439,8 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
 
     def test_clean_shutdown_waits_for_node_setup_finish(self):
         new_node = self.start_node_boot()
+        new_node.stop_if_no_cloud_node().get.return_value = False
+        new_node.stop_if_no_cloud_node.reset_mock()
         self.daemon.shutdown().get(self.TIMEOUT)
         self.assertTrue(new_node.stop_if_no_cloud_node.called)
         self.daemon.node_up(new_node).get(self.TIMEOUT)
@@ -393,9 +450,11 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
             self.daemon.actor_ref.actor_stopped.wait(self.TIMEOUT))
 
     def test_wishlist_ignored_after_shutdown(self):
-        size = testutil.MockSize(2)
-        self.make_daemon(want_sizes=[size])
+        new_node = self.start_node_boot()
+        new_node.stop_if_no_cloud_node().get.return_value = False
+        new_node.stop_if_no_cloud_node.reset_mock()
         self.daemon.shutdown().get(self.TIMEOUT)
+        size = testutil.MockSize(2)
         self.daemon.update_server_wishlist([size] * 2).get(self.TIMEOUT)
         self.timer.deliver()
         self.stop_proxy(self.daemon)