Merge branch 'master' into 7454-azure-custom-data
author Peter Amstutz <peter.amstutz@curoverse.com>
Fri, 11 Dec 2015 19:59:02 +0000 (14:59 -0500)
committer Peter Amstutz <peter.amstutz@curoverse.com>
Fri, 11 Dec 2015 19:59:02 +0000 (14:59 -0500)
265 files changed:
README
apps/workbench/Gemfile.lock
apps/workbench/app/assets/javascripts/add_group.js [new file with mode: 0644]
apps/workbench/app/controllers/collections_controller.rb
apps/workbench/app/helpers/version_helper.rb
apps/workbench/app/models/group.rb
apps/workbench/app/models/keep_disk.rb
apps/workbench/app/models/keep_service.rb
apps/workbench/app/models/link.rb
apps/workbench/app/models/node.rb
apps/workbench/app/models/pipeline_instance.rb
apps/workbench/app/models/user.rb
apps/workbench/app/models/virtual_machine.rb
apps/workbench/app/views/application/404.html.erb
apps/workbench/app/views/application/_report_error.html.erb
apps/workbench/app/views/application/_report_issue_popup.html.erb
apps/workbench/app/views/getting_started/_getting_started_popup.html.erb
apps/workbench/app/views/layouts/body.html.erb
apps/workbench/app/views/pipeline_instances/_running_component.html.erb
apps/workbench/app/views/pipeline_instances/_show_components_running.html.erb
apps/workbench/app/views/pipeline_instances/_show_log.html.erb
apps/workbench/app/views/users/_add_group_modal.html.erb [new file with mode: 0644]
apps/workbench/app/views/users/_show_admin.html.erb
apps/workbench/config/application.default.yml
apps/workbench/config/application.rb
apps/workbench/lib/app_version.rb [new file with mode: 0644]
apps/workbench/test/controllers/application_controller_test.rb
apps/workbench/test/controllers/collections_controller_test.rb
apps/workbench/test/controllers/projects_controller_test.rb
apps/workbench/test/helpers/download_helper.rb [new file with mode: 0644]
apps/workbench/test/helpers/share_object_helper.rb
apps/workbench/test/integration/anonymous_access_test.rb
apps/workbench/test/integration/application_layout_test.rb
apps/workbench/test/integration/collection_upload_test.rb
apps/workbench/test/integration/collections_test.rb
apps/workbench/test/integration/download_test.rb [new file with mode: 0644]
apps/workbench/test/integration/errors_test.rb
apps/workbench/test/integration/jobs_test.rb
apps/workbench/test/integration/pipeline_instances_test.rb
apps/workbench/test/integration/users_test.rb
apps/workbench/test/integration/virtual_machines_test.rb
apps/workbench/test/integration_helper.rb
apps/workbench/test/test_helper.rb
backports/python-ciso8601/fpm-info.sh
backports/python-llfuse/fpm-info.sh
backports/python-pycrypto/fpm-info.sh
backports/python-pycurl/fpm-info.sh
crunch_scripts/test/task_output_dir [new file with mode: 0755]
doc/_config.yml
doc/_includes/_install_ruby_and_bundler.liquid
doc/_includes/_run_command_simple_example.liquid
doc/_includes/_ssh_addkey.liquid
doc/_includes/_tutorial_bwa_sortsam_pipeline.liquid
doc/api/schema/Job.html.textile.liquid
doc/index.html.liquid
doc/install/configure-azure-blob-storage.html.textile.liquid [new file with mode: 0644]
doc/install/install-api-server.html.textile.liquid
doc/install/install-arv-git-httpd.html.textile.liquid
doc/install/install-compute-node.html.textile.liquid
doc/install/install-crunch-dispatch.html.textile.liquid
doc/install/install-keep-web.html.textile.liquid [new file with mode: 0644]
doc/install/install-keepproxy.html.textile.liquid
doc/install/install-keepstore.html.textile.liquid
doc/install/install-manual-prerequisites.html.textile.liquid
doc/install/install-sso.html.textile.liquid
doc/install/install-workbench-app.html.textile.liquid
doc/sdk/cli/subcommands.html.textile.liquid
doc/user/getting_started/ssh-access-unix.html.textile.liquid
doc/user/getting_started/ssh-access-windows.html.textile.liquid
doc/user/getting_started/vm-login-with-webshell.html.textile.liquid
doc/user/index.html.textile.liquid
doc/user/topics/arv-docker.html.textile.liquid
doc/user/tutorials/git-arvados-guide.html.textile.liquid
doc/user/tutorials/tutorial-keep-mount.html.textile.liquid
doc/user/tutorials/tutorial-pipeline-workbench.html.textile.liquid
sdk/cli/LICENSE-2.0.txt [new file with mode: 0644]
sdk/cli/arvados-cli.gemspec
sdk/cli/bin/arv
sdk/cli/bin/arv-run-pipeline-instance
sdk/cli/bin/crunch-job
sdk/cli/test/test_arv-get.rb
sdk/cli/test/test_arv-keep-get.rb [new file with mode: 0644]
sdk/cli/test/test_arv-keep-put.rb [moved from sdk/cli/test/test_arv-put.rb with 99% similarity]
sdk/cwl/arvados_cwl/__init__.py
sdk/cwl/bin/arvados-cwl-runner [new file with mode: 0755]
sdk/cwl/setup.py
sdk/go/arvadosclient/arvadosclient.go
sdk/go/arvadosclient/arvadosclient_test.go
sdk/go/arvadostest/fixtures.go [new file with mode: 0644]
sdk/go/arvadostest/run_servers.go
sdk/go/arvadostest/stub.go [new file with mode: 0644]
sdk/go/auth/auth.go
sdk/go/blockdigest/blockdigest.go
sdk/go/blockdigest/blockdigest_test.go
sdk/go/crunchrunner/crunchrunner.go [new file with mode: 0644]
sdk/go/crunchrunner/crunchrunner_test.go [new file with mode: 0644]
sdk/go/crunchrunner/upload.go [new file with mode: 0644]
sdk/go/crunchrunner/upload_test.go [new file with mode: 0644]
sdk/go/keepclient/collectionreader.go [new file with mode: 0644]
sdk/go/keepclient/collectionreader_test.go [new file with mode: 0644]
sdk/go/keepclient/discover.go [new file with mode: 0644]
sdk/go/keepclient/discover_test.go [new file with mode: 0644]
sdk/go/keepclient/keepclient.go
sdk/go/keepclient/keepclient_test.go
sdk/go/keepclient/perms.go [new file with mode: 0644]
sdk/go/keepclient/perms_test.go [new file with mode: 0644]
sdk/go/keepclient/support.go
sdk/go/logger/logger.go
sdk/go/manifest/manifest.go
sdk/go/manifest/manifest_test.go
sdk/go/streamer/streamer_test.go
sdk/go/streamer/transfer.go
sdk/pam/LICENSE-2.0.txt [new file with mode: 0644]
sdk/pam/MANIFEST.in
sdk/pam/setup.py
sdk/python/LICENSE-2.0.txt [new file with mode: 0644]
sdk/python/MANIFEST.in
sdk/python/arvados/__init__.py
sdk/python/arvados/_ranges.py
sdk/python/arvados/api.py
sdk/python/arvados/arvfile.py
sdk/python/arvados/commands/put.py
sdk/python/arvados/commands/run.py
sdk/python/arvados/crunch.py [new file with mode: 0644]
sdk/python/arvados/events.py
sdk/python/arvados/keep.py
sdk/python/setup.py
sdk/python/tests/arvados_testutil.py
sdk/python/tests/keepstub.py
sdk/python/tests/manifest_examples.py
sdk/python/tests/nginx.conf
sdk/python/tests/run_test_server.py
sdk/python/tests/test_api.py
sdk/python/tests/test_arv_ls.py
sdk/python/tests/test_arv_put.py
sdk/python/tests/test_arvfile.py
sdk/python/tests/test_collections.py
sdk/python/tests/test_crunch.py [new file with mode: 0644]
sdk/python/tests/test_keep_client.py
sdk/python/tests/test_retry_job_helpers.py [new file with mode: 0644]
sdk/ruby/LICENSE-2.0.txt [new file with mode: 0644]
sdk/ruby/arvados.gemspec
services/api/Gemfile
services/api/Gemfile.lock
services/api/app/controllers/arvados/v1/keep_services_controller.rb
services/api/app/controllers/arvados/v1/schema_controller.rb
services/api/app/controllers/database_controller.rb
services/api/config/application.default.yml
services/api/config/application.yml.example
services/api/config/initializers/app_version.rb [new file with mode: 0644]
services/api/lib/app_version.rb [new file with mode: 0644]
services/api/lib/create_superuser_token.rb [new file with mode: 0755]
services/api/lib/crunch_dispatch.rb [new file with mode: 0644]
services/api/lib/tasks/config_check.rake
services/api/script/create_superuser_token.rb
services/api/script/crunch-dispatch.rb
services/api/script/salvage_collection.rb
services/api/test/fixtures/api_client_authorizations.yml
services/api/test/fixtures/collections.yml
services/api/test/fixtures/jobs.yml
services/api/test/fixtures/nodes.yml
services/api/test/functional/arvados/v1/keep_services_controller_test.rb
services/api/test/functional/arvados/v1/schema_controller_test.rb
services/api/test/unit/app_version_test.rb [new file with mode: 0644]
services/api/test/unit/create_superuser_token_test.rb [new file with mode: 0644]
services/api/test/unit/crunch_dispatch_test.rb [new file with mode: 0644]
services/arv-git-httpd/git_handler_test.go
services/arv-web/arv-web.py
services/crunchstat/crunchstat_test.go
services/datamanager/collection/collection.go
services/datamanager/collection/collection_test.go
services/datamanager/datamanager.go
services/datamanager/datamanager_test.go
services/datamanager/keep/keep.go
services/datamanager/keep/keep_test.go
services/datamanager/summary/file.go
services/datamanager/summary/pull_list.go
services/dockercleaner/MANIFEST.in [new file with mode: 0644]
services/dockercleaner/agpl-3.0.txt [new file with mode: 0644]
services/dockercleaner/arvados_docker/cleaner.py
services/dockercleaner/setup.py
services/dockercleaner/tests/test_cleaner.py
services/fuse/MANIFEST.in
services/fuse/agpl-3.0.txt [new file with mode: 0644]
services/fuse/arvados_fuse/__init__.py
services/fuse/arvados_fuse/command.py [new file with mode: 0644]
services/fuse/arvados_fuse/crunchstat.py [new file with mode: 0644]
services/fuse/arvados_fuse/fresh.py
services/fuse/arvados_fuse/fusedir.py
services/fuse/arvados_fuse/fusefile.py
services/fuse/bin/arv-mount
services/fuse/setup.py
services/fuse/tests/integration_test.py [new file with mode: 0644]
services/fuse/tests/mount_test_base.py
services/fuse/tests/test_command_args.py [new file with mode: 0644]
services/fuse/tests/test_mount.py
services/fuse/tests/test_tmp_collection.py [new file with mode: 0644]
services/keep-web/.gitignore [new file with mode: 0644]
services/keep-web/anonymous.go [new file with mode: 0644]
services/keep-web/doc.go [new file with mode: 0644]
services/keep-web/handler.go [new file with mode: 0644]
services/keep-web/handler_test.go [new file with mode: 0644]
services/keep-web/main.go [new file with mode: 0644]
services/keep-web/server.go [new file with mode: 0644]
services/keep-web/server_test.go [new file with mode: 0644]
services/keepproxy/keepproxy.go
services/keepproxy/keepproxy_test.go
services/keepproxy/pkg-extras/etc/default/keepproxy [new file with mode: 0644]
services/keepproxy/pkg-extras/etc/init.d/keepproxy [new file with mode: 0755]
services/keepstore/azure_blob_volume.go [new file with mode: 0644]
services/keepstore/azure_blob_volume_test.go [new file with mode: 0644]
services/keepstore/bufferpool_test.go
services/keepstore/collision.go
services/keepstore/collision_test.go
services/keepstore/gocheck_test.go [new file with mode: 0644]
services/keepstore/handler_test.go
services/keepstore/handlers.go
services/keepstore/handlers_with_generic_volume_test.go
services/keepstore/keepstore.go
services/keepstore/keepstore_test.go
services/keepstore/logging_router.go
services/keepstore/perms.go
services/keepstore/perms_test.go
services/keepstore/pull_worker.go
services/keepstore/pull_worker_integration_test.go
services/keepstore/pull_worker_test.go
services/keepstore/s3_volume.go [new file with mode: 0644]
services/keepstore/s3_volume_test.go [new file with mode: 0644]
services/keepstore/volume.go
services/keepstore/volume_generic_test.go
services/keepstore/volume_test.go
services/keepstore/volume_unix.go
services/keepstore/volume_unix_test.go
services/login-sync/agpl-3.0.txt [new file with mode: 0644]
services/login-sync/arvados-login-sync.gemspec
services/nodemanager/MANIFEST.in
services/nodemanager/agpl-3.0.txt [new file with mode: 0644]
services/nodemanager/arvnodeman/computenode/__init__.py
services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
services/nodemanager/arvnodeman/computenode/dispatch/slurm.py
services/nodemanager/arvnodeman/computenode/driver/__init__.py
services/nodemanager/arvnodeman/computenode/driver/azure.py
services/nodemanager/arvnodeman/computenode/driver/dummy.py
services/nodemanager/arvnodeman/computenode/driver/ec2.py
services/nodemanager/arvnodeman/computenode/driver/gce.py
services/nodemanager/arvnodeman/config.py
services/nodemanager/arvnodeman/daemon.py
services/nodemanager/arvnodeman/jobqueue.py
services/nodemanager/arvnodeman/launcher.py
services/nodemanager/doc/azure.example.cfg
services/nodemanager/doc/ec2.example.cfg
services/nodemanager/doc/gce.example.cfg
services/nodemanager/setup.py
services/nodemanager/tests/test_computenode_dispatch.py
services/nodemanager/tests/test_computenode_dispatch_slurm.py
services/nodemanager/tests/test_computenode_driver_gce.py
services/nodemanager/tests/test_config.py
services/nodemanager/tests/test_daemon.py
services/nodemanager/tests/test_jobqueue.py
services/nodemanager/tests/testutil.py
tools/keep-exercise/.gitignore [new file with mode: 0644]
tools/keep-exercise/keep-exercise.go [new file with mode: 0644]
tools/keep-rsync/.gitignore [new file with mode: 0644]
tools/keep-rsync/keep-rsync.go [new file with mode: 0644]
tools/keep-rsync/keep-rsync_test.go [new file with mode: 0644]

diff --git a/README b/README
index c7a36c355b4a2b94dfab45c9748330022a788c91..7a2c5ef018a9827db24212beb38d2579bbe1438f 100644 (file)
--- a/README
+++ b/README
@@ -4,10 +4,10 @@ The main Arvados web site is
   https://arvados.org
 
 The Arvados public wiki is located at 
-  https://arvados.org/projects/arvados/wiki
+  https://dev.arvados.org/projects/arvados/wiki
 
 The Arvados public bug tracker is located at 
-  https://arvados.org/projects/arvados/issues
+  https://dev.arvados.org/projects/arvados/issues
 
 For support see 
   http://doc.arvados.org/user/getting_started/community.html
diff --git a/apps/workbench/Gemfile.lock b/apps/workbench/Gemfile.lock
index 20b8d6164ccca273e11756928a21c1a17851f07a..303d5830de18774db271edb7891397000e1fddc8 100644 (file)
@@ -68,13 +68,13 @@ GEM
       net-sftp (>= 2.0.0)
       net-ssh (>= 2.0.14)
       net-ssh-gateway (>= 1.1.0)
-    capybara (2.4.4)
+    capybara (2.5.0)
       mime-types (>= 1.16)
       nokogiri (>= 1.3.3)
       rack (>= 1.0.0)
       rack-test (>= 0.5.4)
       xpath (~> 2.0)
-    childprocess (0.5.5)
+    childprocess (0.5.6)
       ffi (~> 1.0, >= 1.0.11)
     cliver (0.3.2)
     coffee-rails (4.1.0)
@@ -98,7 +98,7 @@ GEM
     fast_stack (0.1.0)
       rake
       rake-compiler
-    ffi (1.9.6)
+    ffi (1.9.10)
     flamegraph (0.1.0)
       fast_stack
     google-api-client (0.6.4)
@@ -132,14 +132,14 @@ GEM
     mail (2.6.3)
       mime-types (>= 1.16, < 3)
     metaclass (0.0.4)
-    mime-types (2.6.1)
+    mime-types (2.99)
     mini_portile (0.6.2)
     minitest (5.7.0)
     mocha (1.1.0)
       metaclass (~> 0.0.1)
     morrisjs-rails (0.5.1)
       railties (> 3.1, < 5)
-    multi_json (1.11.1)
+    multi_json (1.11.2)
     multipart-post (1.2.0)
     net-scp (1.2.1)
       net-ssh (>= 2.6.5)
@@ -148,7 +148,7 @@ GEM
     net-ssh (2.9.2)
     net-ssh-gateway (1.2.0)
       net-ssh (>= 2.6.5)
-    nokogiri (1.6.5)
+    nokogiri (1.6.6.4)
       mini_portile (~> 0.6.0)
     oj (2.11.2)
     passenger (4.0.57)
@@ -192,7 +192,7 @@ GEM
     ref (1.0.5)
     ruby-debug-passenger (0.2.0)
     ruby-prof (0.15.2)
-    rubyzip (1.1.6)
+    rubyzip (1.1.7)
     rvm-capistrano (1.5.5)
       capistrano (~> 2.15.4)
     sass (3.4.9)
@@ -202,7 +202,7 @@ GEM
       sprockets (>= 2.8, < 4.0)
       sprockets-rails (>= 2.0, < 4.0)
       tilt (~> 1.1)
-    selenium-webdriver (2.44.0)
+    selenium-webdriver (2.48.1)
       childprocess (~> 0.5)
       multi_json (~> 1.0)
       rubyzip (~> 1.0)
@@ -239,7 +239,7 @@ GEM
       execjs (>= 0.3.0)
       json (>= 1.8.0)
     uuidtools (2.1.5)
-    websocket (1.2.1)
+    websocket (1.2.2)
     websocket-driver (0.5.1)
       websocket-extensions (>= 0.1.0)
     websocket-extensions (0.1.1)
@@ -294,3 +294,6 @@ DEPENDENCIES
   therubyracer
   uglifier (>= 1.0.3)
   wiselinks
+
+BUNDLED WITH
+   1.10.6
diff --git a/apps/workbench/app/assets/javascripts/add_group.js b/apps/workbench/app/assets/javascripts/add_group.js
new file mode 100644 (file)
index 0000000..16d9c2d
--- /dev/null
@@ -0,0 +1,44 @@
+$(document).on('shown.bs.modal', '#add-group-modal', function(event) {
+    // Disable the submit button on modal loading
+    $submit = $('#add-group-submit');
+    $submit.prop('disabled', true);
+
+    $('input[type=text]', event.target).val('');
+    $('#add-group-error', event.target).hide();
+}).on('input propertychange', '#group_name_input', function(event) {
+    group_name = $(event.target).val();
+    $submit = $('#add-group-submit');
+    $submit.prop('disabled', (group_name === null || group_name === ""));
+}).on('submit', '#add-group-form', function(event) {
+    var $form = $(event.target),
+    $submit = $(':submit', $form),
+    $error = $('#add-group-error', $form),
+    group_name = $('input[name="group_name_input"]', $form).val();
+
+    $submit.prop('disabled', true);
+
+    $error.hide();
+    $.ajax('/groups',
+           {method: 'POST',
+            dataType: 'json',
+            data: {group: {name: group_name, group_class: 'role'}},
+            context: $form}).
+        done(function(data, status, jqxhr) {
+            location.reload();
+        }).
+        fail(function(jqxhr, status, error) {
+            var errlist = jqxhr.responseJSON.errors;
+            var errmsg;
+            if (Array.isArray(errlist)) {
+                errmsg = errlist.join();
+            } else {
+                errmsg = ("The server returned an error when creating " +
+                          "this group (status " + jqxhr.status +
+                          ": " + errlist + ").");
+            }
+            $error.text(errmsg);
+            $error.show();
+            $submit.prop('disabled', false);
+        });
+    return false;
+});
diff --git a/apps/workbench/app/controllers/collections_controller.rb b/apps/workbench/app/controllers/collections_controller.rb
index e01151ca408b567ec9908349bdfd7a51bcd15a2f..f8b359c89060cd9d2703b9303cffaf40f5eef54f 100644 (file)
@@ -1,4 +1,5 @@
 require "arvados/keep"
+require "uri"
 
 class CollectionsController < ApplicationController
   include ActionController::Live
@@ -130,11 +131,34 @@ class CollectionsController < ApplicationController
     usable_token = find_usable_token(tokens) do
       coll = Collection.find(params[:uuid])
     end
+    if usable_token.nil?
+      # Response already rendered.
+      return
+    end
+
+    # If we are configured to use a keep-web server, just redirect to
+    # the appropriate URL.
+    if Rails.configuration.keep_web_url or
+        Rails.configuration.keep_web_download_url
+      opts = {}
+      if usable_token == params[:reader_token]
+        opts[:path_token] = usable_token
+      elsif usable_token == Rails.configuration.anonymous_user_token
+        # Don't pass a token at all
+      else
+        # We pass the current user's real token only if it's necessary
+        # to read the collection.
+        opts[:query_token] = usable_token
+      end
+      opts[:disposition] = params[:disposition] if params[:disposition]
+      return redirect_to keep_web_url(params[:uuid], params[:file], opts)
+    end
+
+    # No keep-web server available. Get the file data with arv-get,
+    # and serve it through Rails.
 
     file_name = params[:file].andand.sub(/^(\.\/|\/|)/, './')
-    if usable_token.nil?
-      return  # Response already rendered.
-    elsif file_name.nil? or not coll.manifest.has_file?(file_name)
+    if file_name.nil? or not coll.manifest.has_file?(file_name)
       return render_not_found
     end
 
@@ -305,6 +329,61 @@ class CollectionsController < ApplicationController
     return nil
   end
 
+  def keep_web_url(uuid_or_pdh, file, opts)
+    munged_id = uuid_or_pdh.sub('+', '-')
+    fmt = {uuid_or_pdh: munged_id}
+
+    tmpl = Rails.configuration.keep_web_url
+    if Rails.configuration.keep_web_download_url and
+        (!tmpl or opts[:disposition] == 'attachment')
+      # Prefer the attachment-only-host when we want an attachment
+      # (and when there is no preview link configured)
+      tmpl = Rails.configuration.keep_web_download_url
+    else
+      check_uri = URI.parse(tmpl % fmt)
+      if opts[:query_token] and
+          not check_uri.host.start_with?(munged_id + "--") and
+          not check_uri.host.start_with?(munged_id + ".")
+        # We're about to pass a token in the query string, but
+        # keep-web can't accept that safely at a single-origin URL
+        # template (unless it's -attachment-only-host).
+        tmpl = Rails.configuration.keep_web_download_url
+        if not tmpl
+          raise ArgumentError, "Download precluded by site configuration"
+        end
+        logger.warn("Using download link, even though inline content " \
+                    "was requested: #{check_uri.to_s}")
+      end
+    end
+
+    if tmpl == Rails.configuration.keep_web_download_url
+      # This takes us to keep-web's -attachment-only-host so there is
+      # no need to add ?disposition=attachment.
+      opts.delete :disposition
+    end
+
+    uri = URI.parse(tmpl % fmt)
+    uri.path += '/' unless uri.path.end_with? '/'
+    if opts[:path_token]
+      uri.path += 't=' + opts[:path_token] + '/'
+    end
+    uri.path += '_/'
+    uri.path += URI.escape(file)
+
+    query = Hash[URI.decode_www_form(uri.query || '')]
+    { query_token: 'api_token',
+      disposition: 'disposition' }.each do |opt, param|
+      if opts.include? opt
+        query[param] = opts[opt]
+      end
+    end
+    unless query.empty?
+      uri.query = URI.encode_www_form(query)
+    end
+
+    uri.to_s
+  end
+
   # Note: several controller and integration tests rely on stubbing
   # file_enumerator to return fake file content.
   def file_enumerator opts
diff --git a/apps/workbench/app/helpers/version_helper.rb b/apps/workbench/app/helpers/version_helper.rb
index 6cae78f53efc495f4a54797b39dc6e6e889492a3..5c15986601e1b9b69d772d5391b88146fbec33c9 100644 (file)
@@ -1,30 +1,12 @@
 module VersionHelper
-  # api_version returns the git commit hash for the API server's
-  # current version.  It is extracted from api_version_text, which
-  # returns the source_version provided by the discovery document and
-  # may have the word "-modified" appended to it (if the API server is
-  # running from a locally modified repository).
-
-  def api_version
-    api_version_text.sub(/[^[:xdigit:]].*/, '')
-  end
-
-  def api_version_text
+  # Get the source_version given in the API server's discovery
+  # document.
+  def api_source_version
     arvados_api_client.discovery[:source_version]
   end
 
-  # wb_version and wb_version_text provide the same strings for the
-  # code version that this Workbench is currently running.
-
-  def wb_version
-    Rails.configuration.source_version
-  end
-
-  def wb_version_text
-    wb_version + (Rails.configuration.local_modified or '')
-  end
-
+  # URL for browsing source code for the given version.
   def version_link_target version
-    "https://arvados.org/projects/arvados/repository/changes?rev=#{version}"
+    "https://arvados.org/projects/arvados/repository/changes?rev=#{version.sub(/-.*/, "")}"
   end
 end
diff --git a/apps/workbench/app/models/group.rb b/apps/workbench/app/models/group.rb
index 3f5da155c4ca9fc5ea13d0836551a92c3c11d435..0d358603a830b821a7f4c20874d01dec185b1e9c 100644 (file)
@@ -32,4 +32,8 @@ class Group < ArvadosBase
   def textile_attributes
     [ 'description' ]
   end
+
+  def self.creatable?
+    false
+  end
 end
diff --git a/apps/workbench/app/models/keep_disk.rb b/apps/workbench/app/models/keep_disk.rb
index 8ced4eb5f331a3a837ccb6a12a6d34b8374a236b..6438fc72d694713ec4656e4417d67edd625a15e4 100644 (file)
@@ -1,5 +1,5 @@
 class KeepDisk < ArvadosBase
   def self.creatable?
-    current_user and current_user.is_admin
+    false
   end
 end
diff --git a/apps/workbench/app/models/keep_service.rb b/apps/workbench/app/models/keep_service.rb
index f27e369b86cf04188d17126920b804022dd16ec5..0c998c4591fcb45eacc5e62165823ecebf8d9cc8 100644 (file)
@@ -1,5 +1,5 @@
 class KeepService < ArvadosBase
   def self.creatable?
-    current_user and current_user.is_admin
+    false
   end
 end
diff --git a/apps/workbench/app/models/link.rb b/apps/workbench/app/models/link.rb
index 271fa0f0103eac4e3197d417ffc31430285b6643..b1bbcff1265ac0b545e210ab7d6621c7dce0b7a8 100644 (file)
@@ -18,4 +18,8 @@ class Link < ArvadosBase
     result = arvados_api_client.api("permissions", "/#{uuid}")
     arvados_api_client.unpack_api_response(result)
   end
+
+  def self.creatable?
+    false
+  end
 end
diff --git a/apps/workbench/app/models/node.rb b/apps/workbench/app/models/node.rb
index e66be83078c9f5dbe2fff3be09dcba17c5fb4bf9..8bf98c6decb3865054c6339105c688f2ec89fcf4 100644 (file)
@@ -1,6 +1,6 @@
 class Node < ArvadosBase
   def self.creatable?
-    current_user and current_user.is_admin
+    false
   end
   def friendly_link_name lookup=nil
     (hostname && !hostname.empty?) ? hostname : uuid
diff --git a/apps/workbench/app/models/pipeline_instance.rb b/apps/workbench/app/models/pipeline_instance.rb
index 03d70b2897063435298707e3068dfa79ca16ff34..6e556d5b75cae13626aad29d1c0a816a5a78b583 100644 (file)
@@ -92,6 +92,10 @@ class PipelineInstance < ArvadosBase
     components_map { |cspec| cspec[:job][:log] rescue nil }
   end
 
+  def job_ids
+    components_map { |cspec| cspec[:job][:uuid] rescue nil }
+  end
+
   def stderr_log_object_uuids
     result = job_uuids.values.compact
     result << uuid
diff --git a/apps/workbench/app/models/user.rb b/apps/workbench/app/models/user.rb
index 3b5b3083fc531b5b97c937bb326d3c32e377f202..8df16f29a44f84ed540ad1c292e9ee5e92da54e4 100644 (file)
@@ -63,4 +63,8 @@ class User < ArvadosBase
   def deletable?
     false
   end
+
+   def self.creatable?
+    current_user and current_user.is_admin
+   end
 end
diff --git a/apps/workbench/app/models/virtual_machine.rb b/apps/workbench/app/models/virtual_machine.rb
index 3b44397df5459efb7074f46bd094826192e061eb..e1a208c53d70f4d02828e6a7215b976be2fe616a 100644 (file)
@@ -1,20 +1,25 @@
 class VirtualMachine < ArvadosBase
   attr_accessor :current_user_logins
+
   def self.creatable?
-    current_user.andand.is_admin
+    false
   end
+
   def attributes_for_display
     super.append ['current_user_logins', @current_user_logins]
   end
+
   def editable_attributes
     super - %w(current_user_logins)
   end
+
   def self.attribute_info
     merger = ->(k,a,b) { a.merge(b, &merger) }
     merger [nil,
             {current_user_logins: {column_heading: "logins", type: 'array'}},
             super]
   end
+
   def friendly_link_name lookup=nil
     (hostname && !hostname.empty?) ? hostname : uuid
   end
diff --git a/apps/workbench/app/views/application/404.html.erb b/apps/workbench/app/views/application/404.html.erb
index aa1ffda017120bb085997e130074e1bd799dace9..ea6e7033cb23c113fecfebfbb3dad8e6a7c01533 100644 (file)
 <% if !current_user %>
 
   <p>
-    (I notice you are not logged in. If you're looking for a private
-    page, you'll need to <%=link_to 'log in', arvados_api_client.arvados_login_url(return_to: strip_token_from_path(request.url))%> first.)
+    <%= link_to(arvados_api_client.arvados_login_url(return_to: strip_token_from_path(request.url)),
+                {class: "btn btn-primary report-issue-modal-window"}) do %>
+      <i class="fa fa-fw fa-sign-in"></i> Log in
+    <% end %>
+    to view private data.
   </p>
 
 <% elsif class_name %>
diff --git a/apps/workbench/app/views/application/_report_error.html.erb b/apps/workbench/app/views/application/_report_error.html.erb
index 2e449b79d718ef53d7c7e507f46509867447ee2d..d9573a210bfcc6c06b0a9546eb6214fb5ddd13bf 100644 (file)
@@ -1,21 +1,31 @@
+<%
+   popup_params = {
+     popup_type: 'report',
+     current_location: request.url,
+     current_path: request.fullpath,
+     action_method: 'post',
+   }
+   if error_type == "api"
+     popup_params.merge!(
+       api_error_request_url: api_error.andand.request_url || "",
+       api_error_response: api_error.andand.api_response || "",
+     )
+   else
+     popup_params.merge!(error_message: error_message)
+   end
+%>
+
 <p>
-<br/><strong>If you suspect this is a bug, you can help us fix it by sending us a problem report:</strong><br/><br/>
-<% if error_type == 'api' %>
-  <%
-    api_request_url = api_error.andand.request_url ? api_error.request_url : ''
-    api_error_response = api_error.andand.api_response ? api_error.api_response : ''
-  %>
-  Send a problem report right here. <%= link_to report_issue_popup_path(popup_type: 'report', current_location: request.url, current_path: request.fullpath, action_method: 'post', api_error_request_url: api_request_url, api_error_response: api_error_response),
-        {class: 'btn btn-primary report-issue-modal-window', :remote => true, return_to: request.url} do %>
-        <i class="fa fa-fw fa-support"></i> Report problem
-  <% end %>
-<% else %>
-  Send a problem report right here. <%= link_to report_issue_popup_path(popup_type: 'report', current_location: request.url, current_path: request.fullpath, action_method: 'post', error_message: error_message),
-        {class: 'btn btn-primary report-issue-modal-window', :remote => true, return_to: request.url} do %>
-        <i class="fa fa-fw fa-support"></i> Report problem
-  <% end %>
+<%= link_to(report_issue_popup_path(popup_params),
+            {class: 'btn btn-primary report-issue-modal-window', :remote => true, return_to: request.url}) do %>
+  <i class="fa fa-fw fa-support"></i> Report problem
 <% end %>
-<% support_email = Rails.configuration.support_email_address%>
-<br/><br/>
-  If you prefer, send email to: <a href="mailto:<%=support_email%>?subject=Workbench problem report&amp;body=Problem while viewing page <%=request.url%>"><%=support_email%></a>
+
+or
+
+<%= mail_to(Rails.configuration.support_email_address, "email us",
+            subject: "Workbench problem report",
+            body: "Problem while viewing page #{request.url}") %>
+
+if you suspect this is a bug.
 </p>
diff --git a/apps/workbench/app/views/application/_report_issue_popup.html.erb b/apps/workbench/app/views/application/_report_issue_popup.html.erb
index 1c964abfe0491b8ad1c77c02767aaf845927093e..1f66146e2626cbfc705a94ea510ba6c062715e84 100644 (file)
@@ -3,18 +3,15 @@
   arvados_base = Rails.configuration.arvados_v1_base
   support_email = Rails.configuration.support_email_address
 
-  api_version_link = link_to api_version_text, version_link_target(api_version)
-  wb_version_link = link_to wb_version_text, version_link_target(wb_version)
-
   additional_info = {}
   additional_info['Current location'] = params[:current_location]
   additional_info['User UUID'] = current_user.uuid if current_user
 
   additional_info_str = additional_info.map {|k,v| "#{k}=#{v}"}.join("\n")
 
-  additional_info['api_version'] = api_version_text
+  additional_info['api_source_version'] = api_source_version
   additional_info['generated_at'] = generated_at
-  additional_info['workbench_version'] = wb_version_text
+  additional_info['workbench_version'] = AppVersion.hash
   additional_info['arvados_base'] = arvados_base
   additional_info['support_email'] = support_email
   additional_info['error_message'] = params[:error_message] if params[:error_message]
         <div class="form-group">
           <label for="wb_version" class="col-sm-4 control-label"> Workbench version </label>
           <div class="col-sm-8">
-            <p class="form-control-static" name="wb_version"><%= wb_version_link %></p>
+            <p class="form-control-static" name="wb_version">
+              <%= link_to AppVersion.hash, version_link_target(AppVersion.hash) %>
+            </p>
           </div>
         </div>
 
         <div class="form-group">
           <label for="server_version" class="col-sm-4 control-label"> API version </label>
           <div class="col-sm-8">
-            <p class="form-control-static" name="server_version"><%= api_version_link %></p>
+            <p class="form-control-static" name="server_version">
+              <%= link_to api_source_version, version_link_target(api_source_version) %>
+            </p>
           </div>
         </div>
 
diff --git a/apps/workbench/app/views/getting_started/_getting_started_popup.html.erb b/apps/workbench/app/views/getting_started/_getting_started_popup.html.erb
index 0db0567ec98b22495b6e063479a60ba753b5bc83..3020a1249bca287a41103b6f5924c134b2090adc 100644 (file)
@@ -154,7 +154,7 @@ div.figure p {
             </li><li>
               <strong>Use existing pipelines</strong>: Use best-practices pipelines on your own data with the click of a button.
             </li><li>
-              <strong>Open-source</strong>: Arvados is completely open-source. Check out our <a href="http://arvados.org">developer site</a>.
+              <strong>Open source</strong>: Arvados is completely open source. Check out our <a href="http://dev.arvados.org">developer site</a>.
             </li>
           </ol>
           <p style="margin-top: 1em;">
diff --git a/apps/workbench/app/views/layouts/body.html.erb b/apps/workbench/app/views/layouts/body.html.erb
index 22ccc2f91dd69a7c88fd66bc10e7f817de8bda1d..acb056e9c970635eeef55bee7f1638789ac88fd2 100644 (file)
               <li><%= link_to 'Browse public projects', "/projects/public" %></li>
             <% end %>
             <li class="dropdown hover-dropdown login-menu">
-              <a href="<%= arvados_api_client.arvados_login_url(return_to: root_url) %>">Log in</a>
+              <a href="<%= arvados_api_client.arvados_login_url(return_to: request.url) %>">Log in</a>
               <ul class="dropdown-menu">
                 <li>
-                  <a href="<%= arvados_api_client.arvados_login_url(return_to: root_url) %>">
+                  <a href="<%= arvados_api_client.arvados_login_url(return_to: request.url) %>">
                     <span class="fa fa-lg fa-sign-in"></span>
                     <p style="margin-left: 1.6em; margin-top: -1.35em; margin-bottom: 0em; margin-right: 0.5em;">Log in or register with<br/>any Google account</p>
                   </a>
diff --git a/apps/workbench/app/views/pipeline_instances/_running_component.html.erb b/apps/workbench/app/views/pipeline_instances/_running_component.html.erb
index 63a2371a1b3dd2dc398c9b672311bd606d64bdf7..cb27f1c46340cf622311c7d92745af2c7c4e87be 100644 (file)
@@ -4,7 +4,7 @@
     <div class="container-fluid">
       <div class="row-fluid">
         <%# column offset 0 %>
-        <div class="col-md-3" style="word-break:break-all;">
+        <div class="col-md-2" style="word-break:break-all;">
           <h4 class="panel-title">
             <a data-toggle="collapse" href="#collapse<%= i %>">
               <%= pj[:name] %> <span class="caret"></span>
           </h4>
         </div>
 
-        <%# column offset 3 %>
+        <%# column offset 2 %>
         <div class="col-md-2 pipeline-instance-spacing">
           <%= pj[:progress_bar] %>
         </div>
 
         <% if current_job %>
+          <%# column offset 4 %>
+          <% if(pipeline_display rescue nil) %>
+            <% if current_job[:state].in? ["Complete", "Failed", "Cancelled"] %>
+              <div class="col-md-1">
+              <% if current_job[:log] %>
+                  <% logCollection = Collection.find? current_job[:log] %>
+                  <% if logCollection %>
+                    <%= link_to "Log", job_path(current_job[:uuid], anchor: "Log") %>
+                  <% else %>
+                    Log unavailable
+                  <% end %>
+              <% end %>
+              </div>
+            <% elsif current_job[:state] == "Running" %>
+              <div class="col-md-1">
+                <% job = Job.find? current_job[:uuid] %>
+                <% if job %>
+                  <%= link_to "Log", job_path(current_job[:uuid], anchor: "Log") %>
+                <% else %>
+                  Log unavailable
+                <% end %>
+              </div>
+            <% end %>
+          <% end %>
+
           <%# column offset 5 %>
           <% if current_job[:state] != "Queued" %>
           <div class="col-md-3">
diff --git a/apps/workbench/app/views/pipeline_instances/_show_components_running.html.erb b/apps/workbench/app/views/pipeline_instances/_show_components_running.html.erb
index 566e3d771e12796b8d8ebb7e273e34fe499def33..4343f2e57b5adbb64dfb0fbabe177b9d7f937b7a 100644 (file)
@@ -97,5 +97,5 @@
 %>
 
 <% pipeline_jobs.each_with_index do |pj, i| %>
-  <%= render partial: 'running_component', locals: {pj: pj, i: i, expanded: false} %>
+  <%= render partial: 'running_component', locals: {pj: pj, i: i, expanded: false, pipeline_display: true} %>
 <% end %>
diff --git a/apps/workbench/app/views/pipeline_instances/_show_log.html.erb b/apps/workbench/app/views/pipeline_instances/_show_log.html.erb
index 187dce7bd244e44468edbdda111c0ce1166406f6..3a4ec4ead092a68bcd4e1031287019730f55b030 100644 (file)
@@ -1,4 +1,5 @@
 <% log_ids = @object.job_log_ids
+   job_ids = @object.job_ids
    still_logging, done_logging = log_ids.keys.partition { |k| log_ids[k].nil? }
 %>
 
@@ -19,7 +20,7 @@
       <tr>
         <td><%= cname %></td>
         <td><%= link_to("Log for #{cname}",
-                {controller: "collections", action: "show", id: log_ids[cname]})
+                job_path(job_ids[cname], anchor: "Log"))
                 %></td>
       </tr>
       <% end %>
diff --git a/apps/workbench/app/views/users/_add_group_modal.html.erb b/apps/workbench/app/views/users/_add_group_modal.html.erb
new file mode 100644 (file)
index 0000000..8230e56
--- /dev/null
@@ -0,0 +1,27 @@
+<div class="modal" id="add-group-modal" tabindex="-1" role="dialog" aria-labelledby="add-group-label" aria-hidden="true">
+  <div class="modal-dialog">
+    <div class="modal-content">
+      <form id="add-group-form">
+        <div class="modal-header">
+          <button type="button" class="close" data-dismiss="modal" aria-label="Close"><span aria-hidden="true">&times;</span></button>
+          <h4 class="modal-title" id="add-group-label">Add new group</h4>
+        </div>
+        <div class="modal-body form-horizontal">
+          <div class="form-group">
+            <label for="group_name_input" class="col-sm-1 control-label">Name</label>
+            <div class="col-sm-9">
+              <div class="input-group-name">
+                <input type="text" class="form-control" id="group_name_input" name="group_name_input" placeholder="Enter group name"/>
+              </div>
+            </div>
+          </div>
+          <p id="add-group-error" class="alert alert-danger"></p>
+        </div>
+        <div class="modal-footer">
+          <button type="button" class="btn btn-default" data-dismiss="modal">Cancel</button>
+          <input type="submit" class="btn btn-primary" id="add-group-submit" name="submit" value="Create">
+        </div>
+      </form>
+    </div>
+  </div>
+</div>
diff --git a/apps/workbench/app/views/users/_show_admin.html.erb b/apps/workbench/app/views/users/_show_admin.html.erb
index 54643a1c189476ae55b99e526b26ad85cd492ae8..f83f9a05fd4b35739e4750780de5f817dace5927 100644 (file)
     <div class="panel panel-default">
       <div class="panel-heading">
         Group memberships
+
+        <div class="pull-right">
+          <%= link_to raw('<i class="fa fa-plus"></i> Add new group'), "#",
+                       {class: 'btn btn-xs btn-primary', 'data-toggle' => "modal",
+                        'data-target' => '#add-group-modal'}  %>
+        </div>
       </div>
       <div class="panel-body">
         <div class="alert alert-info">
@@ -92,7 +98,7 @@
         </form>
       </div>
       <div class="panel-footer">
-        To manage these groups (roles), use:
+        These groups (roles) can also be managed from the command line. For example:
         <ul>
           <li><code>arv group create \<br/>--group '{"group_class":"role","name":"New group"}'</code></li>
           <li><code>arv group list \<br/>--filters '[["group_class","=","role"]]' \<br/>--select '["uuid","name"]'</code></li>
 </div>
 
 <div id="user-setup-modal-window" class="modal fade" role="dialog" aria-labelledby="myModalLabel" aria-hidden="true"></div>
+<%= render partial: "add_group_modal" %>
diff --git a/apps/workbench/config/application.default.yml b/apps/workbench/config/application.default.yml
index 1650e12f4dc08da930509eb1352e7f81f88e4937..c4a92d273950bbebe15b0faf29ccbe58b3cde3b4 100644 (file)
@@ -1,45 +1,6 @@
 # Do not use this file for site configuration. Create application.yml
 # instead (see application.yml.example).
 
-<%
-# If you change any of the code in this block, you'll probably also want
-# to update it in API server's application.default.yml.
-def info_cmd(*args, &block)
-  IO.popen(args, "r", chdir: Rails.root, err: "/dev/null", &block)
-end
-
-source_version = ""
-local_modified = false
-if Rails.env == "production"
-  # Read the version from our package's git-commit.version file, if available.
-  begin
-    source_version = IO.read(Rails.root.join("git-commit.version")).strip
-  rescue Errno::ENOENT
-  end
-end
-
-if source_version.empty?
-  begin
-    status_output = false
-    info_cmd("git", "status", "-s") do |git_pipe|
-      git_pipe.each_line do |_|
-        status_output = true
-        # Continue reading the pipe so git doesn't get SIGPIPE.
-      end
-    end
-    if $?.success?
-      info_cmd("git", "log", "-n1", "--format=%H") do |git_pipe|
-        git_pipe.each_line do |line|
-          source_version = line.chomp
-        end
-      end
-      local_modified = status_output
-    end
-  rescue SystemCallError
-  end
-end
-%>
-
 # Below is a sample setting for diagnostics testing.
 # Configure workbench URL as "arvados_workbench_url"
 # Configure test user tokens as "user_tokens".
@@ -84,7 +45,6 @@ development:
   assets.debug: true
   profiling_enabled: true
   site_name: Arvados Workbench (dev)
-  local_modified: "<%= local_modified ? '-modified' : '' %>"
 
   # API server configuration
   arvados_login_base: ~
@@ -222,9 +182,11 @@ common:
   # the profile page.
   user_profile_form_message: Welcome to Arvados. All <span style="color:red">required fields</span> must be completed before you can proceed.
 
-  # source_version
-  source_version: "<%= source_version[0...8] %>"
-  local_modified: false
+  # Override the automatic version string. With the default value of
+  # false, the version string is read from git-commit.version in
+  # Rails.root (included in vendor packages) or determined by invoking
+  # "git log".
+  source_version: false
 
   # report notification to and from addresses
   issue_reporter_email_from: arvados@example.com
@@ -263,3 +225,32 @@ common:
   # E.g., using a name-based proxy server to forward connections to shell hosts:
   # https://%{hostname}.webshell.uuid_prefix.arvadosapi.com/
   shell_in_a_box_url: false
+
+  # Format of preview links. If false, use keep_web_download_url
+  # instead, and disable inline preview. If both are false, use
+  # Workbench's built-in file download/preview mechanism.
+  #
+  # Examples:
+  # keep_web_url: https://%{uuid_or_pdh}.collections.uuid_prefix.arvadosapi.com
+  # keep_web_url: https://%{uuid_or_pdh}--collections.uuid_prefix.arvadosapi.com
+  #
+  # Example supporting only public data and collection-sharing links
+  # (other data will be handled as downloads via keep_web_download_url):
+  # keep_web_url: https://collections.uuid_prefix.arvadosapi.com/c=%{uuid_or_pdh}
+  keep_web_url: false
+
+  # Format of download links. If false, use keep_web_url with
+  # disposition=attachment query param.
+  #
+  # The host part of the keep_web_download_url value here must match
+  # the -attachment-only-host argument given to keep-web: if
+  # keep_web_download_url is "https://FOO.EXAMPLE/c=..." then keep-web
+  # must run with "-attachment-only-host=FOO.EXAMPLE".
+  #
+  # If keep_web_download_url is false, and keep_web_url uses a
+  # single-origin form, then Workbench will show an error page
+  # when asked to download or preview private data.
+  #
+  # Example:
+  # keep_web_download_url: https://download.uuid_prefix.arvadosapi.com/c=%{uuid_or_pdh}
+  keep_web_download_url: false
diff --git a/apps/workbench/config/application.rb b/apps/workbench/config/application.rb
index 4ac68198e8bd6fa6404512ad5479397223de1bc8..d1c7934ab3ba16b57f61425edcc48a57b81df1e3 100644 (file)
@@ -12,6 +12,7 @@ module ArvadosWorkbench
 
     # Custom directories with classes and modules you want to be autoloadable.
     # config.autoload_paths += %W(#{config.root}/extras)
+    config.autoload_paths += %W(#{config.root}/lib)
 
     # Only load the plugins named here, in the order given (default is alphabetical).
     # :all can be used as a placeholder for all plugins not explicitly named.
diff --git a/apps/workbench/lib/app_version.rb b/apps/workbench/lib/app_version.rb
new file mode 100644 (file)
index 0000000..48cb8f6
--- /dev/null
@@ -0,0 +1,53 @@
+# If you change this file, you'll probably also want to make the same
+# changes in services/api/lib/app_version.rb.
+
+class AppVersion
+  def self.git(*args, &block)
+    IO.popen(["git", "--git-dir", ".git"] + args, "r",
+             chdir: Rails.root.join('../..'),
+             err: "/dev/null",
+             &block)
+  end
+
+  def self.forget
+    @hash = nil
+  end
+
+  # Return abbrev commit hash for current code version: "abc1234", or
+  # "abc1234-modified" if there are uncommitted changes. If present,
+  # return contents of {root}/git-commit.version instead.
+  def self.hash
+    if (cached = Rails.configuration.source_version || @hash)
+      return cached
+    end
+
+    # Read the version from our package's git-commit.version file, if available.
+    begin
+      @hash = IO.read(Rails.root.join("git-commit.version")).strip
+    rescue Errno::ENOENT
+    end
+
+    if @hash.nil? or @hash.empty?
+      begin
+        local_modified = false
+        git("status", "--porcelain") do |git_pipe|
+          git_pipe.each_line do |_|
+            STDERR.puts _
+            local_modified = true
+            # Continue reading the pipe so git doesn't get SIGPIPE.
+          end
+        end
+        if $?.success?
+          git("log", "-n1", "--format=%H") do |git_pipe|
+            git_pipe.each_line do |line|
+              @hash = line.chomp[0...8] + (local_modified ? '-modified' : '')
+            end
+          end
+        end
+      rescue SystemCallError
+      end
+    end
+
+    @hash || "unknown"
+  end
+end
diff --git a/apps/workbench/test/controllers/application_controller_test.rb b/apps/workbench/test/controllers/application_controller_test.rb
index 15d52da3c34f5f12a45091968d5b875f3909872f..ef2a989427948a693c1257e7466f09e523c43c7b 100644 (file)
@@ -334,6 +334,26 @@ class ApplicationControllerTest < ActionController::TestCase
     assert_response 404
   end
 
+  [".navbar .login-menu a",
+   ".navbar .login-menu .dropdown-menu a"
+  ].each do |css_selector|
+    test "login link at #{css_selector.inspect} includes return_to param" do
+      # Without an anonymous token, we're immediately redirected to login.
+      Rails.configuration.anonymous_user_token =
+        api_fixture("api_client_authorizations", "anonymous", "api_token")
+      @controller = ProjectsController.new
+      test_uuid = "zzzzz-j7d0g-zzzzzzzzzzzzzzz"
+      get(:show, {id: test_uuid})
+      login_link = css_select(css_selector).first
+      assert_not_nil(login_link, "failed to select login link")
+      login_href = URI.unescape(login_link.attributes["href"])
+      # The parameter needs to include the full URL to work.
+      assert_includes(login_href, "://")
+      assert_match(/[\?&]return_to=[^&]*\/projects\/#{test_uuid}(&|$)/,
+                   login_href)
+    end
+  end
+
   test "Workbench returns 4xx when API server is unreachable" do
     # We're really testing ApplicationController's render_exception.
     # Our primary concern is that it doesn't raise an error and
diff --git a/apps/workbench/test/controllers/collections_controller_test.rb b/apps/workbench/test/controllers/collections_controller_test.rb
index 13644e00bdce28db3460aa2f722f679deb107c7e..978a5133578c9bafa2adb98e6b7b86d455adfe09 100644 (file)
@@ -10,6 +10,15 @@ class CollectionsControllerTest < ActionController::TestCase
 
   NONEXISTENT_COLLECTION = "ffffffffffffffffffffffffffffffff+0"
 
+  def config_anonymous enable
+    Rails.configuration.anonymous_user_token =
+      if enable
+        api_fixture('api_client_authorizations')['anonymous']['api_token']
+      else
+        false
+      end
+  end
+
   def stub_file_content
     # For the duration of the current test case, stub file download
     # content with a randomized (but recognizable) string. Return the
@@ -167,8 +176,7 @@ class CollectionsControllerTest < ActionController::TestCase
   end
 
   test 'anonymous download' do
-    Rails.configuration.anonymous_user_token =
-      api_fixture('api_client_authorizations')['anonymous']['api_token']
+    config_anonymous true
     expect_content = stub_file_content
     get :show_file, {
       uuid: api_fixture('collections')['user_agreement_in_anonymously_accessible_project']['uuid'],
@@ -205,15 +213,14 @@ class CollectionsControllerTest < ActionController::TestCase
                      "using a reader token set the session's API token")
   end
 
-  [false, api_fixture('api_client_authorizations')['anonymous']['api_token']].
-    each do |anon_conf|
-    test "download a file using a reader token with insufficient scope (anon_conf=#{!!anon_conf})" do
-      Rails.configuration.anonymous_user_token = anon_conf
+  [false, true].each do |anon|
+    test "download a file using a reader token with insufficient scope, anon #{anon}" do
+      config_anonymous anon
       params = collection_params(:foo_file, 'foo')
       params[:reader_token] =
         api_fixture('api_client_authorizations')['active_noscope']['api_token']
       get(:show_file, params)
-      if anon_conf
+      if anon
         # Some files can be shown without a valid token, but not this one.
         assert_response 404
       else
@@ -463,8 +470,7 @@ class CollectionsControllerTest < ActionController::TestCase
   end
 
   test "anonymous user accesses collection in shared project" do
-    Rails.configuration.anonymous_user_token =
-      api_fixture('api_client_authorizations')['anonymous']['api_token']
+    config_anonymous true
     collection = api_fixture('collections')['public_text_file']
     get(:show, {id: collection['uuid']})
 
@@ -514,4 +520,109 @@ class CollectionsControllerTest < ActionController::TestCase
     get :show, {id: api_fixture('collections')['user_agreement']['uuid']}, session_for(:active)
     assert_not_includes @response.body, '<a href="#Upload"'
   end
+
+  def setup_for_keep_web cfg='https://%{uuid_or_pdh}.example', dl_cfg=false
+    Rails.configuration.keep_web_url = cfg
+    Rails.configuration.keep_web_download_url = dl_cfg
+    @controller.expects(:file_enumerator).never
+  end
+
+  %w(uuid portable_data_hash).each do |id_type|
+    test "Redirect to keep_web_url via #{id_type}" do
+      setup_for_keep_web
+      tok = api_fixture('api_client_authorizations')['active']['api_token']
+      id = api_fixture('collections')['w_a_z_file'][id_type]
+      get :show_file, {uuid: id, file: "w a z"}, session_for(:active)
+      assert_response :redirect
+      assert_equal "https://#{id.sub '+', '-'}.example/_/w%20a%20z?api_token=#{tok}", @response.redirect_url
+    end
+
+    test "Redirect to keep_web_url via #{id_type} with reader token" do
+      setup_for_keep_web
+      tok = api_fixture('api_client_authorizations')['active']['api_token']
+      id = api_fixture('collections')['w_a_z_file'][id_type]
+      get :show_file, {uuid: id, file: "w a z", reader_token: tok}, session_for(:expired)
+      assert_response :redirect
+      assert_equal "https://#{id.sub '+', '-'}.example/t=#{tok}/_/w%20a%20z", @response.redirect_url
+    end
+
+    test "Redirect to keep_web_url via #{id_type} with no token" do
+      setup_for_keep_web
+      config_anonymous true
+      id = api_fixture('collections')['public_text_file'][id_type]
+      get :show_file, {uuid: id, file: "Hello World.txt"}
+      assert_response :redirect
+      assert_equal "https://#{id.sub '+', '-'}.example/_/Hello%20World.txt", @response.redirect_url
+    end
+
+    test "Redirect to keep_web_url via #{id_type} with disposition param" do
+      setup_for_keep_web
+      config_anonymous true
+      id = api_fixture('collections')['public_text_file'][id_type]
+      get :show_file, {
+        uuid: id,
+        file: "Hello World.txt",
+        disposition: 'attachment',
+      }
+      assert_response :redirect
+      assert_equal "https://#{id.sub '+', '-'}.example/_/Hello%20World.txt?disposition=attachment", @response.redirect_url
+    end
+
+    test "Redirect to keep_web_download_url via #{id_type}" do
+      setup_for_keep_web('https://collections.example/c=%{uuid_or_pdh}',
+                         'https://download.example/c=%{uuid_or_pdh}')
+      tok = api_fixture('api_client_authorizations')['active']['api_token']
+      id = api_fixture('collections')['w_a_z_file'][id_type]
+      get :show_file, {uuid: id, file: "w a z"}, session_for(:active)
+      assert_response :redirect
+      assert_equal "https://download.example/c=#{id.sub '+', '-'}/_/w%20a%20z?api_token=#{tok}", @response.redirect_url
+    end
+  end
+
+  [false, true].each do |anon|
+    test "No redirect to keep_web_url if collection not found, anon #{anon}" do
+      setup_for_keep_web
+      config_anonymous anon
+      id = api_fixture('collections')['w_a_z_file']['uuid']
+      get :show_file, {uuid: id, file: "w a z"}, session_for(:spectator)
+      assert_response 404
+    end
+
+    test "Redirect download to keep_web_download_url, anon #{anon}" do
+      config_anonymous anon
+      setup_for_keep_web('https://collections.example/c=%{uuid_or_pdh}',
+                         'https://download.example/c=%{uuid_or_pdh}')
+      tok = api_fixture('api_client_authorizations')['active']['api_token']
+      id = api_fixture('collections')['public_text_file']['uuid']
+      get :show_file, {
+        uuid: id,
+        file: 'Hello world.txt',
+        disposition: 'attachment',
+      }, session_for(:active)
+      assert_response :redirect
+      expect_url = "https://download.example/c=#{id.sub '+', '-'}/_/Hello%20world.txt"
+      if not anon
+        expect_url += "?api_token=#{tok}"
+      end
+      assert_equal expect_url, @response.redirect_url
+    end
+  end
+
+  test "Error if file is impossible to retrieve from keep_web_url" do
+    # Cannot pass a session token using a single-origin keep-web URL,
+    # cannot read this collection without a session token.
+    setup_for_keep_web 'https://collections.example/c=%{uuid_or_pdh}', false
+    id = api_fixture('collections')['w_a_z_file']['uuid']
+    get :show_file, {uuid: id, file: "w a z"}, session_for(:active)
+    assert_response 422
+  end
+
+  test "Redirect preview to keep_web_download_url when preview is disabled" do
+    setup_for_keep_web false, 'https://download.example/c=%{uuid_or_pdh}'
+    tok = api_fixture('api_client_authorizations')['active']['api_token']
+    id = api_fixture('collections')['w_a_z_file']['uuid']
+    get :show_file, {uuid: id, file: "w a z"}, session_for(:active)
+    assert_response :redirect
+    assert_equal "https://download.example/c=#{id.sub '+', '-'}/_/w%20a%20z?api_token=#{tok}", @response.redirect_url
+  end
 end
diff --git a/apps/workbench/test/controllers/projects_controller_test.rb b/apps/workbench/test/controllers/projects_controller_test.rb
index 3416cc0e61026c9ef5d4e7caee3baf4289095ca2..8fa9fe9a817e7f2e3b6494099b85afc53c05ac57 100644 (file)
@@ -239,7 +239,7 @@ class ProjectsControllerTest < ActionController::TestCase
     Rails.configuration.anonymous_user_token = api_fixture('api_client_authorizations')['anonymous']['api_token']
     get(:show, {id: api_fixture('groups')['aproject']['uuid']})
     assert_response 404
-    assert_includes @response.inspect, 'you are not logged in'
+    assert_match(/log ?in/i, @response.body)
   end
 
   test "visit home page as anonymous when anonymous browsing is enabled and expect login" do
diff --git a/apps/workbench/test/helpers/download_helper.rb b/apps/workbench/test/helpers/download_helper.rb
new file mode 100644 (file)
index 0000000..776f0c7
--- /dev/null
@@ -0,0 +1,23 @@
+module DownloadHelper
+  module_function
+
+  def path
+    Rails.root.join 'tmp', 'downloads'
+  end
+
+  def clear
+    if File.exist? path
+      FileUtils.rm_r path
+    end
+    begin
+      Dir.mkdir path
+    rescue Errno::EEXIST
+    end
+  end
+
+  def done
+    Dir[path.join '*'].reject do |f|
+      /\.part$/ =~ f
+    end
+  end
+end
diff --git a/apps/workbench/test/helpers/share_object_helper.rb b/apps/workbench/test/helpers/share_object_helper.rb
index 9d8f8d03252d036cf9dca696b833a1097f17e46b..0d20aacbf7cbe8cdfe580c200a033621f88658a7 100644 (file)
@@ -31,7 +31,7 @@ module ShareObjectHelper
       end
       click_on "Add"
     end
-    using_wait_time(Capybara.default_wait_time * 3) do
+    using_wait_time(Capybara.default_max_wait_time * 3) do
       assert(page.has_link?(name),
              "new share was not added to sharing table")
       assert_equal(start_share_count + 1, share_rows.size,
@@ -57,7 +57,7 @@ module ShareObjectHelper
       end
     end
     # Ensure revoked permission disappears from page.
-    using_wait_time(Capybara.default_wait_time * 3) do
+    using_wait_time(Capybara.default_max_wait_time * 3) do
       assert_no_text name
       assert_equal(start_rows.size - 1, share_rows.size,
                    "revoking share did not remove row from sharing table")
diff --git a/apps/workbench/test/integration/anonymous_access_test.rb b/apps/workbench/test/integration/anonymous_access_test.rb
index aabbf00dc206e77090d1d9096c3abed23eec8530..d58a0315ee54595c2e3d4d10e09c4a822d2e93ae 100644 (file)
@@ -211,6 +211,7 @@ class AnonymousAccessTest < ActionDispatch::IntegrationTest
     ['pipeline_in_publicly_accessible_project_but_other_objects_elsewhere', true, 'admin'],
 
     ['completed_job_in_publicly_accessible_project', true],
+    ['running_job_in_publicly_accessible_project', true],
     ['job_in_publicly_accessible_project_but_other_objects_elsewhere', false],
   ].each do |fixture, objects_readable, user=nil|
     test "access #{fixture} in public project with objects readable=#{objects_readable} with user #{user}" do
@@ -218,18 +219,18 @@ class AnonymousAccessTest < ActionDispatch::IntegrationTest
 
       if pipeline_page
         object = api_fixture('pipeline_instances')[fixture]
-        page = "/pipeline_instances/#{object['uuid']}"
+        page_link = "/pipeline_instances/#{object['uuid']}"
         expect_log_text = "Log for foo"
       else      # job
         object = api_fixture('jobs')[fixture]
-        page = "/jobs/#{object['uuid']}"
+        page_link = "/jobs/#{object['uuid']}"
         expect_log_text = "stderr crunchstat"
       end
 
       if user
-        visit page_with_token user, page
+        visit page_with_token user, page_link
       else
-        visit page
+        visit page_link
       end
 
       # click job link, if in pipeline page
@@ -241,11 +242,14 @@ class AnonymousAccessTest < ActionDispatch::IntegrationTest
         assert_no_text 'Output data not available'
         if pipeline_page
           assert_text 'This pipeline was created from'
-          assert_selector 'a', text: object['components']['foo']['job']['uuid']
+          job_id = object['components']['foo']['job']['uuid']
+          assert_selector 'a', text: job_id
+          assert_selector "a[href=\"/jobs/#{job_id}#Log\"]", text: 'Log'
+
           # We'd like to test the Log tab on job pages too, but we can't right
           # now because Poltergeist 1.x doesn't support JavaScript's
           # Function.prototype.bind, which is used by job_log_graph.js.
-          click_link "Log"
+          find(:xpath, "//a[@href='#Log']").click
           assert_text expect_log_text
         end
       else
@@ -255,8 +259,9 @@ class AnonymousAccessTest < ActionDispatch::IntegrationTest
         if pipeline_page
           assert_no_text 'This pipeline was created from'  # template is not readable
           assert_no_selector 'a', text: object['components']['foo']['job']['uuid']
+          assert_text 'Log unavailable'
         end
-        click_link "Log"
+        find(:xpath, "//a[@href='#Log']").click
         assert_text 'Output data not available'
         assert_no_text expect_log_text
       end
index 61ba16294f85f5d7bacaf02486b3f83a2e253fac..db072e496be7affdddec7d2f8cf2a164ed67bc5b 100644 (file)
@@ -218,14 +218,14 @@ class ApplicationLayoutTest < ActionDispatch::IntegrationTest
   end
 
    [
-    ['Repositories',nil,'s0uqq'],
-    ['Virtual machines','virtual machine','current_user_logins'],
-    ['SSH keys',nil,'public_key'],
-    ['Links','link','link_class'],
-    ['Groups','group','group_class'],
-    ['Compute nodes','node','info[ping_secret'],
-    ['Keep services','keep service','service_ssl_flag'],
-    ['Keep disks', 'keep disk','bytes_free'],
+    ['Repositories', nil, 's0uqq'],
+    ['Virtual machines', nil, 'testvm.shell'],
+    ['SSH keys', nil, 'public_key'],
+    ['Links', nil, 'link_class'],
+    ['Groups', nil, 'All users'],
+    ['Compute nodes', nil, 'ping_secret'],
+    ['Keep services', nil, 'service_ssl_flag'],
+    ['Keep disks', nil, 'bytes_free'],
   ].each do |page_name, add_button_text, look_for|
     test "test system menu #{page_name} link" do
       visit page_with_token('admin')
index 62efee4d67e6b4e5a84e2340bcc55902b18ba30d..903df90fb419c7ba231ae3c42d679abc85af41ed 100644 (file)
@@ -7,9 +7,19 @@ class CollectionUploadTest < ActionDispatch::IntegrationTest
         io.write content
       end
     end
+    # Database reset doesn't restore KeepServices; we have to
+    # save/restore manually.
+    use_token :admin do
+      @keep_services = KeepService.all.to_a
+    end
   end
 
   teardown do
+    use_token :admin do
+      @keep_services.each do |ks|
+        KeepService.find(ks.uuid).update_attributes(ks.attributes)
+      end
+    end
     testfiles.each do |filename, _|
       File.unlink(testfile_path filename)
     end
@@ -42,7 +52,7 @@ class CollectionUploadTest < ActionDispatch::IntegrationTest
     assert_match /_text":"\. d41d8\S+ 0:0:empty.txt\\n\. d41d8\S+ 0:0:empty\\\\040\(1\).txt\\n"/, body
   end
 
-  test "Upload non-empty files, report errors" do
+  test "Upload non-empty files" do
     need_selenium "to make file uploads work"
     visit page_with_token 'active', sandbox_path
     find('.nav-tabs a', text: 'Upload').click
@@ -50,24 +60,17 @@ class CollectionUploadTest < ActionDispatch::IntegrationTest
     attach_file 'file_selector', testfile_path('foo.txt')
     assert_selector 'button:not([disabled])', text: 'Start'
     click_button 'Start'
-    if "test environment does not have a keepproxy yet, see #4534" != "fixed"
-      using_wait_time 20 do
-        assert_text :visible, 'error'
-      end
-    else
-      assert_text :visible, 'Done!'
-      visit sandbox_path+'.json'
-      assert_match /_text":"\. 0cc1\S+ 0:1:a\\n\. acbd\S+ 0:3:foo.txt\\n"/, body
-    end
+    assert_text :visible, 'Done!'
+    visit sandbox_path+'.json'
+    assert_match /_text":"\. 0cc1\S+ 0:1:a\\n\. acbd\S+ 0:3:foo.txt\\n"/, body
   end
 
   test "Report mixed-content error" do
     skip 'Test suite does not use TLS'
     need_selenium "to make file uploads work"
-    begin
-      use_token :admin
-      proxy = KeepService.find(api_fixture('keep_services')['proxy']['uuid'])
-      proxy.update_attributes service_ssl_flag: false
+    use_token :admin do
+      KeepService.where(service_type: 'proxy').first.
+        update_attributes(service_ssl_flag: false)
     end
     visit page_with_token 'active', sandbox_path
     find('.nav-tabs a', text: 'Upload').click
@@ -82,11 +85,12 @@ class CollectionUploadTest < ActionDispatch::IntegrationTest
 
   test "Report network error" do
     need_selenium "to make file uploads work"
-    begin
-      use_token :admin
-      proxy = KeepService.find(api_fixture('keep_services')['proxy']['uuid'])
-      # Even if you somehow do port>2^16, surely nx.example.net won't respond
-      proxy.update_attributes service_host: 'nx.example.net', service_port: 99999
+    use_token :admin do
+      # Even if you somehow do port>2^16, surely nx.example.net won't
+      # respond
+      KeepService.where(service_type: 'proxy').first.
+        update_attributes(service_host: 'nx.example.net',
+                          service_port: 99999)
     end
     visit page_with_token 'active', sandbox_path
     find('.nav-tabs a', text: 'Upload').click
index 4f66e9d6b58bf4e9402a95ab21192268fa9edd1a..8190b35f00747627c198af7bdd296d2e61d23377 100644 (file)
@@ -31,7 +31,7 @@ class CollectionsTest < ActionDispatch::IntegrationTest
       text_assertion = :assert_text
       link_assertion = :refute_empty
     end
-    using_wait_time(Capybara.default_wait_time * 3) do
+    using_wait_time(Capybara.default_max_wait_time * 3) do
       send(text_assertion, "Shared at:")
     end
     send(link_assertion, all("a").select { |a| a[:href] =~ link_regexp })
diff --git a/apps/workbench/test/integration/download_test.rb b/apps/workbench/test/integration/download_test.rb
new file mode 100644 (file)
index 0000000..ed91ae0
--- /dev/null
@@ -0,0 +1,94 @@
+require 'integration_helper'
+require 'helpers/download_helper'
+
+class DownloadTest < ActionDispatch::IntegrationTest
+  def getport service
+    File.read(File.expand_path("../../../../../tmp/#{service}.port", __FILE__))
+  end
+
+  setup do
+    @kwport = getport 'keep-web-ssl'
+    @kwdport = getport 'keep-web-dl-ssl'
+    Rails.configuration.keep_web_url = "https://localhost:#{@kwport}/c=%{uuid_or_pdh}"
+    Rails.configuration.keep_web_download_url = "https://localhost:#{@kwdport}/c=%{uuid_or_pdh}"
+    CollectionsController.any_instance.expects(:file_enumerator).never
+
+    # Make sure Capybara can download files.
+    need_selenium 'for downloading', :selenium_with_download
+    DownloadHelper.clear
+
+    # Keep data isn't populated by fixtures, so we have to write any
+    # data we expect to read.
+    ['foo', 'w a z', "Hello world\n"].each do |data|
+      md5 = `echo -n #{data.shellescape} | arv-put --no-progress --raw -`
+      assert_match /^#{Digest::MD5.hexdigest(data)}/, md5
+      assert $?.success?, $?
+    end
+  end
+
+  ['uuid', 'portable_data_hash'].each do |id_type|
+    test "preview from keep-web by #{id_type} using a reader token" do
+      uuid_or_pdh = api_fixture('collections')['foo_file'][id_type]
+      token = api_fixture('api_client_authorizations')['active_all_collections']['api_token']
+      visit "/collections/download/#{uuid_or_pdh}/#{token}/"
+      within "#collection_files" do
+        click_link 'foo'
+      end
+      assert_no_selector 'a'
+      assert_text 'foo'
+    end
+
+    test "preview anonymous content from keep-web by #{id_type}" do
+      Rails.configuration.anonymous_user_token =
+        api_fixture('api_client_authorizations')['anonymous']['api_token']
+      uuid_or_pdh =
+        api_fixture('collections')['public_text_file'][id_type]
+      visit "/collections/#{uuid_or_pdh}"
+      within "#collection_files" do
+        find('[title~=View]').click
+      end
+      assert_no_selector 'a'
+      assert_text 'Hello world'
+    end
+
+    test "download anonymous content from keep-web by #{id_type}" do
+      Rails.configuration.anonymous_user_token =
+        api_fixture('api_client_authorizations')['anonymous']['api_token']
+      uuid_or_pdh =
+        api_fixture('collections')['public_text_file'][id_type]
+      visit "/collections/#{uuid_or_pdh}"
+      within "#collection_files" do
+        find('[title~=Download]').click
+      end
+      wait_for_download 'Hello world.txt', "Hello world\n"
+    end
+  end
+
+  test "download from keep-web using a session token" do
+    uuid = api_fixture('collections')['w_a_z_file']['uuid']
+    token = api_fixture('api_client_authorizations')['active']['api_token']
+    visit page_with_token('active', "/collections/#{uuid}")
+    within "#collection_files" do
+      find('[title~=Download]').click
+    end
+    wait_for_download 'w a z', 'w a z'
+  end
+
+  def wait_for_download filename, expect_data
+    data = nil
+    tries = 0
+    while tries < 20
+      sleep 0.1
+      tries += 1
+      data = File.read(DownloadHelper.path.join filename) rescue nil
+      # Stop polling as soon as the complete file has arrived.
+      break if data == expect_data
+    end
+    assert_equal expect_data, data
+  end
+
+  # TODO(TC): test "view pages hosted by keep-web, using session
+  # token". We might persuade selenium to send
+  # "collection-uuid.dl.example" requests to localhost by configuring
+  # our test nginx server to work as its forward proxy. Until then,
+  # we're relying on the "Redirect to keep_web_url via #{id_type}"
+  # test in CollectionsControllerTest (and keep-web's tests).
+end
index f2067a92bfdb27233b59fbc4ddb730156c5ce8c8..32f6e027557b838980df3769b88a8aa49f2e3a04 100644 (file)
@@ -46,18 +46,6 @@ class ErrorsTest < ActionDispatch::IntegrationTest
     page.html =~ /\b(#{matching_stamps})\+[0-9A-Fa-f]{8}\b/
   end
 
-  # We use API tokens with limited scopes as the quickest way to get the API
-  # server to return an error.  If Workbench gets smarter about coping when
-  # it has a too-limited token, these tests will need to be adjusted.
-  test "API error page includes error token" do
-    start_stamp = now_timestamp
-    visit(page_with_token("active_readonly", "/groups"))
-    click_on "Add a new group"
-    assert(page.has_text?(/fiddlesticks/i),
-           "Not on an error page after making a group out of scope")
-    assert(page_has_error_token?(start_stamp), "no error token on 404 page")
-  end
-
   test "showing a bad UUID returns 404" do
     visit(page_with_token("active", "/pipeline_templates/zzz"))
     assert(page.has_no_text?(/fiddlesticks/i),
index 2cae500027aeebd8f79270a4ba26f61e4496373f..b8d38903797d56eae4b01169efce1c4b169f232b 100644 (file)
@@ -41,7 +41,7 @@ class JobsTest < ActionDispatch::IntegrationTest
     visit page_with_token("active", "/jobs/#{job['uuid']}")
     assert page.has_text? job['script_version']
 
-    click_link 'Log'
+    find(:xpath, "//a[@href='#Log']").click
     wait_for_ajax
     assert page.has_text? 'Started at'
     assert page.has_text? 'Finished at'
@@ -61,7 +61,7 @@ class JobsTest < ActionDispatch::IntegrationTest
     visit page_with_token("active", "/jobs/#{job['uuid']}")
     assert page.has_text? job['script_version']
 
-    click_link 'Log'
+    find(:xpath, "//a[@href='#Log']").click
     wait_for_ajax
     assert page.has_text? 'Showing only 100 bytes of this log'
   end
index b6bf700d0948a1eb85a8ed00fe0c30fc6ae6e037..2ab8beb294ab8f2ae99e1c6866d2ad7efbcb0822 100644 (file)
@@ -78,7 +78,7 @@ class PipelineInstancesTest < ActionDispatch::IntegrationTest
       first('span', text: 'foo_tag').click
       find('.btn', text: 'Copy').click
     end
-    using_wait_time(Capybara.default_wait_time * 3) do
+    using_wait_time(Capybara.default_max_wait_time * 3) do
       wait_for_ajax
     end
 
@@ -166,7 +166,7 @@ class PipelineInstancesTest < ActionDispatch::IntegrationTest
       first('span', text: 'foo_tag').click
       find('.btn', text: 'Copy').click
     end
-    using_wait_time(Capybara.default_wait_time * 3) do
+    using_wait_time(Capybara.default_max_wait_time * 3) do
       wait_for_ajax
     end
 
@@ -545,12 +545,12 @@ class PipelineInstancesTest < ActionDispatch::IntegrationTest
   test "job logs linked for running pipeline" do
     pi = api_fixture("pipeline_instances", "running_pipeline_with_complete_job")
     visit(page_with_token("active", "/pipeline_instances/#{pi['uuid']}"))
-    click_on "Log"
+    find(:xpath, "//a[@href='#Log']").click
     within "#Log" do
       assert_text "Log for previous"
       log_link = find("a", text: "Log for previous")
       assert_includes(log_link[:href],
-                      pi["components"]["previous"]["job"]["log"])
+                      "/jobs/#{pi["components"]["previous"]["job"]["uuid"]}#Log")
       assert_selector "#event_log_div"
     end
   end
@@ -558,12 +558,12 @@ class PipelineInstancesTest < ActionDispatch::IntegrationTest
   test "job logs linked for complete pipeline" do
     pi = api_fixture("pipeline_instances", "complete_pipeline_with_two_jobs")
     visit(page_with_token("active", "/pipeline_instances/#{pi['uuid']}"))
-    click_on "Log"
+    find(:xpath, "//a[@href='#Log']").click
     within "#Log" do
       assert_text "Log for previous"
       pi["components"].each do |cname, cspec|
         log_link = find("a", text: "Log for #{cname}")
-        assert_includes(log_link[:href], cspec["job"]["log"])
+        assert_includes(log_link[:href], "/jobs/#{cspec["job"]["uuid"]}#Log")
       end
       assert_no_selector "#event_log_div"
     end
@@ -572,12 +572,12 @@ class PipelineInstancesTest < ActionDispatch::IntegrationTest
   test "job logs linked for failed pipeline" do
     pi = api_fixture("pipeline_instances", "failed_pipeline_with_two_jobs")
     visit(page_with_token("active", "/pipeline_instances/#{pi['uuid']}"))
-    click_on "Log"
+    find(:xpath, "//a[@href='#Log']").click
     within "#Log" do
       assert_text "Log for previous"
       pi["components"].each do |cname, cspec|
         log_link = find("a", text: "Log for #{cname}")
-        assert_includes(log_link[:href], cspec["job"]["log"])
+        assert_includes(log_link[:href], "/jobs/#{cspec["job"]["uuid"]}#Log")
       end
       assert_no_selector "#event_log_div"
     end
index 1ae302c23947c2968d194fdb006bcdaf3561be04..90a3eb2333fedeae32b80ef9146323689bf2fbca 100644 (file)
@@ -197,4 +197,26 @@ class UsersTest < ActionDispatch::IntegrationTest
     click_link 'Metadata'
     assert page.has_text? 'VirtualMachine: testvm.shell'
   end
+
+  test "test add group button" do
+    need_javascript
+
+    user_url = "/users/#{api_fixture('users')['active']['uuid']}"
+    visit page_with_token('admin_trustedclient', user_url)
+
+    # Setup user
+    click_link 'Admin'
+    assert page.has_text? 'As an admin, you can setup'
+
+    click_link 'Add new group'
+
+    within '.modal-content' do
+      fill_in "group_name_input", :with => "test-group-added-in-modal"
+      click_button "Create"
+    end
+    wait_for_ajax
+
+    # Back in the user "Admin" tab
+    assert page.has_text? 'test-group-added-in-modal'
+  end
 end
index 1d398a595ce854335df8e3c1d5a434632ed89728..a22337bcb4469e96a08787338998d286ec484ca3 100644 (file)
@@ -1,18 +1,4 @@
 require 'integration_helper'
 
 class VirtualMachinesTest < ActionDispatch::IntegrationTest
-  test "make and name a new virtual machine" do
-    need_javascript
-    visit page_with_token('admin_trustedclient')
-    find('#system-menu').click
-    click_link 'Virtual machines'
-    assert page.has_text? 'testvm.shell'
-    click_on 'Add a new virtual machine'
-    find('tr', text: 'hostname').
-      find('a[data-original-title=edit]').click
-    assert page.has_text? 'Edit hostname'
-    fill_in 'editable-text', with: 'testname'
-    click_button 'editable-submit'
-    assert page.has_text? 'testname'
-  end
 end
index 39fdf4b260abd68be05f252158f29629aa199199..48d2cb5f6d3123513e345a24c5489d2893fb3f2e 100644 (file)
@@ -4,25 +4,61 @@ require 'capybara/poltergeist'
 require 'uri'
 require 'yaml'
 
-POLTERGEIST_OPTS = {
-  window_size: [1200, 800],
-  phantomjs_options: ['--ignore-ssl-errors=true'],
-  inspector: true,
-}
+def available_port for_what
+  Addrinfo.tcp("0.0.0.0", 0).listen do |srv|
+    port = srv.connect_address.ip_port
+    STDERR.puts "Using port #{port} for #{for_what}"
+    return port
+  end
+end
+
+def selenium_opts
+  {
+    port: available_port('selenium'),
+  }
+end
+
+def poltergeist_opts
+  {
+    phantomjs_options: ['--ignore-ssl-errors=true'],
+    port: available_port('poltergeist'),
+    window_size: [1200, 800],
+  }
+end
 
 Capybara.register_driver :poltergeist do |app|
-  Capybara::Poltergeist::Driver.new app, POLTERGEIST_OPTS
+  Capybara::Poltergeist::Driver.new app, poltergeist_opts
+end
+
+Capybara.register_driver :poltergeist_debug do |app|
+  Capybara::Poltergeist::Driver.new app, poltergeist_opts.merge(inspector: true)
 end
 
 Capybara.register_driver :poltergeist_without_file_api do |app|
   js = File.expand_path '../support/remove_file_api.js', __FILE__
-  Capybara::Poltergeist::Driver.new app, POLTERGEIST_OPTS.merge(extensions: [js])
+  Capybara::Poltergeist::Driver.new app, poltergeist_opts.merge(extensions: [js])
+end
+
+Capybara.register_driver :selenium do |app|
+  Capybara::Selenium::Driver.new app, selenium_opts
+end
+
+Capybara.register_driver :selenium_with_download do |app|
+  profile = Selenium::WebDriver::Firefox::Profile.new
+  profile['browser.download.dir'] = DownloadHelper.path.to_s
+  profile['browser.download.downloadDir'] = DownloadHelper.path.to_s
+  profile['browser.download.defaultFolder'] = DownloadHelper.path.to_s
+  profile['browser.download.folderList'] = 2 # "save to user-defined location"
+  profile['browser.download.manager.showWhenStarting'] = false
+  profile['browser.helperApps.alwaysAsk.force'] = false
+  profile['browser.helperApps.neverAsk.saveToDisk'] = 'text/plain,application/octet-stream'
+  Capybara::Selenium::Driver.new app, selenium_opts.merge(profile: profile)
 end
 
 module WaitForAjax
-  Capybara.default_wait_time = 5
+  Capybara.default_max_wait_time = 10
   def wait_for_ajax
-    Timeout.timeout(Capybara.default_wait_time) do
+    Timeout.timeout(Capybara.default_max_wait_time) do
       loop until finished_all_ajax_requests?
     end
   end
@@ -50,8 +86,10 @@ end
 
 module HeadlessHelper
   class HeadlessSingleton
+    @display = ENV['ARVADOS_TEST_HEADLESS_DISPLAY'] || rand(400)+100
+    STDERR.puts "Using display :#{@display} for headless tests"
     def self.get
-      @headless ||= Headless.new reuse: false
+      @headless ||= Headless.new reuse: false, display: @display
     end
   end
 
@@ -73,8 +111,8 @@ module HeadlessHelper
     end
   end
 
-  def need_selenium reason=nil
-    Capybara.current_driver = :selenium
+  def need_selenium reason=nil, driver=:selenium
+    Capybara.current_driver = driver
     unless ENV['ARVADOS_TEST_HEADFUL'] or @headless
       @headless = HeadlessSingleton.get
       @headless.start
index 89d15c67d8de3708c4d74540518b9707e09aa432..41592af993261b6affd55fa6cba0f05780d475ec 100644 (file)
@@ -176,7 +176,10 @@ class ApiServerForTests
       # though it doesn't need to start up a new server).
       env_script = check_output %w(python ./run_test_server.py start --auth admin)
       check_output %w(python ./run_test_server.py start_arv-git-httpd)
+      check_output %w(python ./run_test_server.py start_keep-web)
       check_output %w(python ./run_test_server.py start_nginx)
+      # This one isn't a no-op, even under run-tests.sh.
+      check_output %w(python ./run_test_server.py start_keep)
     end
     test_env = {}
     env_script.each_line do |line|
@@ -192,9 +195,11 @@ class ApiServerForTests
 
   def stop_test_server
     Dir.chdir PYTHON_TESTS_DIR do
+      check_output %w(python ./run_test_server.py stop_keep)
       # These are no-ops if we're running within run-tests.sh
       check_output %w(python ./run_test_server.py stop_nginx)
       check_output %w(python ./run_test_server.py stop_arv-git-httpd)
+      check_output %w(python ./run_test_server.py stop_keep-web)
       check_output %w(python ./run_test_server.py stop)
     end
     @@server_is_running = false
index 925a3757a25b449bab2dd67a2d4f2b521b948d62..42f31e365ca58f3add83cbba0bd73855d1190e30 100644 (file)
@@ -6,3 +6,6 @@ case "$TARGET" in
         fpm_depends+=(libc6)
         ;;
 esac
+
+# FIXME: Remove this line after #6885 is done.
+fpm_args+=(--iteration 2)
index c5783655b63558fb3988699981cf8634a0b61f4c..a7d9398701b7bc4c405027c26f5133fe7b23d383 100644 (file)
@@ -8,3 +8,6 @@ case "$TARGET" in
         fpm_depends+=(libc6 libfuse2)
         ;;
 esac
+
+# FIXME: Remove this line after #6885 is done.
+fpm_args+=(--iteration 2)
index cea7096395b8a5556d377f8f2e82732c8cf49f3f..52df46573da5fae9d1a87d14589f598bde198f0e 100644 (file)
@@ -9,3 +9,6 @@ case "$TARGET" in
         fpm_depends+=(libc6)
         ;;
 esac
+
+# FIXME: Remove this line after #6885 is done.
+fpm_args+=(--iteration 2)
index 4d2860f7b7d9e01994de7b72810a0eacda40abcb..e4fbf59344ec041875f016b8e7bf227420ce4fef 100644 (file)
@@ -1,3 +1,6 @@
+# FIXME: Remove this line after #6885 is done.
+fpm_args+=(--iteration 2)
+
 case "$TARGET" in
     centos6)
             fpm_depends+=(
diff --git a/crunch_scripts/test/task_output_dir b/crunch_scripts/test/task_output_dir
new file mode 100755 (executable)
index 0000000..b177892
--- /dev/null
@@ -0,0 +1,16 @@
+#!/usr/bin/env python
+
+import arvados
+import arvados.crunch
+import hashlib
+import os
+
+out = arvados.crunch.TaskOutputDir()
+
+string = open(__file__).read()
+with open(os.path.join(out.path, 'example.out'), 'w') as f:
+    f.write(string)
+with open(os.path.join(out.path, 'example.out.SHA1'), 'w') as f:
+    f.write(hashlib.sha1(string).hexdigest() + "\n")
+
+arvados.current_task().set_output(out.manifest_text())
index 1bdd2ab4461c244b4ac2a2ee5eb725694c67ad62..2f37f5af0803c360f2c0cce2d08389d64b21249f 100644 (file)
@@ -153,7 +153,9 @@ navbar:
       - install/install-shell-server.html.textile.liquid
       - install/create-standard-objects.html.textile.liquid
       - install/install-keepstore.html.textile.liquid
+      - install/configure-azure-blob-storage.html.textile.liquid
       - install/install-keepproxy.html.textile.liquid
+      #- install/install-keep-web.html.textile.liquid
       - install/install-crunch-dispatch.html.textile.liquid
       - install/install-compute-node.html.textile.liquid
     - Helpful hints:
index 502e2bcb3a41e1bdacd8f0c55a1afde95b7a55d6..e79cffb7f86dd9adf5a622b231ceda25e057f4e4 100644 (file)
@@ -5,7 +5,6 @@ h4(#rvm). *Option 1: Install with RVM*
 <notextile>
 <pre><code><span class="userinput">sudo gpg --keyserver hkp://keys.gnupg.net --recv-keys 409B6B1796C275462A1703113804BB82D39DC0E3
 \curl -sSL https://get.rvm.io | sudo bash -s stable --ruby=2.1
-sudo adduser "$USER" rvm
 </span></code></pre></notextile>
 
 Either log out and log back in to activate RVM, or explicitly load it in all open shells like this:
@@ -46,7 +45,7 @@ Install prerequisites for Ubuntu 12.04 or 14.04:
 <pre><code><span class="userinput">sudo apt-get install \
     gawk g++ gcc make libc6-dev libreadline6-dev zlib1g-dev libssl-dev \
     libyaml-dev libsqlite3-dev sqlite3 autoconf libgdbm-dev \
-    libncurses5-dev automake libtool bison pkg-config libffi-dev
+    libncurses5-dev automake libtool bison pkg-config libffi-dev curl
 </span></code></pre></notextile>
 
 Build and install Ruby:
@@ -54,8 +53,8 @@ Build and install Ruby:
 <notextile>
 <pre><code><span class="userinput">mkdir -p ~/src
 cd ~/src
-curl http://cache.ruby-lang.org/pub/ruby/2.1/ruby-2.1.6.tar.gz | tar xz
-cd ruby-2.1.6
+curl http://cache.ruby-lang.org/pub/ruby/2.1/ruby-2.1.7.tar.gz | tar xz
+cd ruby-2.1.7
 ./configure --disable-install-rdoc
 make
 sudo make install
index abd00715038a55cb799e0b8e7f6e68387277349a..3f695934aecff49dc71094eb93b6dffa07dc01bb 100644 (file)
@@ -1,29 +1,36 @@
 {
     "name":"run-command example pipeline",
     "components":{
-        "bwa-mem": {
+         "bwa-mem": {
             "script": "run-command",
             "script_version": "master",
             "repository": "arvados",
             "script_parameters": {
                 "command": [
-                    "bwa",
+                    "$(dir $(bwa_collection))/bwa",
                     "mem",
                     "-t",
                     "$(node.cores)",
+                    "-R",
+                    "@RG\\\tID:group_id\\\tPL:illumina\\\tSM:sample_id",
                     "$(glob $(dir $(reference_collection))/*.fasta)",
                     "$(glob $(dir $(sample))/*_1.fastq)",
                     "$(glob $(dir $(sample))/*_2.fastq)"
                 ],
-                "task.stdout": "$(basename $(glob $(dir $(sample))/*_1.fastq)).sam",
                 "reference_collection": {
                     "required": true,
                     "dataclass": "Collection"
                 },
+                "bwa_collection": {
+                    "required": true,
+                    "dataclass": "Collection",
+                    "default": "39c6f22d40001074f4200a72559ae7eb+5745"
+                },
                 "sample": {
                     "required": true,
                     "dataclass": "Collection"
-                }
+                },
+                "task.stdout": "$(basename $(glob $(dir $(sample))/*_1.fastq)).sam"
             }
         }
     }
index 098767300fede09944adce305ef017739ebf3b76..db2aa8dac154d0290ce89a725b51b5434996b52b 100644 (file)
@@ -12,6 +12,6 @@ Paste your public key into the text area labeled *Public Key*, and click on the
 
 h1(#login). Using SSH to log into an Arvados VM
 
-To see a list of virtual machines that you have access to and determine the name and login information, click on the dropdown menu icon <span class="fa fa-lg fa-user"></span> <span class="caret"></span> in the upper right corner of the top navigation menu to access the user settings menu and click on the menu item *Virtual machines* to go to the Virtual machines page. This page lists the virtual machines you can access. The *hostname* column lists the name of each available VM.  The *logins* column will have a list of comma separated values of the form @you@. In this guide the hostname will be *_shell_* and the login will be *_you_*.  Replace these with your hostname and login name as appropriate.
+To see the virtual machines you have access to, along with their names and login information, click on the dropdown menu icon <span class="fa fa-lg fa-user"></span> <span class="caret"></span> in the upper right corner of the top navigation menu to open the user settings menu, then click the menu item *Virtual machines* to go to the Virtual machines page. This page lists the virtual machines you can access. The *Host name* column lists the name of each available VM.  The *Login name* column shows a comma-separated list of values of the form @you@. In this guide the hostname will be *_shell_* and the login will be *_you_*.  Replace these with your hostname and login name as appropriate.
 
 
index 413e51f9c057565647f829506506b4fd58aa7c8d..8a9e52abe7889e986dc1b9002f3bffc313219294 100644 (file)
@@ -12,7 +12,7 @@
                     "-t",
                     "$(node.cores)",
                     "-R",
-                    "@RG\\tID:group_id\\tPL:illumina\\tSM:sample_id",
+                    "@RG\\\tID:group_id\\\tPL:illumina\\\tSM:sample_id",
                     "$(glob $(dir $(reference_collection))/*.fasta)",
                     "$(glob $(dir $(sample))/*_1.fastq)",
                     "$(glob $(dir $(sample))/*_2.fastq)"
index fd635034b1e8db79b97a1893de07d07f87ed95ee..5bc7611d0deedede7db25a38af830ffb877dc693 100644 (file)
@@ -57,11 +57,12 @@ h3. Runtime constraints
 table(table table-bordered table-condensed).
 |_. Key|_. Type|_. Description|_. Implemented|
 |arvados_sdk_version|string|The Git version of the SDKs to use from the Arvados git repository.  See "Specifying Git versions":#script_version for more detail about acceptable ways to specify a commit.  If you use this, you must also specify a @docker_image@ constraint (see below).  In order to install the Python SDK successfully, Crunch must be able to find and run virtualenv inside the container.|&#10003;|
-|docker_image|string|The Docker image that this Job needs to run.  If specified, Crunch will create a Docker container from this image, and run the Job's script inside that.  The Keep mount and work directories will be available as volumes inside this container.  The image must be uploaded to Arvados using @arv keep docker@.  You may specify the image in any format that Docker accepts, such as @arvados/jobs@, @debian:latest@, or the Docker image id.  Alternatively, you may specify the UUID or portable data hash of the image Collection, returned by @arv keep docker@.|&#10003;|
+|docker_image|string|The Docker image that this Job needs to run.  If specified, Crunch will create a Docker container from this image, and run the Job's script inside that.  The Keep mount and work directories will be available as volumes inside this container.  The image must be uploaded to Arvados using @arv keep docker@.  You may specify the image in any format that Docker accepts, such as @arvados/jobs@, @debian:latest@, or the Docker image id.  Alternatively, you may specify the portable data hash of the image Collection.|&#10003;|
 |min_nodes|integer||&#10003;|
 |max_nodes|integer|||
 |min_cores_per_node|integer|Require that each node assigned to this Job have the specified number of CPU cores|&#10003;|
 |min_ram_mb_per_node|integer|Require that each node assigned to this Job have the specified amount of real memory (in MiB)|&#10003;|
 |min_scratch_mb_per_node|integer|Require that each node assigned to this Job have the specified amount of scratch storage available (in MiB)|&#10003;|
 |max_tasks_per_node|integer|Maximum simultaneous tasks on a single node|&#10003;|
+|keep_cache_mb_per_task|integer|Size of file data buffer for per-task Keep directory ($TASK_KEEPMOUNT), in MiB.  Default is 256 MiB.  Increase this to reduce cache thrashing in situations such as accessing multiple large (64+ MiB) files at the same time, or accessing different parts of a large file at the same time.|&#10003;|
 |min_ram_per_task|integer|Minimum real memory (KiB) per task||
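+
+For example, a job might combine these constraints like this (a sketch of the relevant JSON fragment; the image name and cache size shown are placeholders):
+
+<notextile>
+<pre><code>"runtime_constraints": {
+  "docker_image": "arvados/jobs",
+  "keep_cache_mb_per_task": 512
+}
+</code></pre>
+</notextile>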
index 8cbf959fe638b8b90452b082a44f5164a81d4ed4..31a86c2ef44b770ebdf1ec816bf434d9dac6e6fa 100644 (file)
@@ -41,7 +41,7 @@ title: Arvados | Documentation
     <div class="col-sm-6" style="border-left: solid; border-width: 1px">
       <p><strong>Quickstart</strong> 
       <p>
-        Try any pipeline from the <a href="https://dev.arvados.org/projects/arvados/wiki/Public_Pipelines_and_Datasets">list of public pipelines</a>. For instance, the <a href="http://curover.se/pathomap">Pathomap Pipeline</a> links to these <a href="https://dev.arvados.org/projects/arvados/wiki/pathomap_tutorial/">step-by-step instructions</a> for trying Arvados out right in your browser using Curoverse's <a href="http://lp.curoverse.com/beta-signup/">public Arvados instance</a>.
+        Try any pipeline from the <a href="https://cloud.curoverse.com/projects/public">list of public pipelines</a>. For instance, the <a href="http://curover.se/pathomap">Pathomap Pipeline</a> links to these <a href="https://dev.arvados.org/projects/arvados/wiki/pathomap_tutorial/">step-by-step instructions</a> for trying Arvados out right in your browser using Curoverse's <a href="http://lp.curoverse.com/beta-signup/">public Arvados instance</a>.
       </p>
         <!--<p>-->
       <!--<ol>-->
diff --git a/doc/install/configure-azure-blob-storage.html.textile.liquid b/doc/install/configure-azure-blob-storage.html.textile.liquid
new file mode 100644 (file)
index 0000000..e365d74
--- /dev/null
@@ -0,0 +1,62 @@
+---
+layout: default
+navsection: installguide
+title: Configure Azure Blob storage
+...
+
+As an alternative to local and network-attached POSIX filesystems, Keepstore can store data in an Azure Storage container.
+
+h2. Create a container
+
+Normally, all keepstore services are configured to share a single Azure Storage container.
+
+Using the Azure web portal or command line tool, create or choose a storage account with a suitable redundancy profile and availability region. Use the storage account keys to create a new container.
+
+<notextile>
+<pre><code>~$ <span class="userinput">azure config mode arm</span>
+~$ <span class="userinput">azure login</span>
+~$ <span class="userinput">azure group create exampleGroupName eastus</span>
+~$ <span class="userinput">azure storage account create --type LRS --location eastus --resource-group exampleGroupName exampleStorageAccountName</span>
+~$ <span class="userinput">azure storage account keys list --resource-group exampleGroupName exampleStorageAccountName</span>
+info:    Executing command storage account keys list
++ Getting storage account keys
+data:    Primary: zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz==
+data:    Secondary: yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy==
+info:    storage account keys list command OK
+~$ <span class="userinput">AZURE_STORAGE_ACCOUNT="exampleStorageAccountName" \
+AZURE_STORAGE_ACCESS_KEY="zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz==" \
+azure storage container create exampleContainerName</span>
+</code></pre>
+</notextile>
+
+h2. Configure keepstore
+
+Copy the primary storage account key to a file where it will be accessible to keepstore at startup time.
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo sh -c 'cat &gt;/etc/sv/keepstore/exampleStorageAccountName.key &lt;&lt;EOF'
+zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz==
+EOF</span>
+~$ <span class="userinput">sudo chmod 0400 /etc/sv/keepstore/exampleStorageAccountName.key</span>
+</code></pre>
+</notextile>
+
+In your keepstore startup script, instead of specifying a local storage using @-volume /path@ or discovering mount points automatically, use @-azure-*@ arguments to specify the storage container:
+
+<notextile>
+<pre><code>#!/bin/sh
+
+exec 2&gt;&amp;1
+exec keepstore \
+ -azure-storage-account-key-file <span class="userinput">/etc/sv/keepstore/exampleStorageAccountName.key</span> \
+ -azure-storage-account-name <span class="userinput">exampleStorageAccountName</span> \
+ -azure-storage-container-volume <span class="userinput">exampleContainerName</span>
+</code></pre>
+</notextile>
+
+Start (or restart) keepstore, and check its log file to confirm it is using the new configuration.
+
+<notextile>
+<pre><code>2015/10/26 21:06:24 Using volume azure-storage-container:"exampleContainerName" (writable=true)
+</code></pre>
+</notextile>
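+
+If keepstore runs under runit, as the @/etc/sv/keepstore@ paths above assume, restarting might look like this sketch:
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo sv restart keepstore</span>
+</code></pre>
+</notextile>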
index fb43f783f04ffc5346da7cf46224ce8856eecb02..69ff768ee1f17286c372719d5deb0c6171799356 100644 (file)
@@ -203,7 +203,7 @@ For best performance, we recommend you use Nginx as your Web server front-end, w
 
 <notextile>
 <ol>
-<li><a href="https://www.phusionpassenger.com/documentation/Users%20guide%20Nginx.html">Install Nginx and Phusion Passenger</a>.</li>
+<li><a href="https://www.phusionpassenger.com/library/walkthroughs/deploy/ruby/ownserver/nginx/oss/install_passenger_main.html">Install Nginx and Phusion Passenger</a>.</li>
 
 <li><p>Puma is already included with the API server's gems.  We recommend you run it as a service under <a href="http://smarden.org/runit/">runit</a> or a similar tool.  Here's a sample runit script for that:</p>
 
@@ -244,6 +244,14 @@ exec chpst -m 1073741824 -u webserver-user:webserver-group -e "$envdir" \
   passenger_enabled on;
   # If you're using RVM, uncomment the line below.
   #passenger_ruby /usr/local/rvm/wrappers/default/ruby;
+
+  # This value effectively limits the size of API objects users can
+  # create, especially collections.  If you change this, you should
+  # also ensure the following settings match it:
+  # * `client_max_body_size` in the server section below
+  # * `client_max_body_size` in the Workbench Nginx configuration (twice)
+  # * `max_request_size` in the API server's application.yml file
+  client_max_body_size 128m;
 }
 
 upstream api {
@@ -277,10 +285,7 @@ server {
 
   index  index.html index.htm index.php;
 
-  # This value effectively limits the size of API objects users can create,
-  # especially collections.  If you change this, you should also set
-  # `max_request_size` in the API server's application.yml file to the same
-  # value.
+  # Refer to the comment about this setting in the server section above.
   client_max_body_size 128m;
 
   location / {
index 1c31dc4d6ef664b424a5ee6b901c8a2350912b4e..7eec2d645a67960738e13acfce16231532403a22 100644 (file)
@@ -338,6 +338,9 @@ server {
   ssl_certificate         <span class="userinput">/YOUR/PATH/TO/cert.pem</span>;
   ssl_certificate_key     <span class="userinput">/YOUR/PATH/TO/cert.key</span>;
 
+  # The server needs to accept potentially large refpacks from push clients.
+  client_max_body_size 50m;
+
   location  / {
     proxy_pass            http://arvados-git-httpd;
   }
index 250d1dcc40363bb4fc73c116ee6edb345bd3a280..aa4f37d639704f33dc10b2f9e71db1c36a6c129a 100644 (file)
@@ -78,6 +78,10 @@ h2. Configure the Docker cleaner
 
 The arvados-docker-cleaner program removes least recently used docker images as needed to keep disk usage below a configured limit.
 
+{% include 'notebox_begin' %}
+This also removes all containers as soon as they exit, as if they were run with @docker run --rm@. If you need to debug or inspect containers after they stop, temporarily stop arvados-docker-cleaner or run it with @--remove-stopped-containers never@.
+{% include 'notebox_end' %}
+
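+For example, to run the cleaner by hand with stopped-container removal disabled while you debug (a sketch; keep whatever other arguments your normal service script passes in place of the ellipsis):
+
+<notextile>
+<pre><code>~$ <span class="userinput">arvados-docker-cleaner --remove-stopped-containers never ...</span>
+</code></pre>
+</notextile>
+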
 On Debian-based systems, install runit:
 
 <notextile>
index 370a6e7c3d04fce578ccc1c231e82ec0e44960f6..907f0fdf92b246bc7fdc691938f725376dd9db56 100644 (file)
@@ -175,6 +175,7 @@ rvmexec=""
 
 export ARVADOS_API_HOST=<span class="userinput">uuid_prefix.your.domain</span>
 export CRUNCH_DISPATCH_LOCKFILE=/var/lock/crunch-dispatch
+export HOME=$(pwd)
 export RAILS_ENV=production
 
 ## Uncomment this line if your cluster uses self-signed SSL certificates:
@@ -185,7 +186,7 @@ export RAILS_ENV=production
 export CRUNCH_JOB_DOCKER_BIN=<span class="userinput">docker.io</span>
 
 fuser -TERM -k $CRUNCH_DISPATCH_LOCKFILE || true
-cd /var/www/arvados-api/services/api
+cd /var/www/arvados-api/current
 exec $rvmexec bundle exec ./script/crunch-dispatch.rb 2>&1
 </code></pre>
 </notextile>
diff --git a/doc/install/install-keep-web.html.textile.liquid b/doc/install/install-keep-web.html.textile.liquid
new file mode 100644 (file)
index 0000000..5eb4191
--- /dev/null
@@ -0,0 +1,140 @@
+---
+layout: default
+navsection: installguide
+title: Install the keep-web server
+...
+
+The keep-web server provides read-only HTTP access to files stored in Keep. It serves public data to unauthenticated clients, and serves private data to clients that supply Arvados API tokens. It can be installed anywhere with access to Keep services, typically behind a web proxy that provides SSL support. See the "godoc page":http://godoc.org/github.com/curoverse/arvados/services/keep-web for more detail.
+
+By convention, we use the following hostnames for the keep-web service:
+
+<notextile>
+<pre><code>download.<span class="userinput">uuid_prefix</span>.your.domain
+collections.<span class="userinput">uuid_prefix</span>.your.domain
+</code></pre>
+</notextile>
+
+The above hostnames should resolve from anywhere on the internet.
+
+h2. Install keep-web
+
+On Debian-based systems:
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo apt-get install keep-web</span>
+</code></pre>
+</notextile>
+
+On Red Hat-based systems:
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo yum install keep-web</span>
+</code></pre>
+</notextile>
+
+Verify that @keep-web@ is functional:
+
+<notextile>
+<pre><code>~$ <span class="userinput">keep-web -h</span>
+Usage of keep-web:
+  -allow-anonymous
+        Serve public data to anonymous clients. Try the token supplied in the ARVADOS_API_TOKEN environment variable when none of the tokens provided in an HTTP request succeed in reading the desired collection. (default false)
+  -attachment-only-host string
+        Accept credentials, and add "Content-Disposition: attachment" response headers, for requests at this hostname:port. Prohibiting inline display makes it possible to serve untrusted and non-public content from a single origin, i.e., without wildcard DNS or SSL.
+  -listen string
+        Address to listen on: "host:port", or ":port" to listen on all interfaces. (default ":80")
+  -trust-all-content
+        Serve non-public content from a single origin. Dangerous: read docs before using!
+</code></pre>
+</notextile>
+
+If you intend to use Keep-web to serve public data to anonymous clients, configure it with an anonymous token. You can use the same one you used when you set up your Keepproxy server, or use the following command on the <strong>API server</strong> to create another:
+
+<notextile>
+<pre><code>/var/www/arvados-api/current/script$ <span class="userinput">RAILS_ENV=production bundle exec ./get_anonymous_user_token.rb</span>
+hoShoomoo2bai3Ju1xahg6aeng1siquuaZ1yae2gi2Uhaeng2r
+</code></pre></notextile>
+
+We recommend running @keep-web@ under "runit":https://packages.debian.org/search?keywords=runit or a similar supervisor. The basic command to start @keep-web@ is:
+
+<notextile>
+<pre><code>export ARVADOS_API_HOST=<span class="userinput">uuid_prefix</span>.your.domain
+export ARVADOS_API_TOKEN="<span class="userinput">hoShoomoo2bai3Ju1xahg6aeng1siquuaZ1yae2gi2Uhaeng2r</span>"
+exec sudo -u nobody keep-web \
+ -listen=<span class="userinput">:9002</span> \
+ -attachment-only-host=<span class="userinput">download.uuid_prefix.your.domain</span> \
+ -allow-anonymous \
+ 2&gt;&amp;1
+</code></pre>
+</notextile>
+
+Omit the @-allow-anonymous@ argument if you do not want to serve public data.
+
+Set @ARVADOS_API_HOST_INSECURE=1@ if your API server's SSL certificate is not signed by a recognized CA.
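+
+For example, that line would sit alongside the other exports in the run script above (a sketch; only appropriate for self-signed test setups):
+
+<notextile>
+<pre><code>export ARVADOS_API_HOST_INSECURE=1
+</code></pre>
+</notextile>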
+
+h3. Set up a reverse proxy with SSL support
+
+The keep-web service will be accessible from anywhere on the internet, so we recommend using SSL for transport encryption.
+
+This is best achieved by putting a reverse proxy with SSL support in front of keep-web, running on port 443 and passing requests to keep-web on port 9002 (or whatever port you chose in your run script).
+
+Note: A wildcard SSL certificate is required in order to support a full-featured secure keep-web service. Without it, keep-web can offer file downloads for all Keep data; however, in order to avoid cross-site scripting vulnerabilities, keep-web refuses to serve private data as web content except when it is accessed using a "secret link" share. With a wildcard SSL certificate and DNS configured appropriately, all data can be served as web content.
+
+For example, using Nginx:
+
+<notextile><pre>
+upstream keep-web {
+  server                127.0.0.1:<span class="userinput">9002</span>;
+}
+
+server {
+  listen                <span class="userinput">[your public IP address]</span>:443 ssl;
+  server_name           download.<span class="userinput">uuid_prefix</span>.your.domain
+                        collections.<span class="userinput">uuid_prefix</span>.your.domain
+                        *.collections.<span class="userinput">uuid_prefix</span>.your.domain
+                        ~.*--collections.<span class="userinput">uuid_prefix</span>.your.domain;
+
+  proxy_connect_timeout 90s;
+  proxy_read_timeout    300s;
+
+  ssl                   on;
+  ssl_certificate       <span class="userinput">/YOUR/PATH/TO/cert.pem</span>;
+  ssl_certificate_key   <span class="userinput">/YOUR/PATH/TO/cert.key</span>;
+
+  location / {
+    proxy_pass          http://keep-web;
+    proxy_set_header    Host            $host;
+    proxy_set_header    X-Forwarded-For $proxy_add_x_forwarded_for;
+  }
+}
+</pre></notextile>
+
+h3. Configure DNS
+
+Configure your DNS servers so the following names resolve to your Nginx proxy's public IP address.
+* @download.uuid_prefix.your.domain@
+* @collections.uuid_prefix.your.domain@
+* @*--collections.uuid_prefix.your.domain@, if you have a wildcard SSL certificate valid for @*.uuid_prefix.your.domain@ and your DNS server allows this without interfering with other DNS names.
+* @*.collections.uuid_prefix.your.domain@, if you have a wildcard SSL certificate valid for these names.
+
+If neither of the above wildcard options is feasible, only unauthenticated requests (public data and collection sharing links) will be served as web content at @collections.uuid_prefix.your.domain@. The @download@ name will be used to serve authenticated content, but only as file downloads.
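+
+For example, in a BIND-style zone file for @your.domain@ these records might look like (a sketch; @192.0.2.1@ stands in for your proxy's public IP address):
+
+<notextile>
+<pre><code>download.<span class="userinput">uuid_prefix</span>      IN A <span class="userinput">192.0.2.1</span>
+collections.<span class="userinput">uuid_prefix</span>   IN A <span class="userinput">192.0.2.1</span>
+*.collections.<span class="userinput">uuid_prefix</span> IN A <span class="userinput">192.0.2.1</span>
+</code></pre>
+</notextile>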
+
+h3. Tell Workbench about the keep-web service
+
+Workbench has features like "download file from collection" and "show image" which work better if the content is served by keep-web rather than Workbench itself. We recommend using the two different hostnames ("download" and "collections" above) for file downloads and inline content respectively.
+
+Add the following entry to your Workbench configuration file (@/etc/arvados/workbench/application.yml@). This URL will be used for file downloads.
+
+<notextile>
+<pre><code>keep_web_download_url: https://download.<span class="userinput">uuid_prefix</span>.your.domain/c=%{uuid_or_pdh}
+</code></pre>
+</notextile>
+
+Additionally, add *one* of the following entries to your Workbench configuration file, depending on your DNS setup. This URL will be used to serve user content that can be displayed in the browser, like image previews and static HTML pages.
+
+<notextile>
+<pre><code>keep_web_url: https://%{uuid_or_pdh}--collections.<span class="userinput">uuid_prefix</span>.your.domain
+keep_web_url: https://%{uuid_or_pdh}.collections.<span class="userinput">uuid_prefix</span>.your.domain
+keep_web_url: https://collections.<span class="userinput">uuid_prefix</span>.your.domain/c=%{uuid_or_pdh}
+</code></pre>
+</notextile>
index 6a531a37848d2c2eaa5af34e20fb63c32351f4bf..5a5b66aaaef98c1ee2e42525df2c881655baf3ec 100644 (file)
@@ -4,9 +4,12 @@ navsection: installguide
 title: Install Keepproxy server
 ...
 
-The Keepproxy server is a gateway into your Keep storage. Unlike the Keepstore servers, which are only accessible on the local LAN, Keepproxy is designed to provide secure access into Keep from anywhere on the internet.
+The Keepproxy server is a gateway into your Keep storage. Unlike the Keepstore servers, which are only accessible on the local LAN, Keepproxy is suitable for clients located elsewhere on the internet. Specifically, in contrast to Keepstore:
+* A client writing through Keepproxy generates less network traffic: the client sends a single copy of a data block, and Keepproxy sends copies to the appropriate Keepstore servers.
+* A client can write through Keepproxy without precomputing content hashes. Notably, the browser-based upload feature in Workbench requires Keepproxy.
+* Keepproxy checks API token validity before processing requests. (Clients that can connect directly to Keepstore can use it as scratch space even without a valid API token.)
 
-By convention, we use the following hostname for the Keepproxy:
+By convention, we use the following hostname for the Keepproxy server:
 
 <div class="offset1">
 table(table table-bordered table-condensed).
@@ -36,12 +39,13 @@ Verify that Keepproxy is functional:
 
 <notextile>
 <pre><code>~$ <span class="userinput">keepproxy -h</span>
-Usage of default:
+Usage of keepproxy:
   -default-replicas=2: Default number of replicas to write if not specified by the client.
   -listen=":25107": Interface on which to listen for requests, in the format ipaddr:port. e.g. -listen=10.0.1.24:8000. Use -listen=:port to listen on all network interfaces.
   -no-get=false: If set, disable GET operations
   -no-put=false: If set, disable PUT operations
   -pid="": Path to write pid file
+  -timeout=15: Timeout on requests to internal Keep services (default 15 seconds)
 </code></pre>
 </notextile>
 
@@ -52,7 +56,7 @@ The Keepproxy server needs a token to talk to the API server.
 On the <strong>API server</strong>, use the following command to create the token:
 
 <notextile>
-<pre><code>~/arvados/services/api/script$ <span class="userinput">RAILS_ENV=production bundle exec ./get_anonymous_user_token.rb</span>
+<pre><code>/var/www/arvados-api/current/script$ <span class="userinput">RAILS_ENV=production bundle exec ./get_anonymous_user_token.rb</span>
 hoShoomoo2bai3Ju1xahg6aeng1siquuaZ1yae2gi2Uhaeng2r
 </code></pre></notextile>
 
index 4cb46e13801e78a95b51c26cce0f3696cf8b3f78..efeff65b83dce3c774bffec219821a57f78e6175 100644 (file)
@@ -37,6 +37,10 @@ Verify that Keepstore is functional:
 <pre><code>~$ <span class="userinput">keepstore -h</span>
 2015/05/08 13:41:16 keepstore starting, pid 2565
 Usage of ./keepstore:
+  -azure-storage-account-key-file="": File containing the account key used for subsequent --azure-storage-container-volume arguments.
+  -azure-storage-account-name="": Azure storage account name used for subsequent --azure-storage-container-volume arguments.
+  -azure-storage-container-volume=[]: Use the given container as a storage volume. Can be given multiple times.
+  -azure-storage-replication=3: Replication level to report to clients when data is stored in an Azure container.
   -blob-signature-ttl=1209600: Lifetime of blob permission signatures. See services/api/config/application.default.yml.
   -blob-signing-key-file="": File containing the secret key for generating and verifying blob permission signatures.
   -data-manager-token-file="": File with the API token used by the Data Manager. All DELETE requests or GET /index requests must carry this token.
@@ -54,23 +58,67 @@ Usage of ./keepstore:
 </code></pre>
 </notextile>
 
-If you want access control on your Keepstore server(s), you must specify the @-enforce-permissions@ flag and provide a signing key. The @-blob-signing-key-file@ argument should be a file containing a long random alphanumeric string with no internal line breaks (it is also possible to use a socket or FIFO: keepstore reads it only once, at startup). This key must be the same as the @blob_signing_key@ configured in the "API server":install-api-server.html config/application.yml file.
+h3. Prepare storage volumes
 
-The @-max-buffers@ argument can be used to restrict keepstore's memory use. By default, keepstore will allocate no more than 128 blocks (8 GiB) worth of data buffers at a time. Normally this should be set as high as possible without risking swapping.
+{% include 'notebox_begin' %}
+This section uses a local filesystem as a backing store. If you are using Azure Storage, follow the setup instructions on the "Azure Blob Storage":configure-azure-blob-storage.html page instead.
+{% include 'notebox_end' %}
 
-Prepare one or more volumes for Keepstore to use. Simply create a /keep directory on all the partitions you would like Keepstore to use, and then start Keepstore. For example, using 2 tmpfs volumes:
+There are two ways to specify a set of local directories where keepstore should store its data files.
+# Implicitly, by creating a directory called @keep@ at the top level of each filesystem you intend to use, and omitting @-volume@ arguments.
+# Explicitly, by providing a @-volume@ argument for each directory.
+
+For example, if there are filesystems mounted at @/mnt@ and @/mnt2@:
 
 <notextile>
-<pre><code>~$ <span class="userinput">keepstore -blob-signing-key-file=./blob-signing-key</span>
+<pre><code>~$ <span class="userinput">mkdir /mnt/keep /mnt2/keep</span>
+~$ <span class="userinput">keepstore</span>
 2015/05/08 13:44:26 keepstore starting, pid 2765
 2015/05/08 13:44:26 Using volume [UnixVolume /mnt/keep] (writable=true)
+2015/05/08 13:44:26 Using volume [UnixVolume /mnt2/keep] (writable=true)
 2015/05/08 13:44:26 listening at :25107
 </code></pre>
 </notextile>
 
-It's recommended to run Keepstore under "runit":http://smarden.org/runit/ or something similar.
+Equivalently:
 
-Repeat this section for each Keepstore server you are setting up.
+<notextile>
+<pre><code>~$ <span class="userinput">mkdir /mnt/keep /mnt2/keep</span>
+~$ <span class="userinput">keepstore -volume=/mnt/keep -volume=/mnt2/keep</span>
+2015/05/08 13:44:26 keepstore starting, pid 2765
+2015/05/08 13:44:26 Using volume [UnixVolume /mnt/keep] (writable=true)
+2015/05/08 13:44:26 Using volume [UnixVolume /mnt2/keep] (writable=true)
+2015/05/08 13:44:26 listening at :25107
+</code></pre>
+</notextile>
+
+h3. Run keepstore as a supervised service
+
+We recommend running Keepstore under "runit":http://smarden.org/runit/ or something similar, using a run script like the following:
+
+<notextile>
+<pre><code>#!/bin/sh
+
+exec 2&gt;&amp;1
+exec env GOGC=10 GOMAXPROCS=<span class="userinput">4</span> keepstore \
+ -enforce-permissions=true \
+ -blob-signing-key-file=<span class="userinput">/etc/keepstore/blob-signing.key</span> \
+ -max-buffers=<span class="userinput">100</span> \
+ -serialize=true \
+ -volume=<span class="userinput">/mnt/keep</span> \
+ -volume=<span class="userinput">/mnt2/keep</span>
+</code></pre>
+</notextile>
+
+The @GOMAXPROCS@ environment variable determines the maximum number of concurrent threads, and should normally be set to the number of CPU cores present.
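+
+For example, to size it to the host automatically, the run script above could use a sketch like this (relying on the coreutils @nproc@ command; the ellipsis stands for the remaining keepstore arguments):
+
+<notextile>
+<pre><code>exec env GOGC=10 GOMAXPROCS=$(nproc) keepstore ...
+</code></pre>
+</notextile>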
+
+The @-max-buffers@ argument limits keepstore's memory usage. It should be set such that @max-buffers * 64MiB + 10%@ fits comfortably in memory. For example, @-max-buffers=100@ is suitable for a host with 8 GiB RAM: 100 buffers * 64 MiB = 6.25 GiB, plus 10% overhead, comes to about 7 GiB.
+
+If you want access control on your Keepstore server(s), you must specify the @-enforce-permissions@ flag and provide a signing key. The @-blob-signing-key-file@ argument should be a file containing a long random alphanumeric string with no internal line breaks (it is also possible to use a socket or FIFO: keepstore reads it only once, at startup). This key must be the same as the @blob_signing_key@ configured in the "API server's":install-api-server.html configuration file, @/etc/arvados/api/application.yml@.
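+
+One way to generate and install such a key (a minimal sketch; any method that produces a long random alphanumeric string will do, and the path matches the run script above):
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo sh -c 'tr -dc 0-9a-zA-Z &lt;/dev/urandom | head -c50 &gt;/etc/keepstore/blob-signing.key'</span>
+~$ <span class="userinput">sudo chmod 0400 /etc/keepstore/blob-signing.key</span>
+</code></pre>
+</notextile>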
+
+h3. Set up additional servers
+
+Repeat the above sections to prepare volumes and bring up supervised services on each Keepstore server you are setting up.
 
 h3. Tell the API server about the Keepstore servers
 
@@ -90,6 +138,3 @@ Make sure to update the @service_host@ value to match each of your Keepstore ser
 }
 EOF</span>
 </code></pre></notextile>
-
-
-
index 52a51a191aafb90188d1906583b419f0135ca49b..a26370d21bf5f4bb877a7ece60957ff9b1eff6d1 100644 (file)
@@ -42,7 +42,7 @@ baseurl=http://rpm.arvados.org/CentOS/$releasever/os/$basearch/
 
 h3. Debian and Ubuntu
 
-Packages are available for Debian 7 ("wheezy"), Ubuntu 12.04 ("precise"), and Ubuntu 14.04 ("trusty").
+Packages are available for Debian 7 ("wheezy"), Debian 8 ("jessie"), Ubuntu 12.04 ("precise"), and Ubuntu 14.04 ("trusty").
 
 First, register the Curoverse signing key in apt's database:
 
@@ -53,6 +53,7 @@ Configure apt to retrieve packages from the Arvados package repository. This com
 table(table table-bordered table-condensed).
 |OS version|Command|
 |Debian 7 ("wheezy")|<notextile><code><span class="userinput">echo "deb http://apt.arvados.org/ wheezy main" &#x7c; sudo tee /etc/apt/sources.list.d/arvados.list</span></code></notextile>|
+|Debian 8 ("jessie")|<notextile><code><span class="userinput">echo "deb http://apt.arvados.org/ jessie main" &#x7c; sudo tee /etc/apt/sources.list.d/arvados.list</span></code></notextile>|
 |Ubuntu 12.04 ("precise")|<notextile><code><span class="userinput">echo "deb http://apt.arvados.org/ precise main" &#x7c; sudo tee /etc/apt/sources.list.d/arvados.list</span></code></notextile>|
 |Ubuntu 14.04 ("trusty")|<notextile><code><span class="userinput">echo "deb http://apt.arvados.org/ trusty main" &#x7c; sudo tee /etc/apt/sources.list.d/arvados.list</span></code></notextile>|
 
index af999e95a922b14bbda53339b5d7eb29c20bae6b..ca620f478a8d215066fde5a9ffa157032174f4db 100644 (file)
 ---
 layout: default
 navsection: installguide
-title: Install Single Sign On (SSO) server
+title: Install the Single Sign On (SSO) server
 ...
 
-h2(#dependencies). Install dependencies
+h2(#dependencies). Install prerequisites
 
-h3(#install_git_curl). Install git and curl
-
-{% include 'install_git_curl' %}
+The Arvados package repository includes an SSO server package that can help automate much of the deployment.
 
 h3(#install_ruby_and_bundler). Install Ruby and Bundler
 
 {% include 'install_ruby_and_bundler' %}
 
-h3(#install_postgres). Install PostgreSQL
+h3(#install_web_server). Set up a Web server
+
+For best performance, we recommend you use Nginx as your Web server frontend with a Passenger backend to serve the SSO server. The Passenger team provides "Nginx + Passenger installation instructions":https://www.phusionpassenger.com/library/walkthroughs/deploy/ruby/ownserver/nginx/oss/install_passenger_main.html.
 
-{% include 'install_postgres' %}
+Follow the instructions until you see the section that says you are ready to deploy your Ruby application on the production server.
 
-h2(#install). Install SSO server
+h2(#install). Install the SSO server
 
-h3. Get SSO server code and run bundle
+On a Debian-based system, install the following package:
 
 <notextile>
-<pre><code>~$ <span class="userinput">cd $HOME</span> # (or wherever you want to install)
-~$ <span class="userinput">git clone https://github.com/curoverse/sso-devise-omniauth-provider.git</span>
-~$ <span class="userinput">cd sso-devise-omniauth-provider</span>
-~/sso-devise-omniauth-provider$ <span class="userinput">bundle install --without=development</span>
-</code></pre></notextile>
+<pre><code>~$ <span class="userinput">sudo apt-get install arvados-sso-server</span>
+</code></pre>
+</notextile>
+
+On a Red Hat-based system, install the following package:
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo yum install arvados-sso-server</span>
+</code></pre>
+</notextile>
 
-h2. Configure the SSO server
+h2(#configure). Configure the SSO server
 
-First, copy the example configuration file:
+The package has installed three configuration files in @/etc/arvados/sso@:
 
 <notextile>
-<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">cp -i config/application.yml.example config/application.yml</span>
-</code></pre></notextile>
+<pre><code>/etc/arvados/sso/application.yml
+/etc/arvados/sso/database.yml
+/etc/arvados/sso/production.rb
+</code></pre>
+</notextile>
+
+The SSO server runs from the @/var/www/arvados-sso/current/@ directory. The files @/var/www/arvados-sso/current/config/application.yml@, @/var/www/arvados-sso/current/config/database.yml@ and @/var/www/arvados-sso/current/config/environments/production.rb@ are symlinked to the configuration files in @/etc/arvados/sso/@.
 
 The SSO server reads the @config/application.yml@ file, as well as the @config/application.default.yml@ file. Values in @config/application.yml@ take precedence over the defaults that are defined in @config/application.default.yml@. The @config/application.yml.example@ file is not read by the SSO server and is provided for installation convenience only.
 
-Consult @config/application.default.yml@ for a full list of configuration options.  Local configuration goes in @config/application.yml@, do not edit @config/application.default.yml@.
+Consult @config/application.default.yml@ for a full list of configuration options. Local configuration goes in @/etc/arvados/sso/application.yml@; do not edit @config/application.default.yml@.
 
 h3(#uuid_prefix). uuid_prefix
 
 Generate a uuid prefix for the single sign on service.  This prefix is used to identify user records as originating from this site.  It must be exactly 5 lowercase ASCII letters and/or digits.  You may use the following snippet to generate a uuid prefix:
 
 <notextile>
-<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">ruby -e 'puts "#{rand(2**64).to_s(36)[0,5]}"'</span>
+<pre><code>~$ <span class="userinput">ruby -e 'puts "#{rand(2**64).to_s(36)[0,5]}"'</span>
 abcde
 </code></pre></notextile>
 
-Edit @config/application.yml@ and set @uuid_prefix@ in the "common" section.
+Edit @/etc/arvados/sso/application.yml@ and set @uuid_prefix@ in the "common" section.
 
 h3(#secret_token). secret_token
 
 Generate a new secret token for signing cookies:
 
 <notextile>
-<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">ruby -e 'puts rand(2**400).to_s(36)'</span>
+<pre><code>~$ <span class="userinput">ruby -e 'puts rand(2**400).to_s(36)'</span>
 zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
 </code></pre></notextile>
 
-Edit @config/application.yml@ and set @secret_token@ in the "common" section.
+Edit @/etc/arvados/sso/application.yml@ and set @secret_token@ in the "common" section.
+
+There are other configuration options in @/etc/arvados/sso/application.yml@. See the "Authentication methods":install-sso.html#authentication_methods section below for more details.
 
 h2(#database). Set up the database
 
-Generate a new database password. Nobody ever needs to memorize it or type it, so make a strong one:
+If PostgreSQL was newly installed as a dependency of the @arvados-sso-server@ package, you will need to start the service.
+
+On a Debian-based system:
 
 <notextile>
-<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">ruby -e 'puts rand(2**128).to_s(36)'</span>
+<pre><code>~$ <span class="userinput">sudo service postgresql start</span>
+</code></pre>
+</notextile>
+
+On a Red Hat-based system, we also need to initialize the database system:
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo service postgresql initdb</span>
+~$ <span class="userinput">sudo service postgresql start</span>
+</code></pre>
+</notextile>
+
+{% include 'notebox_begin' %}
+
+If you are installing on CentOS 6, you will need to modify PostgreSQL's configuration to allow password authentication for local users. The default configuration allows 'ident' only. The following commands make the configuration change and restart PostgreSQL for it to take effect.
+<br/>
+<notextile>
+<pre><code>~$ <span class="userinput">sudo sed -i -e "s/127.0.0.1\/32          ident/127.0.0.1\/32          md5/" /var/lib/pgsql/data/pg_hba.conf</span>
+~$ <span class="userinput">sudo sed -i -e "s/::1\/128               ident/::1\/128               md5/" /var/lib/pgsql/data/pg_hba.conf</span>
+~$ <span class="userinput">sudo service postgresql restart</span>
+</code></pre>
+</notextile>
+{% include 'notebox_end' %}
+
+
+Next, generate a new database password. Nobody ever needs to memorize it or type it, so make a strong one:
+
+<notextile>
+<pre><code>~$ <span class="userinput">ruby -e 'puts rand(2**128).to_s(36)'</span>
 abcdefghijklmnopqrstuvwxyz012345689
 </code></pre></notextile>
 
+Configure the SSO server to connect to your database by updating @/etc/arvados/sso/database.yml@. Replace the @xxxxxxxx@ database password placeholder with the new password you generated above. Be sure to update the @production@ section.
+
+<notextile>
+<pre><code>~$ <span class="userinput">editor /etc/arvados/sso/database.yml</span>
+</code></pre></notextile>
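+
+As a rough sketch, the @production@ section will typically end up looking something like this (the exact set of keys in the packaged file may differ; only the password needs to change):
+
+<notextile>
+<pre><code>production:
+  adapter: postgresql
+  encoding: utf8
+  database: arvados_sso_production
+  username: arvados_sso
+  password: <span class="userinput">paste-database-password-you-generated</span>
+  host: localhost
+</code></pre>
+</notextile>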
+
 Create a new database user with permission to create its own databases.
 
 <notextile>
-<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">sudo -u postgres createuser --createdb --encrypted -R -S --pwprompt arvados_sso</span>
+<pre><code>~$ <span class="userinput">sudo -u postgres createuser --createdb --encrypted -R -S --pwprompt arvados_sso</span>
 Enter password for new role: <span class="userinput">paste-database-password-you-generated</span>
 Enter it again: <span class="userinput">paste-database-password-you-generated</span>
 </code></pre></notextile>
 
-Configure SSO server to connect to your database by creating and updating @config/database.yml@. Replace the @xxxxxxxx@ database password placeholders with the new password you generated above.  If you are planning a production system, update the @production@ section, otherwise use @development@.
+Rails will take care of creating the database, based on the information in @/etc/arvados/sso/database.yml@.
+
+Alternatively, if the database user you intend to use for the SSO server should not be allowed to create new databases, the user and the database can be created like this:
 
 <notextile>
-<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">cp -i config/database.yml.example config/database.yml</span>
-~/sso-devise-omniauth-provider$ <span class="userinput">edit config/database.yml</span>
+<pre><code>~$ <span class="userinput">sudo -u postgres createuser --encrypted -R -S --pwprompt arvados_sso</span>
+Enter password for new role: <span class="userinput">paste-database-password-you-generated</span>
+Enter it again: <span class="userinput">paste-database-password-you-generated</span>
+~$ <span class="userinput">sudo -u postgres createdb arvados_sso_production -E UTF8 -O arvados_sso -T template0</span>
 </code></pre></notextile>
 
-Create and initialize the database. If you are planning a production system, choose the @production@ rails environment, otherwise use @development@.
+h2(#reconfigure_package). Reconfigure the package
+
+Now that the @/etc/arvados/sso/application.yml@ and @/etc/arvados/sso/database.yml@ files have been updated, we need to reconfigure our installed package. Doing so will create and/or initialize the database and precompile the assets.
+
+On a Debian-based system:
 
 <notextile>
-<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">RAILS_ENV=production bundle exec rake db:setup</span>
-</code></pre></notextile>
+<pre><code>~$ <span class="userinput">sudo dpkg-reconfigure arvados-sso-server</span>
+</code></pre>
+</notextile>
 
-Alternatively, if the database user you intend to use for the SSO server is not allowed to create new databases, you can create the database first and then populate it with rake. Be sure to adjust the database name if you are using the @development@ environment. This sequence of commands is functionally equivalent to the rake db:setup command above:
+On a Red Hat-based system, we need to reinstall the package instead:
 
 <notextile>
-<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">sudo -u postgres createdb arvados_sso_production -E UTF8 -O arvados_sso -T template0</span>
-~/sso-devise-omniauth-provider$ <span class="userinput">RAILS_ENV=production bundle exec rake db:schema:load</span>
-~/sso-devise-omniauth-provider$ <span class="userinput">RAILS_ENV=production bundle exec rake db:seed</span>
-</code></pre></notextile>
+<pre><code>~$ <span class="userinput">sudo yum reinstall arvados-sso-server</span>
+</code></pre>
+</notextile>
 
 h2(#client). Create arvados-server client
 
 Use @rails console@ to create a @Client@ record that will be used by the Arvados API server.  The values of @app_id@ and @app_secret@ correspond to the values for @sso_app_id@ and @sso_app_secret@ in the "API server's SSO settings.":install-api-server.html#omniauth
 
 <notextile>
-<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">ruby -e 'puts rand(2**400).to_s(36)'</span>
+<pre><code>~$ <span class="userinput">ruby -e 'puts rand(2**400).to_s(36)'</span>
 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
-~/sso-devise-omniauth-provider$ <span class="userinput">RAILS_ENV=production bundle exec rails console</span>
+~$ <span class="userinput">cd /var/www/arvados-sso/current</span>
+/var/www/arvados-sso/current$ <span class="userinput">RAILS_ENV=production bundle exec rails console</span>
 :001 &gt; <span class="userinput">c = Client.new</span>
 :002 &gt; <span class="userinput">c.name = "joshid"</span>
 :003 &gt; <span class="userinput">c.app_id = "arvados-server"</span>
@@ -118,15 +175,56 @@ xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
 </code></pre>
 </notextile>
 
-h2(#assets). Precompile assets
+h2(#configure_web_server). Configure your web server
 
-If you are running in the production environment, you must precompile the assets:
+Edit the @http@ section of your Nginx configuration to run the Passenger server and act as a frontend for it. You might add a block like the following, adding SSL and logging parameters to taste:
 
 <notextile>
-<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">RAILS_ENV=production bundle exec rake assets:precompile</span>
+<pre><code>server {
+  listen 127.0.0.1:8900;
+  server_name localhost-sso;
+
+  root   /var/www/arvados-sso/current/public;
+  index  index.html;
+
+  passenger_enabled on;
+  # If you're not using RVM, comment out the line below.
+  passenger_ruby /usr/local/rvm/wrappers/default/ruby;
+}
+
+upstream sso {
+  server     127.0.0.1:8900  fail_timeout=10s;
+}
+
+proxy_http_version 1.1;
+
+server {
+  listen       <span class="userinput">[your public IP address]</span>:443 ssl;
+  server_name  auth.<span class="userinput">your.domain</span>;
+
+  ssl on;
+  ssl_certificate     <span class="userinput">/YOUR/PATH/TO/cert.pem</span>;
+  ssl_certificate_key <span class="userinput">/YOUR/PATH/TO/cert.key</span>;
+
+  index  index.html;
+
+  location / {
+    proxy_pass            http://sso;
+    proxy_redirect        off;
+    proxy_connect_timeout 90s;
+    proxy_read_timeout    300s;
+
+    proxy_set_header      X-Forwarded-Proto https;
+    proxy_set_header      Host $http_host;
+    proxy_set_header      X-Real-IP $remote_addr;
+    proxy_set_header      X-Forwarded-For $proxy_add_x_forwarded_for;
+  }
+}
 </code></pre>
 </notextile>
 
+Finally, restart Nginx; your Arvados SSO server should now be up and running. You can verify this by visiting the URL you configured Nginx to listen on in the @server@ section above (port 443). Read on if you want to configure your Arvados SSO server to use a different authentication backend.
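+
+A quick sanity check from the command line looks like this (a sketch; substitute the hostname you configured, and drop @-k@ once you have a trusted certificate):
+
+<notextile>
+<pre><code>~$ <span class="userinput">curl -kI https://auth.your.domain/</span>
+</code></pre>
+</notextile>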
+
 h2(#authentication_methods). Authentication methods
 
 Authentication methods are configured in @application.yml@.  Currently three authentication methods are supported: local accounts, LDAP, and Google+.  If neither Google+ nor LDAP is enabled, the SSO server defaults to local user accounts.  Only one authentication mechanism should be in use at a time.
@@ -150,7 +248,7 @@ For more information about configuring backend support for sending email (requir
 If @allow_account_registration@ is false, you may manually create local accounts on the SSO server from the rails console:
 
 <notextile>
-<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">RAILS_ENV=production bundle exec rails console</span>
+<pre><code>~$ <span class="userinput">cd /var/www/arvados-sso/current</span>
+/var/www/arvados-sso/current$ <span class="userinput">RAILS_ENV=production bundle exec rails console</span>
 :001 &gt; <span class="userinput">user = User.new(:email =&gt; "test@example.com")</span>
 :002 &gt; <span class="userinput">user.password = "passw0rd"</span>
 :003 &gt; <span class="userinput">user.save!</span>
@@ -210,76 +308,4 @@ In order to use Google+ authentication, you must use the <a href="https://consol
   google_oauth2_client_id: <span class="userinput">"---YOUR---CLIENT---ID---HERE--"-</span>
   google_oauth2_client_secret: <span class="userinput">"---YOUR---CLIENT---SECRET---HERE--"-</span></code></pre></notextile>
 
-h2(#start). Set up a Web server
-
-For best performance, we recommend you use Nginx as your Web server front-end, with a Passenger backend to serve the SSO server.  To do that:
-
-<notextile>
-<ol>
-<li><a href="https://www.phusionpassenger.com/documentation/Users%20guide%20Nginx.html">Install Nginx and Phusion Passenger</a>.</li>
-
-<li><p>Edit the http section of your Nginx configuration to run the Passenger server, and act as a front-end for it.  You might add a block like the following, adding SSL and logging parameters to taste:</p>
-
-<pre><code>server {
-  listen 127.0.0.1:8900;
-  server_name localhost-sso;
-
-  root   <span class="userinput">/YOUR/PATH/TO/sso-devise-omniauth-provider/public</span>;
-  index  index.html index.htm index.php;
-
-  passenger_enabled on;
-  # If you're using RVM, uncomment the line below.
-  #passenger_ruby /usr/local/rvm/wrappers/default/ruby;
-}
-
-upstream sso {
-  server     127.0.0.1:8900  fail_timeout=10s;
-}
-
-proxy_http_version 1.1;
-
-server {
-  listen       <span class="userinput">[your public IP address]</span>:443 ssl;
-  server_name  auth.<span class="userinput">your.domain</span>;
-
-  ssl on;
-  ssl_certificate     <span class="userinput">/YOUR/PATH/TO/cert.pem</span>;
-  ssl_certificate_key <span class="userinput">/YOUR/PATH/TO/cert.key</span>;
-
-  index  index.html index.htm index.php;
-
-  location / {
-    proxy_pass            http://sso;
-    proxy_redirect        off;
-    proxy_connect_timeout 90s;
-    proxy_read_timeout    300s;
-
-    proxy_set_header      X-Forwarded-Proto https;
-    proxy_set_header      Host $http_host;
-    proxy_set_header      X-Real-IP $remote_addr;
-    proxy_set_header      X-Forwarded-For $proxy_add_x_forwarded_for;
-  }
-}
-</code></pre>
-</li>
-
-<li>Restart Nginx.</li>
-
-</ol>
-</notextile>
 
-{% include 'notebox_begin' %}
-
-If you see the following warning "you may safely ignore it":https://stackoverflow.com/questions/10374871/no-secret-option-provided-to-racksessioncookie-warning:
-
-<pre>
-SECURITY WARNING: No secret option provided to Rack::Session::Cookie.
-This poses a security threat. It is strongly recommended that you
-provide a secret to prevent exploits that may be possible from crafted
-cookies. This will not be supported in future versions of Rack, and
-future versions will even invalidate your existing user cookies.
-
-Called from: /var/lib/gems/2.1.0/gems/actionpack-3.2.8/lib/action_dispatch/middleware/session/abstract_store.rb:28:in `initialize'.
-</pre>
-
-{% include 'notebox_end' %}
index 52a69f502b1d6e65ad920230ab91599f86be0b19..d05d94daa102e0b6b3a085c06e887905da775f29 100644 (file)
@@ -6,7 +6,7 @@ title: Install Workbench
 
 h2. Install prerequisites
 
-The Arvados package repository includes Workbench server package that can help automate much of the deployment.
+The Arvados package repository includes a Workbench server package that can help automate much of the deployment.
 
 h3(#install_ruby_and_bundler). Install Ruby and Bundler
 
@@ -93,7 +93,7 @@ For best performance, we recommend you use Nginx as your Web server front-end, w
 
 <notextile>
 <ol>
-<li><a href="https://www.phusionpassenger.com/documentation/Users%20guide%20Nginx.html">Install Nginx and Phusion Passenger</a>.</li>
+<li><a href="https://www.phusionpassenger.com/library/walkthroughs/deploy/ruby/ownserver/nginx/oss/install_passenger_main.html">Install Nginx and Phusion Passenger</a>.</li>
 
 <li>If you're deploying on CentOS and using the python27 Software Collection, configure Nginx to use it:
 
@@ -115,6 +115,10 @@ For best performance, we recommend you use Nginx as your Web server front-end, w
   passenger_enabled on;
   # If you're using RVM, uncomment the line below.
   #passenger_ruby /usr/local/rvm/wrappers/default/ruby;
+
+  # `client_max_body_size` should match the corresponding setting in
+  # the API server's Nginx configuration.
+  client_max_body_size 128m;
 }
 
 upstream workbench {
@@ -132,6 +136,8 @@ server {
   ssl_certificate_key <span class="userinput">/YOUR/PATH/TO/cert.key</span>;
 
   index  index.html index.htm index.php;
+  # `client_max_body_size` should match the corresponding setting in
+  # the API server's Nginx configuration.
   client_max_body_size 128m;
 
   location / {
index 018213c1a1815061f3265a85040937dcb53652a7..ca494fe17a570e0868af2157fcc92dda6b480cc2 100644 (file)
@@ -21,6 +21,20 @@ Options:
 </pre>
 </notextile>
 
+h3(#arv-get). arv get
+
+@arv get@ can be used to fetch a textual representation of Arvados objects from the command line. The output can be limited to a subset of the object's fields. Only the object's UUID is needed to use this command.
+
+<notextile>
+<pre>
+$ <code class="userinput">arv get --help</code>
+Usage: arv [--format json|yaml] get [uuid] [fields...]
+
+Fetch the specified Arvados object, select the specified fields,
+and print a text representation.
+</pre>
+</notextile>
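+
+For example, printing just two fields of a collection as YAML might look like this (the UUID shown is a placeholder):
+
+<notextile>
+<pre>
+$ <code class="userinput">arv --format yaml get zzzzz-4zz18-xxxxxxxxxxxxxxx name portable_data_hash</code>
+</pre>
+</notextile>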
+
 h3(#arv-edit). arv edit
 
 @arv edit@ can be used to edit Arvados objects from the command line. Arv edit opens up the editor of your choice (set the EDITOR environment variable) with the json or yaml description of the object. Saving the file will update the Arvados object on the API server, if it passes validation.
@@ -157,7 +171,7 @@ $ <code class="userinput">arv keep --help</code>
 Usage: arv keep [method] [--parameters]
 Use 'arv keep [method] --help' to get more information about specific methods.
 
-Available methods: ls, get, put, less, check, docker
+Available methods: ls, get, put, docker
 </pre>
 </notextile>
 
index a9eb8c135943fd58a9f7b9d91627af19059386a0..a9fe1a9a11c3f182440b2eaeee2a56ba82aa6cf2 100644 (file)
@@ -4,7 +4,7 @@ navsection: userguide
 title: Accessing an Arvados VM with SSH - Unix Environments
 ...
 
-This document is for accessing an arvados VM using SSK keys in Unix environments (Linux, OS X, Cygwin). If you would like to access VM through your browser, please visit the "Accessing an Arvados VM with Webshell":vm-login-with-webshell.html page. If you are using a Windows environment, please visit the "Accessing an Arvados VM with SSH - Windows Environments":ssh-access-windows.html page.
+This document is for accessing an Arvados VM using SSH keys in Unix environments (Linux, OS X, Cygwin). If you would like to access your VM through your browser, please visit the "Accessing an Arvados VM with Webshell":vm-login-with-webshell.html page. If you are using a Windows environment, please visit the "Accessing an Arvados VM with SSH - Windows Environments":ssh-access-windows.html page.
 
 {% include 'ssh_intro' %}
 
index c3a06405493d9c340b010547e88057aacb6039bf..98fadc9f204a35e142e6ca61ce47b2b30c811ca6 100644 (file)
@@ -4,7 +4,7 @@ navsection: userguide
 title: Accessing an Arvados VM with SSH - Windows Environments
 ...
 
-This document is for accessing an arvados VM using SSK keys in Windows environments. If you would like to use to access VM through your browser, please visit the "Accessing an Arvados VM with Webshell":vm-login-with-webshell.html page. If you are using a Unix environment (Linux, OS X, Cygwin), please visit the "Accessing an Arvados VM with SSH - Unix Environments":ssh-access-unix.html page.
+This document is for accessing an Arvados VM using SSH keys in Windows environments. If you would like to access your VM through your browser, please visit the "Accessing an Arvados VM with Webshell":vm-login-with-webshell.html page. If you are using a Unix environment (Linux, OS X, Cygwin), please visit the "Accessing an Arvados VM with SSH - Unix Environments":ssh-access-unix.html page.
 
 {% include 'ssh_intro' %}
 
@@ -23,7 +23,7 @@ h3. Step 1 - Adding PuTTY to the PATH
 # After downloading PuTTY and installing it, you should have a PuTTY folder in @C:\Program Files\@ or @C:\Program Files (x86)\@ (if you are using a 64 bit operating system).
 # Open the Control Panel.
 # Select _Advanced System Settings_, and choose _Environment Variables_.
-If you are using newer systems like Windows 7, you may use the following to open _Advanced System Settings_. Open Control Panel. Click on _System and Security_. Click on _System_. Click on _Advanced system settings_ and choose _Environment Variables..._
+If you are using a newer system like Windows 10, you can open _Advanced System Settings_ as follows: open the Control Panel, click on _System and Security_, click on _System_, then click on _Advanced system settings_ and choose _Environment Variables..._
 # Under system variables, find and edit @PATH@.
 # If you installed PuTTY in @C:\Program Files\PuTTY\@, add the following to the end of PATH:
 <code>;C:\Program Files\PuTTY</code>
@@ -55,7 +55,7 @@ h3. Initial configuration
 # Open PuTTY from the Start Menu.
 # On the Session screen set the Host Name (or IP address) to “shell”, which is the hostname listed in the _Virtual Machines_ page.
 # On the Session screen set the Port to “22”.
-# On the Connection %(rarr)&rarr;% Data screen set the Auto-login username to the username listed in the *logins* column on the Arvados Workbench _Settings %(rarr)&rarr;% Virtual machines_ page.
+# On the Connection %(rarr)&rarr;% Data screen set the Auto-login username to the username listed in the *Login name* column on the Arvados Workbench _Virtual machines_ page.
 # On the Connection %(rarr)&rarr;% Proxy screen set the Proxy Type to “Local”.
 # On the Connection %(rarr)&rarr;% Proxy screen in the “Telnet command, or local proxy command” box enter:
 <code>plink -P 2222 turnout@switchyard.{{ site.arvados_api_host }} %host</code>
index 50fa4747e74541943ba68782d3cdb8c8912e47f6..58ad868e5e50083576ade0dd9854ebc309dc580f 100644 (file)
@@ -10,7 +10,7 @@ h2(#webshell). Access VM using webshell
 
 Webshell gives you access to an Arvados virtual machine from your browser with no additional setup.
 
-In the Arvados Workbench, click on the dropdown menu icon <span class="fa fa-lg fa-user"></span> <span class="caret"></span> in the upper right corner of the top navigation menu to access the user settings menu, and click on the menu item *Virtual machines* to see the list of virtual machines you can access.
+In the Arvados Workbench, click on the dropdown menu icon <span class="fa fa-lg fa-user"></span> <span class="caret"></span> in the upper right corner of the top navigation menu to access the user settings menu, and click on the menu item *Virtual machines* to see the list of virtual machines you can access.  If you do not have access to any virtual machines, please click on <span class="btn btn-sm btn-primary">Send request for shell access</span> or send an email to "support@curoverse.com":mailto:support@curoverse.com.
 
 Each row in the Virtual Machines panel lists the hostname of the VM, along with a <code>Log in as *you*</code> button under the column "Web shell beta". Clicking on this button will open up a webshell terminal for you in a new browser tab and log you in.
 
index 0967cbc308b3054ed7b7cd32f0903d7b1590f353..2d2120268aeecba5a532b1e4903151f714763988 100644 (file)
@@ -13,9 +13,7 @@ This guide provides a reference for using Arvados to solve big data bioinformati
 * Storing and querying metadata about genome sequence files, such as human subjects and their phenotypic traits using the "Arvados Metadata Database.":{{site.baseurl}}/user/topics/tutorial-trait-search.html
 * Accessing, organizing, and sharing data, pipelines and results using the "Arvados Workbench":{{site.baseurl}}/user/getting_started/workbench.html web application.
 
-The examples in this guide use the Arvados instance located at <a href="{{site.arvados_workbench_host}}/" target="_blank">{{site.arvados_workbench_host}}</a>.  If you are using a different Arvados instance replace @{{ site.arvados_workbench_host }}@ with your private instance in all of the examples in this guide.
-
-Curoverse maintains a public Arvados instance located at <a href="https://workbench.qr1hi.arvadosapi.com/" target="_blank">https://workbench.qr1hi.arvadosapi.com/</a>.  You must have an account in order to use this service.  If you would like to request an account, please send an email to "arvados@curoverse.com":mailto:arvados@curoverse.com.
+The examples in this guide use the public Arvados instance located at <a href="{{site.arvados_workbench_host}}/" target="_blank">{{site.arvados_workbench_host}}</a>.  If you are using a different Arvados instance replace @{{ site.arvados_workbench_host }}@ with your private instance in all of the examples in this guide.
 
 h2. Typographic conventions
 
index e6c83affb5d9cb86fab64803027c9f8369c5e673..1a31d126da698495eca853b72e1dcca9424c94a1 100644 (file)
@@ -190,7 +190,7 @@ You are now able to specify the runtime environment for your program using the @
 {% code 'example_docker' as javascript %}
 </notextile>
 
-* The @docker_image@ field can be one of: the Docker repository name (as shown above), the Docker image hash, the Arvados collection UUID, or the Arvados collection portable data hash.
+* The @docker_image@ field can be one of: the Docker repository name (as shown above), the Docker image hash, or the Arvados collection portable data hash.
 
 h2. Share Docker images
 
index a46a1d9318a5a06e28d0be15e60ced23b076415d..f9f0af521fd74ec05d42e4d5e48914081148e65f 100644 (file)
@@ -54,36 +54,47 @@ Create a git branch named *tutorial_branch* in the *tutorial* Arvados git reposi
 
 <notextile>
 <pre><code>~$ <span class="userinput">cd tutorial</span>
-~$ <span class="userinput">git checkout -b tutorial_branch</span>
+~/tutorial$ <span class="userinput">git checkout -b tutorial_branch</span>
 </code></pre>
 </notextile>
 
-h2. Adding files or scripts to an Arvados repository
+h2. Adding scripts to an Arvados repository
 
-First, create a file named *tutorial.txt* in the local repository. Although this tutorial shows how to add a text file to Arvados, the same steps can be used to add any of your custom bash, R, or python scripts to an Arvados repository.
+Arvados crunch scripts must be placed in a @crunch_scripts@ subdirectory of the repository. If this subdirectory does not exist yet, create it in the local repository and change to that directory:
 
 <notextile>
-<pre><code>~$ <span class="userinput">echo 'hello world' > tutorial.txt</span>
-</code></pre>
+<pre><code>~/tutorial$ <span class="userinput">mkdir crunch_scripts</span>
+~/tutorial$ <span class="userinput">cd crunch_scripts</span></code></pre>
 </notextile>
 
-Next, add the new file to the git index.
+Next, using @nano@ or your favorite Unix text editor, create a new file called @hash.py@ in the @crunch_scripts@ directory.
 
-<notextile>
-<pre><code>~$ <span class="userinput">git add tutorial.txt</span>
-</code></pre>
-</notextile>
+notextile. <pre>~/tutorial/crunch_scripts$ <code class="userinput">nano hash.py</code></pre>
+
+Add the following code to compute the MD5 hash of each file in a collection:
+
+<notextile> {% code 'tutorial_hash_script_py' as python %} </notextile>
 
-Next, commit all the changes to the local repository, along with a commit message that describes what this script does.
+Make the file executable:
+
+notextile. <pre><code>~/tutorial/crunch_scripts$ <span class="userinput">chmod +x hash.py</span></code></pre>
+
+Next, add the file to the git repository.  This tells @git@ that the file should be included on the next commit.
+
+notextile. <pre><code>~/tutorial/crunch_scripts$ <span class="userinput">git add hash.py</span></code></pre>
+
+Next, commit your changes.  All staged changes are recorded into the local git repository:
 
 <notextile>
-<pre><code>~$ <span class="userinput">git commit -a -m "Added tutorial.txt"</span>
+<pre><code>~/tutorial/crunch_scripts$ <span class="userinput">git commit -m "my first script"</span>
 </code></pre>
 </notextile>
 
-Finally, push the changes in the local repository to the remote repository.
+Finally, upload your changes to the remote repository:
 
 <notextile>
-<pre><code>~$ <span class="userinput">git push origin tutorial_branch</span>
+<pre><code>~/tutorial/crunch_scripts$ <span class="userinput">git push origin tutorial_branch</span>
 </code></pre>
 </notextile>
+
+Although this tutorial shows how to add a Python script to Arvados, the same steps can be used to add any of your custom Bash, R, or Python scripts to an Arvados repository.
index 73c02effbd0a7d7fc49924b7ca5f477019b114cb..32cc3ca79de8da5ffae0575c8463dca4267ea05d 100644 (file)
@@ -16,7 +16,7 @@ h2. Arv-mount
 * It is easy for existing tools to access files in Keep.
 * Data is streamed on demand.  It is not necessary to download an entire file or collection to start processing.
 
-The default mode permits browsing any collection in Arvados as a subdirectory under the mount directory.  To avoid having to fetch a potentially large list of all collections, collection directories only come into existence when explicitly accessed by their Keep locator. For instance, a collection may be found by its content hash in the @keep/by_id@ directory.
+The default mode permits browsing any collection in Arvados as a subdirectory under the mount directory.  To avoid having to fetch a potentially large list of all collections, collection directories only come into existence when explicitly accessed by UUID or portable data hash. For instance, a collection may be found by its content hash in the @keep/by_id@ directory.
 
 <notextile>
 <pre><code>~$ <span class="userinput">mkdir -p keep</span>
@@ -33,7 +33,7 @@ var-GS000016015-ASM.tsv.bz2
 
 The last line unmounts Keep.  Subdirectories will no longer be accessible.
 
-Within each directory on Keep, there is a @.arvados#collection@ file that does not show up with @ls@. Its contents include, for instance, the @portable_data_hash@, which is the same as the Keep locator.
+In the top level directory of each collection, arv-mount provides a special file called @.arvados#collection@ that contains a JSON-formatted API record for the collection. This can be used to determine the collection's @portable_data_hash@, @uuid@, etc. This file does not show up in @ls@ or @ls -a@.
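+
+For example, one way to read a single field out of that record (a sketch; it assumes the @jq@ tool is installed and uses a placeholder collection UUID):
+
+<notextile>
+<pre><code>~$ <span class="userinput">jq .portable_data_hash 'keep/by_id/zzzzz-4zz18-xxxxxxxxxxxxxxx/.arvados#collection'</span>
+</code></pre>
+</notextile>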
 
 h3. Modifying files and directories in Keep
 
index f9522fbef01658f68b20493516b823c2e2d3611f..fac573aca9bdbc30a1f93b8f75e4af10e0818cfe 100644 (file)
@@ -18,13 +18,13 @@ h3. Steps
 
 # Start from the *Workbench Dashboard*.  You can access the Dashboard by clicking on *<i class="fa fa-lg fa-fw fa-dashboard"></i> Dashboard* in the upper left corner of any Workbench page.
 # Click on the <span class="btn btn-sm btn-primary"><i class="fa fa-fw fa-gear"></i> Run a pipeline...</span> button.  This will open a dialog box titled *Choose a pipeline to run*.
-# Click to open the *All projects <span class="caret"></span>* menu.  Under the *Projects shared with me* header, select *<i class="fa fa-fw fa-share-alt"></i> Arvados Tutorial*.
+# In the search box, type in *Tutorial align using bwa mem*.
 # Select *<i class="fa fa-fw fa-gear"></i> Tutorial align using bwa mem* and click the <span class="btn btn-sm btn-primary" >Next: choose inputs <i class="fa fa-fw fa-arrow-circle-right"></i></span> button.  This will create a new pipeline in your *Home* project and will open it. You can now supply the inputs for the pipeline.
 # The first input parameter to the pipeline is *"reference_collection" parameter for run-command script in bwa-mem component*.  Click the <span class="btn btn-sm btn-primary">Choose</span> button beneath that header.  This will open a dialog box titled *Choose a dataset for "reference_collection" parameter for run-command script in bwa-mem component*.
-# Once again, open the *All projects <span class="caret"></span>* menu and select *<i class="fa fa-fw fa-share-alt"></i> Arvados Tutorial*.  Select *<i class="fa fa-fw fa-archive"></i> Tutorial chromosome 19 reference* and click the <span class="btn btn-sm btn-primary" >OK</span> button.
+# Open the *Home <span class="caret"></span>* menu and select *All Projects*. Search for and select *<i class="fa fa-fw fa-archive"></i> Tutorial chromosome 19 reference* and click the <span class="btn btn-sm btn-primary" >OK</span> button.
 # Repeat the previous two steps to set the *"sample" parameter for run-command script in bwa-mem component* parameter to *<i class="fa fa-fw fa-archive"></i> Tutorial sample exome*.
 # Click on the <span class="btn btn-sm btn-primary" >Run <i class="fa fa-fw fa-play"></i></span> button.  The page updates to show you that the pipeline has been submitted to run on the Arvados cluster.
-# After the pipeline starts running, you can track the progress by watching log messages from jobs.  This page refreshes automatically.  You will see a <span class="label label-success">complete</span> label under the *job* column when the pipeline completes successfully.
+# After the pipeline starts running, you can track the progress by watching log messages from jobs.  This page refreshes automatically.  You will see a <span class="label label-success">complete</span> label when the pipeline completes successfully.
 # Click on the *Output* link to see the results of the job.  This will load a new page listing the output files from this pipeline.  You'll see the output SAM file from the alignment tool under the *Files* tab.
 # Click on the <span class="btn btn-sm btn-info"><i class="fa fa-download"></i></span> download button to the right of the SAM file to download your results.
 
diff --git a/sdk/cli/LICENSE-2.0.txt b/sdk/cli/LICENSE-2.0.txt
new file mode 100644 (file)
index 0000000..d645695
--- /dev/null
@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
index 8491aeec137b3353dc8115f24665da7181fa4294..526de4a8cff31b35ed647fcc91b5fa3b7d349f3f 100644 (file)
@@ -16,7 +16,9 @@ Gem::Specification.new do |s|
   s.email       = 'gem-dev@curoverse.com'
   #s.bindir      = '.'
   s.licenses    = ['Apache License, Version 2.0']
-  s.files       = ["bin/arv","bin/arv-run-pipeline-instance","bin/arv-crunch-job","bin/arv-tag","bin/crunch-job"]
+  s.files       = ["bin/arv", "bin/arv-run-pipeline-instance",
+                   "bin/arv-crunch-job", "bin/arv-tag", "bin/crunch-job",
+                   "LICENSE-2.0.txt"]
   s.executables << "arv"
   s.executables << "arv-run-pipeline-instance"
   s.executables << "arv-crunch-job"
index b757855b1f22653eef4a6faaa96cc2c684214e86..185a5b0673f1dc1c5afc5ed6530386d8255daaf4 100755 (executable)
@@ -5,6 +5,7 @@
 # Ward Vandewege <ward@curoverse.com>
 
 require 'fileutils'
+require 'shellwords'
 
 if RUBY_VERSION < '1.9.3' then
   abort <<-EOS
@@ -85,7 +86,15 @@ def init_config
 end
 
 
-subcommands = %w(copy create edit keep pipeline run tag ws)
+subcommands = %w(copy create edit get keep pipeline run tag ws)
+
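+# Locate the named helper program on $PATH and exec it with the given
+# options, failing with a clear error if the helper is not installed.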
+def exec_bin bin, opts
+  bin_path = `which #{bin.shellescape}`.strip
+  if bin_path.empty?
+    raise "#{bin}: command not found"
+  end
+  exec bin_path, *opts
+end
 
 def check_subcommands client, arvados, subcommand, global_opts, remaining_opts
   case subcommand
@@ -93,28 +102,27 @@ def check_subcommands client, arvados, subcommand, global_opts, remaining_opts
     arv_create client, arvados, global_opts, remaining_opts
   when 'edit'
     arv_edit client, arvados, global_opts, remaining_opts
+  when 'get'
+    arv_get client, arvados, global_opts, remaining_opts
   when 'copy', 'tag', 'ws', 'run'
-    exec `which arv-#{subcommand}`.strip, *remaining_opts
+    exec_bin "arv-#{subcommand}", remaining_opts
   when 'keep'
     @sub = remaining_opts.shift
     if ['get', 'put', 'ls', 'normalize'].index @sub then
       # Native Arvados
-      exec `which arv-#{@sub}`.strip, *remaining_opts
-    elsif ['less', 'check'].index @sub then
-      # wh* shims
-      exec `which wh#{@sub}`.strip, *remaining_opts
+      exec_bin "arv-#{@sub}", remaining_opts
     elsif @sub == 'docker'
-      exec `which arv-keepdocker`.strip, *remaining_opts
+      exec_bin "arv-keepdocker", remaining_opts
     else
       puts "Usage: arv keep [method] [--parameters]\n"
       puts "Use 'arv keep [method] --help' to get more information about specific methods.\n\n"
-      puts "Available methods: ls, get, put, less, check, docker"
+      puts "Available methods: ls, get, put, docker"
     end
     abort
   when 'pipeline'
     sub = remaining_opts.shift
     if sub == 'run'
-      exec `which arv-run-pipeline-instance`.strip, *remaining_opts
+      exec_bin "arv-run-pipeline-instance", remaining_opts
     else
       puts "Usage: arv pipeline [method] [--parameters]\n"
       puts "Use 'arv pipeline [method] --help' to get more information about specific methods.\n\n"
@@ -150,14 +158,7 @@ end
 
 def edit_and_commit_object initial_obj, tmp_stem, global_opts, &block
 
-  content = case global_opts[:format]
-            when 'json'
-              Oj.dump(initial_obj, :indent => 1)
-            when 'yaml'
-              initial_obj.to_yaml
-            else
-              abort "Unrecognized format #{global_opts[:format]}"
-            end
+  content = get_obj_content initial_obj, global_opts
 
   tmp_file = Tempfile.new([tmp_stem, ".#{global_opts[:format]}"])
   tmp_file.write(content)
@@ -182,6 +183,8 @@ def edit_and_commit_object initial_obj, tmp_stem, global_opts, &block
                    Oj.load(newcontent)
                  when 'yaml'
                    YAML.load(newcontent)
+                 else
+                   abort "Unrecognized format #{global_opts[:format]}"
                  end
 
         yield newobj
@@ -246,20 +249,7 @@ def check_response result
   results
 end
 
-def arv_edit client, arvados, global_opts, remaining_opts
-  uuid = remaining_opts.shift
-  if uuid.nil? or uuid == "-h" or uuid == "--help"
-    puts head_banner
-    puts "Usage: arv edit [uuid] [fields...]\n\n"
-    puts "Fetch the specified Arvados object, select the specified fields, \n"
-    puts "open an interactive text editor on a text representation (json or\n"
-    puts "yaml, use --format) and then update the object.  Will use 'nano'\n"
-    puts "by default, customize with the EDITOR or VISUAL environment variable.\n"
-    exit 255
-  end
-
-  # determine controller
-
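+# Map a UUID (or content hash) to the name of the API resource it belongs to.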
+def lookup_uuid_rsc arvados, uuid
   m = /([a-z0-9]{5})-([a-z0-9]{5})-([a-z0-9]{15})/.match uuid
   if !m
     if /^[a-f0-9]{32}/.match uuid
@@ -282,6 +272,11 @@ def arv_edit client, arvados, global_opts, remaining_opts
     abort "Could not determine resource type #{m[2]}"
   end
 
+  return rsc
+end
+
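+# Fetch the API record with the given UUID from resource rsc, keeping only
+# the fields named on the command line (if any).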
+def fetch_rsc_obj client, arvados, rsc, uuid, remaining_opts
+
   begin
     result = client.execute(:api_method => eval('arvados.' + rsc + '.get'),
                             :parameters => {"uuid" => uuid},
@@ -289,15 +284,45 @@ def arv_edit client, arvados, global_opts, remaining_opts
                             :headers => {
                               authorization: 'OAuth2 '+ENV['ARVADOS_API_TOKEN']
                             })
-    oldobj = check_response result
+    obj = check_response result
   rescue => e
     abort "Server error: #{e}"
   end
 
   if remaining_opts.length > 0
-    oldobj.select! { |k, v| remaining_opts.include? k }
+    obj.select! { |k, v| remaining_opts.include? k }
+  end
+
+  return obj
+end
+
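+# Serialize an API object as JSON or YAML, according to the global --format option.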
+def get_obj_content obj, global_opts
+  content = case global_opts[:format]
+            when 'json'
+              Oj.dump(obj, :indent => 1)
+            when 'yaml'
+              obj.to_yaml
+            else
+              abort "Unrecognized format #{global_opts[:format]}"
+            end
+  return content
+end
+
+def arv_edit client, arvados, global_opts, remaining_opts
+  uuid = remaining_opts.shift
+  if uuid.nil? or uuid == "-h" or uuid == "--help"
+    puts head_banner
+    puts "Usage: arv edit [uuid] [fields...]\n\n"
+    puts "Fetch the specified Arvados object, select the specified fields, \n"
+    puts "open an interactive text editor on a text representation (json or\n"
+    puts "yaml, use --format) and then update the object.  Will use 'nano'\n"
+    puts "by default, customize with the EDITOR or VISUAL environment variable.\n"
+    exit 255
   end
 
+  rsc = lookup_uuid_rsc arvados, uuid
+  oldobj = fetch_rsc_obj client, arvados, rsc, uuid, remaining_opts
+
   edit_and_commit_object oldobj, uuid, global_opts do |newobj|
     newobj.select! {|k| newobj[k] != oldobj[k]}
     if !newobj.empty?
@@ -318,6 +343,24 @@ def arv_edit client, arvados, global_opts, remaining_opts
   exit 0
 end
 
+def arv_get client, arvados, global_opts, remaining_opts
+  uuid = remaining_opts.shift
+  if uuid.nil? or uuid == "-h" or uuid == "--help"
+    puts head_banner
+    puts "Usage: arv [--format json|yaml] get [uuid] [fields...]\n\n"
+    puts "Fetch the specified Arvados object, select the specified fields,\n"
+    puts "and print a text representation.\n"
+    exit 255
+  end
+
+  rsc = lookup_uuid_rsc arvados, uuid
+  obj = fetch_rsc_obj client, arvados, rsc, uuid, remaining_opts
+  content = get_obj_content obj, global_opts
+
+  puts content
+  exit 0
+end
+
 def arv_create client, arvados, global_opts, remaining_opts
   types = resource_types(arvados.discovery_document)
   create_opts = Trollop::options do
index d9e00dc22bf7d69ab91aaf2b9efce1616e1842ec..70e2f42ede56ab9949f7ee9105c730b83a545966 100755 (executable)
@@ -1,63 +1,5 @@
 #!/usr/bin/env ruby
 
-# == Synopsis
-#
-#  arv-run-pipeline-instance --template pipeline-template-uuid [options] [--] [parameters]
-#  arv-run-pipeline-instance --instance pipeline-instance-uuid [options]
-#
-# Satisfy a pipeline template by finding or submitting a mapreduce job
-# for each pipeline component.
-#
-# == Options
-#
-# [--template uuid] Use the specified pipeline template.
-#
-# [--template path] Load the pipeline template from the specified
-#                   local file.
-#
-# [--instance uuid] Use the specified pipeline instance.
-#
-# [-n, --dry-run] Do not start any new jobs or wait for existing jobs
-#                 to finish. Just find out whether jobs are finished,
-#                 queued, or running for each component
-#
-# [--submit] Do not try to satisfy any components. Just
-#                          create an instance, print its UUID to
-#                          stdout, and exit.
-#
-# [--no-wait] Make only as much progress as possible without entering
-#             a sleep/poll loop.
-#
-# [--no-reuse] Do not reuse existing jobs to satisfy pipeline
-#              components. Submit a new job for every component.
-#
-# [--debug] Print extra debugging information on stderr.
-#
-# [--debug-level N] Increase amount of debugging information. Default
-#                   1, possible range 0..3.
-#
-# [--status-text path] Print plain text status report to a file or
-#                      fifo. Default: /dev/stdout
-#
-# [--status-json path] Print JSON status report to a file or
-#                      fifo. Default: /dev/null
-#
-# [--description] Description for the pipeline instance.
-#
-# == Parameters
-#
-# [param_name=param_value]
-#
-# [param_name param_value] Set (or override) the default value for
-#                          every parameter with the given name.
-#
-# [component_name::param_name=param_value]
-# [component_name::param_name param_value]
-# [--component_name::param_name=param_value]
-# [--component_name::param_name param_value] Set the value of a
-#                                            parameter for a single
-#                                            component.
-#
 class WhRunPipelineInstance
 end
 
@@ -92,6 +34,28 @@ end
 
 p = Trollop::Parser.new do
   version __FILE__
+  banner(<<EOF)
+
+Usage:
+  arv-run-pipeline-instance --template TEMPLATE_UUID [options] [--] [parameters]
+  arv-run-pipeline-instance --instance INSTANCE_UUID [options] [--] [parameters]
+
+Parameters:
+  param_name=param_value
+  param_name param_value
+                         Set (or override) the default value for every
+                         pipeline component parameter with the given
+                         name.
+
+  component_name::param_name=param_value
+  component_name::param_name param_value
+  --component_name::param_name=param_value
+  --component_name::param_name param_value
+                         Set the value of a parameter for a single
+                         pipeline component.
+
+Options:
+EOF
   opt(:dry_run,
       "Do not start any new jobs or wait for existing jobs to finish. Just find out whether jobs are finished, queued, or running for each component.",
       :type => :boolean,
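
The banner above defines a small grammar for pipeline parameters: a bare param=value applies to every component, component::param=value (with or without a leading --) targets one component, and the value may also arrive as the following argument instead of after '='. A rough Go sketch of a parser for that shape; parseParam is a hypothetical helper, not part of the tool:

    package main

    import (
        "fmt"
        "strings"
    )

    // parseParam splits one pipeline parameter argument into its parts.
    // An empty value means the caller should consume the next
    // command-line argument as the value.
    func parseParam(arg string) (component, name, value string) {
        arg = strings.TrimPrefix(arg, "--")
        if i := strings.Index(arg, "::"); i >= 0 {
            component, arg = arg[:i], arg[i+2:]
        }
        if j := strings.Index(arg, "="); j >= 0 {
            name, value = arg[:j], arg[j+1:]
        } else {
            name = arg
        }
        return
    }

    func main() {
        c, n, v := parseParam("--bwa::threads=8")
        fmt.Println(c, n, v) // bwa threads 8
    }
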
@@ -149,6 +113,10 @@ p = Trollop::Parser.new do
       "Description for the pipeline instance.",
       :short => :none,
       :type => :string)
+  opt(:project_uuid,
+      "UUID of the project for the pipeline instance.",
+      :short => :none,
+      :type => :string)
   stop_on [:'--']
 end
 $options = Trollop::with_standard_exception_handling p do
@@ -440,18 +408,23 @@ class WhRunPipelineInstance
         end
       end
     else
-      description = $options[:description]
-      description = ("Created at #{Time.now.localtime}" + (@template[:name].andand.size.andand>0 ? " using the pipeline template *#{@template[:name]}*" : "")) if !description
-      @instance = PipelineInstance.
-        create(components: @components,
-               properties: {
-                 run_options: {
-                   enable_job_reuse: !@options[:no_reuse]
-                 }
-               },
-               pipeline_template_uuid: @template[:uuid],
-               description: description,
-               state: ($options[:submit] ? 'RunningOnServer' : 'RunningOnClient'))
+      description = $options[:description] ||
+                    ("Created at #{Time.now.localtime}" + (@template[:name].andand.size.andand>0 ? " using the pipeline template *#{@template[:name]}*" : ""))
+      instance_body = {
+        components: @components,
+        properties: {
+          run_options: {
+            enable_job_reuse: !@options[:no_reuse]
+          }
+        },
+        pipeline_template_uuid: @template[:uuid],
+        description: description,
+        state: ($options[:submit] ? 'RunningOnServer' : 'RunningOnClient')
+      }
+      if @options[:project_uuid]
+        instance_body[:owner_uuid] = @options[:project_uuid]
+      end
+      @instance = PipelineInstance.create(instance_body)
     end
     self
   end
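
The new --project-uuid option above simply adds owner_uuid to the instance creation request body. For comparison, a hedged sketch of the equivalent request through the Go SDK's Create call (exercised in the client tests later in this diff); createInProject is a hypothetical helper, the state value mirrors the --submit path, and the arvadosclient import is as in the earlier sketch:

    // createInProject creates a pipeline instance owned by a specific
    // project by setting owner_uuid, as --project-uuid now arranges.
    func createInProject(arv arvadosclient.ArvadosClient, templateUUID, projectUUID string) (string, error) {
        created := make(arvadosclient.Dict)
        err := arv.Create("pipeline_instances", arvadosclient.Dict{
            "pipeline_instance": arvadosclient.Dict{
                "pipeline_template_uuid": templateUUID,
                "owner_uuid":             projectUUID, // omit to use the caller's default project
                "state":                  "RunningOnServer",
            },
        }, &created)
        if err != nil {
            return "", err
        }
        return created["uuid"].(string), nil
    }
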
index 423f4cc043409b0f63f96c6c84333b746e21d19a..70d05f023c93147a2dcad2b4b61335102acfc832 100755 (executable)
@@ -127,6 +127,7 @@ my $job_api_token;
 my $no_clear_tmp;
 my $resume_stash;
 my $docker_bin = "docker.io";
+my $docker_run_args = "";
 GetOptions('force-unlock' => \$force_unlock,
            'git-dir=s' => \$git_dir,
            'job=s' => \$jobspec,
@@ -134,6 +135,7 @@ GetOptions('force-unlock' => \$force_unlock,
            'no-clear-tmp' => \$no_clear_tmp,
            'resume-stash=s' => \$resume_stash,
            'docker-bin=s' => \$docker_bin,
+           'docker-run-args=s' => \$docker_run_args,
     );
 
 if (defined $job_api_token) {
@@ -411,7 +413,7 @@ if (!defined $no_clear_tmp) {
 }
 
 # If this job requires a Docker image, install that.
-my ($docker_locator, $docker_stream, $docker_hash, $docker_limitmem);
+my ($docker_locator, $docker_stream, $docker_hash, $docker_limitmem, $dockeruserarg);
 if ($docker_locator = $Job->{docker_image_locator}) {
   ($docker_stream, $docker_hash) = find_docker_image($docker_locator);
   if (!$docker_hash)
@@ -449,6 +451,42 @@ fi
       {fork => 1});
   $docker_limitmem = ($? == 0);
 
+  # Find a non-root Docker user to use.
+  # Tries the default user for the container, then 'crunch', then 'nobody',
+  # testing whether the effective user id is non-zero.  This defends against
+  # mistakes, not malice; we intend to harden the security in the future, so
+  # we don't want anyone getting used to their jobs running as root in their
+  # Docker containers.
+  my @tryusers = ("", "crunch", "nobody");
+  foreach my $try_user (@tryusers) {
+    my $try_user_arg;
+    if ($try_user eq "") {
+      Log(undef, "Checking if container default user is not UID 0");
+      $try_user_arg = "";
+    } else {
+      Log(undef, "Checking if user '$try_user' is not UID 0");
+      $try_user_arg = "--user=$try_user";
+    }
+    srun(["srun", "--nodelist=" . $node[0]],
+         ["/bin/sh", "-ec",
+          "a=`$docker_bin run $docker_run_args $try_user_arg $docker_hash id --user` && " .
+          " test \$a -ne 0"],
+         {fork => 1});
+    if ($? == 0) {
+      $dockeruserarg = $try_user_arg;
+      if ($try_user eq "") {
+        Log(undef, "Container will run with default user");
+      } else {
+        Log(undef, "Container will run with $dockeruserarg");
+      }
+      last;
+    }
+  }
+
+  if (!defined $dockeruserarg) {
+    croak("Could not find a user in container that is not UID 0 (tried default user, @tryusers) or there was a problem running 'id' in the container.");
+  }
+
   if ($Job->{arvados_sdk_version}) {
     # The job also specifies an Arvados SDK version.  Add the SDKs to the
     # tar file for the build script to install.
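
The user probe added above shells out to `docker run ... id --user` for each candidate and accepts the first non-zero UID. A rough Go rendering of the same logic, assuming a local docker binary (the sketch uses id's short -u flag, and the image name in main is a placeholder):

    package main

    import (
        "errors"
        "os/exec"
        "strconv"
        "strings"
    )

    // findNonRootUserArg tries the image's default user, then "crunch",
    // then "nobody", and accepts the first whose UID is non-zero. It
    // returns the --user= argument for `docker run`, or "" when the
    // image's default user is already non-root.
    func findNonRootUserArg(imageID string) (string, error) {
        for _, user := range []string{"", "crunch", "nobody"} {
            args := []string{"run", "--rm"}
            if user != "" {
                args = append(args, "--user="+user)
            }
            args = append(args, imageID, "id", "-u")
            out, err := exec.Command("docker", args...).Output()
            if err != nil {
                continue // e.g. the user does not exist in the image
            }
            uid, err := strconv.Atoi(strings.TrimSpace(string(out)))
            if err == nil && uid != 0 {
                if user == "" {
                    return "", nil
                }
                return "--user=" + user, nil
            }
        }
        return "", errors.New("no non-root user found (tried default, crunch, nobody)")
    }

    func main() {
        arg, err := findNonRootUserArg("debian:8") // placeholder image
        if err != nil {
            panic(err)
        }
        println(arg)
    }
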
@@ -784,6 +822,9 @@ update_progress_stats();
 THISROUND:
 for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
 {
+  # Don't create new tasks if we already know the job's final result.
+  last if defined($main::success);
+
   my $id = $jobstep_todo[$todo_ptr];
   my $Jobstep = $jobstep[$id];
   if ($Jobstep->{level} != $level)
@@ -831,11 +872,12 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
     $ENV{"TASK_SLOT_NUMBER"} = $slot[$childslot]->{cpu};
     $ENV{"TASK_WORK"} = $ENV{"CRUNCH_TMP"}."/task/$childslotname";
     $ENV{"HOME"} = $ENV{"TASK_WORK"};
-    $ENV{"TASK_KEEPMOUNT"} = $ENV{"TASK_WORK"}.".keep";
     $ENV{"TASK_TMPDIR"} = $ENV{"TASK_WORK"}; # deprecated
     $ENV{"CRUNCH_NODE_SLOTS"} = $round_max_slots{$ENV{TASK_SLOT_NODE}};
     $ENV{"PATH"} = $ENV{"CRUNCH_INSTALL"} . "/bin:" . $ENV{"PATH"};
 
+    my $keep_mnt = $ENV{"TASK_WORK"}.".keep";
+
     $ENV{"GZIP"} = "-n";
 
     my @srunargs = (
@@ -844,22 +886,35 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
       qw(-n1 -c1 -N1 -D), $ENV{'TMPDIR'},
       "--job-name=$job_id.$id.$$",
        );
+
+    my $stdbuf = " stdbuf --output=0 --error=0 ";
+
+    my $arv_file_cache = "";
+    if (defined($Job->{'runtime_constraints'}->{'keep_cache_mb_per_task'})) {
+      $arv_file_cache = "--file-cache=" . ($Job->{'runtime_constraints'}->{'keep_cache_mb_per_task'} * 1024 * 1024);
+    }
+
     my $command =
-       "if [ -e $ENV{TASK_WORK} ]; then rm -rf $ENV{TASK_WORK}; fi; "
-        ."mkdir -p $ENV{CRUNCH_TMP} $ENV{JOB_WORK} $ENV{TASK_WORK} $ENV{TASK_KEEPMOUNT} "
-       ."&& cd $ENV{CRUNCH_TMP} "
+       "if [ -e \Q$ENV{TASK_WORK}\E ]; then rm -rf \Q$ENV{TASK_WORK}\E; fi; "
+        ."mkdir -p \Q$ENV{CRUNCH_TMP}\E \Q$ENV{JOB_WORK}\E \Q$ENV{TASK_WORK}\E \Q$keep_mnt\E "
+       ."&& cd \Q$ENV{CRUNCH_TMP}\E "
         # These environment variables get used explicitly later in
         # $command.  No tool is expected to read these values directly.
         .q{&& MEM=$(awk '($1 == "MemTotal:"){print $2}' </proc/meminfo) }
         .q{&& SWAP=$(awk '($1 == "SwapTotal:"){print $2}' </proc/meminfo) }
         ."&& MEMLIMIT=\$(( (\$MEM * 95) / ($ENV{CRUNCH_NODE_SLOTS} * 100) )) "
         ."&& let SWAPLIMIT=\$MEMLIMIT+\$SWAP ";
-    $command .= "&& exec arv-mount --by-id --allow-other $ENV{TASK_KEEPMOUNT} --exec ";
+
+    $command .= "&& exec arv-mount --read-write --mount-by-pdh=by_pdh --mount-tmp=tmp --crunchstat-interval=10 --allow-other $arv_file_cache \Q$keep_mnt\E --exec ";
+    $ENV{TASK_KEEPMOUNT} = "$keep_mnt/by_pdh";
+    $ENV{TASK_KEEPMOUNT_TMP} = "$keep_mnt/tmp";
+
     if ($docker_hash)
     {
-      my $cidfile = "$ENV{CRUNCH_TMP}/$Jobstep->{arvados_task}->{uuid}-$Jobstep->{failures}.cid";
+      my $containername = "$Jobstep->{arvados_task}->{uuid}-$Jobstep->{failures}";
+      my $cidfile = "$ENV{CRUNCH_TMP}/$containername.cid";
       $command .= "crunchstat -cgroup-root=/sys/fs/cgroup -cgroup-parent=docker -cgroup-cid=$cidfile -poll=10000 ";
-      $command .= "$docker_bin run --rm=true --attach=stdout --attach=stderr --attach=stdin -i --user=crunch --cidfile=$cidfile --sig-proxy ";
+      $command .= "$docker_bin run $docker_run_args --name=$containername --attach=stdout --attach=stderr --attach=stdin -i \Q$dockeruserarg\E --cidfile=$cidfile --sig-proxy ";
       # We only set memory limits if Docker lets us limit both memory and swap.
       # Memory limits alone have been supported longer, but subprocesses tend
       # to get SIGKILL if they exceed that without any swap limit set.
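
For reference, the MEMLIMIT/SWAPLIMIT arithmetic in the command above gives each task slot 95% of total RAM divided by the slot count, then allows swap on top of that. The same computation as a small Go sketch (values in kilobytes, as /proc/meminfo reports them):

    // slotLimits reproduces the MEMLIMIT/SWAPLIMIT shell arithmetic.
    func slotLimits(memTotalKB, swapTotalKB, slots int64) (memLimitKB, swapLimitKB int64) {
        memLimitKB = (memTotalKB * 95) / (slots * 100) // 95% of RAM, split across slots
        swapLimitKB = memLimitKB + swapTotalKB         // memory-plus-swap ceiling
        return
    }

For example, a 64 GiB node running 8 task slots with 2 GiB of swap gives each task roughly 7.6 GiB of memory and a memory-plus-swap ceiling of roughly 9.6 GiB.
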
@@ -874,14 +929,18 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
       $command .= "--volume=\Q$ENV{CRUNCH_SRC}:$ENV{CRUNCH_SRC}:ro\E ";
       $command .= "--volume=\Q$ENV{CRUNCH_INSTALL}:$ENV{CRUNCH_INSTALL}:ro\E ";
 
-      # Currently, we make arv-mount's mount point appear at /keep
-      # inside the container (instead of using the same path as the
-      # host like we do with CRUNCH_SRC and CRUNCH_INSTALL). However,
-      # crunch scripts and utilities must not rely on this. They must
-      # use $TASK_KEEPMOUNT.
+      # Currently, we make the "by_pdh" directory in arv-mount's mount
+      # point appear at /keep inside the container (instead of using
+      # the same path as the host like we do with CRUNCH_SRC and
+      # CRUNCH_INSTALL). However, crunch scripts and utilities must
+      # not rely on this. They must use $TASK_KEEPMOUNT.
       $command .= "--volume=\Q$ENV{TASK_KEEPMOUNT}:/keep:ro\E ";
       $ENV{TASK_KEEPMOUNT} = "/keep";
 
+      # Ditto TASK_KEEPMOUNT_TMP, as /keep_tmp.
+      $command .= "--volume=\Q$ENV{TASK_KEEPMOUNT_TMP}:/keep_tmp\E ";
+      $ENV{TASK_KEEPMOUNT_TMP} = "/keep_tmp";
+
       # TASK_WORK is almost exactly like a docker data volume: it
       # starts out empty, is writable, and persists until no
       # containers use it any more. We don't use --volumes-from to
@@ -917,12 +976,25 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
       }
       $command .= "--env=\QHOME=$ENV{HOME}\E ";
       $command .= "\Q$docker_hash\E ";
-      $command .= "stdbuf --output=0 --error=0 ";
-      $command .= "perl - $ENV{CRUNCH_SRC}/crunch_scripts/" . $Job->{"script"};
+
+      if ($Job->{arvados_sdk_version}) {
+        $command .= $stdbuf;
+        $command .= "perl - \Q$ENV{CRUNCH_SRC}/crunch_scripts/$Job->{script}\E";
+      } else {
+        $command .= "/bin/sh -c \'python -c " .
+            '"from pkg_resources import get_distribution as get; print \"Using Arvados SDK version\", get(\"arvados-python-client\").version"' .
+            ">&2 2>/dev/null; " .
+            "mkdir -p \"$ENV{JOB_WORK}\" \"$ENV{TASK_WORK}\" && " .
+            "if which stdbuf >/dev/null ; then " .
+            "  exec $stdbuf \Q$ENV{CRUNCH_SRC}/crunch_scripts/$Job->{script}\E ;" .
+            " else " .
+            "  exec \Q$ENV{CRUNCH_SRC}/crunch_scripts/$Job->{script}\E ;" .
+            " fi\'";
+      }
     } else {
       # Non-docker run
       $command .= "crunchstat -cgroup-root=/sys/fs/cgroup -poll=10000 ";
-      $command .= "stdbuf --output=0 --error=0 ";
+      $command .= $stdbuf;
       $command .= "perl - $ENV{CRUNCH_SRC}/crunch_scripts/" . $Job->{"script"};
     }
 
@@ -968,7 +1040,7 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
         ||
         ($round_num_freeslots > @freeslot && $todo_ptr+1 > $#jobstep_todo))
   {
-    last THISROUND if $main::please_freeze || defined($main::success);
+    last THISROUND if $main::please_freeze;
     if ($main::please_info)
     {
       $main::please_info = 0;
@@ -980,7 +1052,7 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
     my $gotsome
        = readfrompipes ()
        + reapchildren ();
-    if (!$gotsome)
+    if (!$gotsome || ($latest_refresh + 2 < scalar time))
     {
       check_refresh_wanted();
       check_squeue();
@@ -1129,6 +1201,9 @@ sub reapchildren
 
   if (!defined $task_success) {
     # task did not indicate one way or the other --> fail
+    Log($jobstepid, sprintf(
+          "ERROR: Task process exited %s, but never updated its task record to indicate success and record its output.",
+          exit_status_s($childstatus)));
     $Jobstep->{'arvados_task'}->{success} = 0;
     $Jobstep->{'arvados_task'}->save;
     $task_success = 0;
@@ -1645,20 +1720,24 @@ sub log_writer_finish()
 
   close($log_pipe_in);
 
+  my $logger_failed = 0;
   my $read_result = log_writer_read_output(120);
   if ($read_result == -1) {
+    $logger_failed = -1;
     Log (undef, "timed out reading from 'arv-put'");
   } elsif ($read_result != 0) {
+    $logger_failed = -2;
     Log(undef, "failed to read arv-put log manifest to EOF");
   }
 
   waitpid($log_pipe_pid, 0);
   if ($?) {
+    $logger_failed ||= $?;
     Log(undef, "log_writer_finish: arv-put exited " . exit_status_s($?))
   }
 
   close($log_pipe_out);
-  my $arv_put_output = $log_pipe_out_buf;
+  my $arv_put_output = $logger_failed ? undef : $log_pipe_out_buf;
   $log_pipe_pid = $log_pipe_in = $log_pipe_out = $log_pipe_out_buf =
       $log_pipe_out_select = undef;
 
@@ -1724,13 +1803,13 @@ sub save_meta
   my $justcheckpoint = shift; # false if this will be the last meta saved
   return if $justcheckpoint;  # checkpointing is not relevant post-Warehouse.pm
   return unless log_writer_is_active();
+  my $log_manifest = log_writer_finish();
+  return unless defined($log_manifest);
 
-  my $log_manifest = "";
   if ($Job->{log}) {
     my $prev_log_coll = api_call("collections/get", uuid => $Job->{log});
-    $log_manifest .= $prev_log_coll->{manifest_text};
+    $log_manifest = $prev_log_coll->{manifest_text} . $log_manifest;
   }
-  $log_manifest .= log_writer_finish();
 
   my $log_coll = api_call(
     "collections/create", ensure_unique_name => 1, collection => {
@@ -2109,10 +2188,11 @@ if (@ARGV) {
     $Log->("Built Python SDK virtualenv");
   }
 
-  my $pip_bin = "pip";
+  my @pysdk_version_cmd = ("python", "-c",
+    "from pkg_resources import get_distribution as get; print get('arvados-python-client').version");
   if ($venv_built) {
     $Log->("Running in Python SDK virtualenv");
-    $pip_bin = "$venv_dir/bin/pip";
+    @pysdk_version_cmd = ();
     my $orig_argv = join(" ", map { quotemeta($_); } @ARGV);
     @ARGV = ("/bin/sh", "-ec",
              ". \Q$venv_dir/bin/activate\E; exec $orig_argv");
@@ -2121,14 +2201,18 @@ if (@ARGV) {
            "\$PATH. Can't install Python SDK.");
   }
 
-  my $pkgs = `(\Q$pip_bin\E freeze 2>/dev/null | grep arvados) || dpkg-query --show '*arvados*'`;
-  if ($pkgs) {
-    $Log->("Using Arvados SDK:");
-    foreach my $line (split /\n/, $pkgs) {
-      $Log->($line);
+  if (@pysdk_version_cmd) {
+    open(my $pysdk_version_pipe, "-|", @pysdk_version_cmd);
+    my $pysdk_version = <$pysdk_version_pipe>;
+    close($pysdk_version_pipe);
+    if ($? == 0) {
+      chomp($pysdk_version);
+      $Log->("Using Arvados SDK version $pysdk_version");
+    } else {
+      # A lot could've gone wrong here, but pretty much all of it means that
+      # Python won't be able to load the Arvados SDK.
+      $Log->("Warning: Arvados SDK not found");
     }
-  } else {
-    $Log->("Arvados SDK packages not found");
   }
 
   while (my ($sdk_dir, $sdk_envkey) = each(%SDK_ENVVARS)) {
index 5e58014cbfa10d3b9b67a8b7cddca8b8676f646c..2e2bba562f1e7dedc09fd3f4491da424cb7a5850 100644 (file)
 require 'minitest/autorun'
-require 'digest/md5'
+require 'json'
+require 'yaml'
 
+# Black-box tests for the 'arv get' command.
 class TestArvGet < Minitest::Test
-  def setup
-    begin
-      Dir.mkdir './tmp'
-    rescue Errno::EEXIST
-    end
-    @@foo_manifest_locator ||= `echo -n foo | ./bin/arv-put --filename foo --no-progress -`.strip
-    @@baz_locator ||= `echo -n baz | ./bin/arv-put --as-raw --no-progress -`.strip
-    @@multilevel_manifest_locator ||= `echo ./foo/bar #{@@baz_locator} 0:3:baz | ./bin/arv-put --as-raw --no-progress -`.strip
-  end
+  # UUID for an Arvados object that does not exist
+  NON_EXISTENT_OBJECT_UUID = "zzzzz-zzzzz-zzzzzzzzzzzzzzz"
+  # Name of field of Arvados object that can store any (textual) value
+  STORED_VALUE_FIELD_NAME = "name"
+  # Name of UUID field of Arvados object
+  UUID_FIELD_NAME = "uuid"
+  # Name of an invalid field of Arvados object
+  INVALID_FIELD_NAME = "invalid"
 
-  def test_no_args
+  # Tests that a valid Arvados object can be retrieved in a supported format
+  # using: `arv get [uuid]`. Since all other `arv foo` commands return JSON
+  # when no format is specified, JSON should be expected in this case.
+  def test_get_valid_object_no_format_specified
+    stored_value = __method__.to_s
+    uuid = create_arv_object_with_value(stored_value)
     out, err = capture_subprocess_io do
-      assert_arv_get false
+      assert(arv_get_default(uuid))
     end
-    assert_equal '', out
-    assert_match /^usage:/, err
+    assert_empty(err, "Error text not expected: '#{err}'")
+    arv_object = parse_json_arv_object(out)
+    assert(has_field_with_value(arv_object, STORED_VALUE_FIELD_NAME, stored_value))
   end
 
-  def test_help
+  # Tests that a valid Arvados object can be retrieved in JSON format using:
+  # `arv get [uuid] --format json`.
+  def test_get_valid_object_json_format_specified
+    stored_value = __method__.to_s
+    uuid = create_arv_object_with_value(stored_value)
     out, err = capture_subprocess_io do
-      assert_arv_get '-h'
+      assert(arv_get_json(uuid))
     end
-    $stderr.write err
-    assert_equal '', err
-    assert_match /^usage:/, out
+    assert_empty(err, "Error text not expected: '#{err}'")
+    arv_object = parse_json_arv_object(out)
+    assert(has_field_with_value(arv_object, STORED_VALUE_FIELD_NAME, stored_value))
   end
 
-  def test_file_to_dev_stdout
-    test_file_to_stdout('/dev/stdout')
-  end
-
-  def test_file_to_stdout(specify_stdout_as='-')
+  # Tests that a valid Arvados object can be retrieved in YAML format using:
+  # `arv get [uuid] --format yaml`.
+  def test_get_valid_object_yaml_format_specified
+    stored_value = __method__.to_s
+    uuid = create_arv_object_with_value(stored_value)
     out, err = capture_subprocess_io do
-      assert_arv_get @@foo_manifest_locator + '/foo', specify_stdout_as
+      assert(arv_get_yaml(uuid))
     end
-    assert_equal '', err
-    assert_equal 'foo', out
+    assert_empty(err, "Error text not expected: '#{err}'")
+    arv_object = parse_yaml_arv_object(out)
+    assert(has_field_with_value(arv_object, STORED_VALUE_FIELD_NAME, stored_value))
   end
 
-  def test_file_to_file
-    remove_tmp_foo
+  # Tests that a subset of all fields of a valid Arvados object can be retrieved
+  # using: `arv get [uuid] [fields...]`.
+  def test_get_valid_object_with_valid_fields
+    stored_value = __method__.to_s
+    uuid = create_arv_object_with_value(stored_value)
     out, err = capture_subprocess_io do
-      assert_arv_get @@foo_manifest_locator + '/foo', 'tmp/foo'
+      assert(arv_get_json(uuid, STORED_VALUE_FIELD_NAME, UUID_FIELD_NAME))
     end
-    assert_equal '', err
-    assert_equal '', out
-    assert_equal 'foo', IO.read('tmp/foo')
+    assert_empty(err, "Error text not expected: '#{err}'")
+    arv_object = parse_json_arv_object(out)
+    assert(has_field_with_value(arv_object, STORED_VALUE_FIELD_NAME, stored_value))
+    assert(has_field_with_value(arv_object, UUID_FIELD_NAME, uuid))
   end
 
-  def test_file_to_file_no_overwrite_file
-    File.open './tmp/foo', 'wb' do |f|
-      f.write 'baz'
-    end
+  # Tests that the valid field is retrieved when both a valid and invalid field
+  # are requested from a valid Arvados object, using:
+  # `arv get [uuid] [fields...]`.
+  def test_get_valid_object_with_both_valid_and_invalid_fields
+    stored_value = __method__.to_s
+    uuid = create_arv_object_with_value(stored_value)
     out, err = capture_subprocess_io do
-      assert_arv_get false, @@foo_manifest_locator + '/foo', 'tmp/foo'
+      assert(arv_get_json(uuid, STORED_VALUE_FIELD_NAME, INVALID_FIELD_NAME))
     end
-    assert_match /Local file tmp\/foo already exists/, err
-    assert_equal '', out
-    assert_equal 'baz', IO.read('tmp/foo')
+    assert_empty(err, "Error text not expected: '#{err}'")
+    arv_object = parse_json_arv_object(out)
+    assert(has_field_with_value(arv_object, STORED_VALUE_FIELD_NAME, stored_value))
+    refute(has_field_with_value(arv_object, INVALID_FIELD_NAME, stored_value))
   end
 
-  def test_file_to_file_no_overwrite_file_in_dir
-    File.open './tmp/foo', 'wb' do |f|
-      f.write 'baz'
-    end
+  # Tests that no fields are retrieved when no valid fields are requested from
+  # a valid Arvados object, using: `arv get [uuid] [fields...]`.
+  def test_get_valid_object_with_no_valid_fields
+    stored_value = __method__.to_s
+    uuid = create_arv_object_with_value(stored_value)
     out, err = capture_subprocess_io do
-      assert_arv_get false, @@foo_manifest_locator + '/', 'tmp/'
+      assert(arv_get_json(uuid, INVALID_FIELD_NAME))
     end
-    assert_match /Local file tmp\/foo already exists/, err
-    assert_equal '', out
-    assert_equal 'baz', IO.read('tmp/foo')
+    assert_empty(err, "Error text not expected: '#{err}'")
+    arv_object = parse_json_arv_object(out)
+    assert_equal(0, arv_object.length)
   end
 
-  def test_file_to_file_force_overwrite
-    File.open './tmp/foo', 'wb' do |f|
-      f.write 'baz'
-    end
-    assert_equal 'baz', IO.read('tmp/foo')
+  # Tests that an invalid (non-existent) Arvados object is not retrieved,
+  # using: `arv get [non-existent-uuid]`.
+  def test_get_invalid_object
     out, err = capture_subprocess_io do
-      assert_arv_get '-f', @@foo_manifest_locator + '/', 'tmp/'
+      refute(arv_get_json(NON_EXISTENT_OBJECT_UUID))
     end
-    assert_match '', err
-    assert_equal '', out
-    assert_equal 'foo', IO.read('tmp/foo')
+    refute_empty(err, "Expected error feedback on request for invalid object")
+    assert_empty(out)
   end
 
-  def test_file_to_file_skip_existing
-    File.open './tmp/foo', 'wb' do |f|
-      f.write 'baz'
-    end
-    assert_equal 'baz', IO.read('tmp/foo')
+  # Tests that help text exists using: `arv get --help`.
+  def test_help_exists
     out, err = capture_subprocess_io do
-      assert_arv_get '--skip-existing', @@foo_manifest_locator + '/', 'tmp/'
+#      assert(arv_get_default("--help"), "Expected exit code 0: #{$?}")
+       #XXX: Exit code given is 255. It probably should be 0, which seems to be
+       #     standard elsewhere. However, 255 is in line with other `arv`
+       #     commands (e.g. see `arv edit`) so ignoring the problem here.
+       arv_get_default("--help")
     end
-    assert_match '', err
-    assert_equal '', out
-    assert_equal 'baz', IO.read('tmp/foo')
+    assert_empty(err, "Error text not expected: '#{err}'")
+    refute_empty(out, "Help text should be given")
   end
 
-  def test_file_to_dir
-    remove_tmp_foo
-    out, err = capture_subprocess_io do
-      assert_arv_get @@foo_manifest_locator + '/foo', 'tmp/'
-    end
-    assert_equal '', err
-    assert_equal '', out
-    assert_equal 'foo', IO.read('tmp/foo')
-  end
-
-  def test_dir_to_file
-    out, err = capture_subprocess_io do
-      assert_arv_get false, @@foo_manifest_locator + '/', 'tmp/foo'
-    end
-    assert_equal '', out
-    assert_match /^usage:/, err
-  end
-
-  def test_dir_to_empty_string
-    out, err = capture_subprocess_io do
-      assert_arv_get false, @@foo_manifest_locator + '/', ''
-    end
-    assert_equal '', out
-    assert_match /^usage:/, err
-  end
-
-  def test_nonexistent_block
-    out, err = capture_subprocess_io do
-      assert_arv_get false, 'e796ab2294f3e48ec709ffa8d6daf58c'
-    end
-    assert_equal '', out
-    assert_match /Error:/, err
-  end
-
-  def test_nonexistent_manifest
-    out, err = capture_subprocess_io do
-      assert_arv_get false, 'acbd18db4cc2f85cedef654fccc4a4d8/', 'tmp/'
-    end
-    assert_equal '', out
-    assert_match /Error:/, err
-  end
-
-  def test_manifest_root_to_dir
-    remove_tmp_foo
-    out, err = capture_subprocess_io do
-      assert_arv_get '-r', @@foo_manifest_locator + '/', 'tmp/'
-    end
-    assert_equal '', err
-    assert_equal '', out
-    assert_equal 'foo', IO.read('tmp/foo')
-  end
-
-  def test_manifest_root_to_dir_noslash
-    remove_tmp_foo
-    out, err = capture_subprocess_io do
-      assert_arv_get '-r', @@foo_manifest_locator + '/', 'tmp'
-    end
-    assert_equal '', err
-    assert_equal '', out
-    assert_equal 'foo', IO.read('tmp/foo')
-  end
-
-  def test_display_md5sum
-    remove_tmp_foo
-    out, err = capture_subprocess_io do
-      assert_arv_get '-r', '--md5sum', @@foo_manifest_locator + '/', 'tmp/'
-    end
-    assert_equal "#{Digest::MD5.hexdigest('foo')}  ./foo\n", err
-    assert_equal '', out
-    assert_equal 'foo', IO.read('tmp/foo')
+  protected
+  # Runs 'arv get <varargs>' with given arguments. Returns whether the exit
+  # status was 0 (i.e. success). Use $? to obtain more details on failure.
+  def arv_get_default(*args)
+    return system("arv", "get", *args)
   end
 
-  def test_md5sum_nowrite
-    remove_tmp_foo
-    out, err = capture_subprocess_io do
-      assert_arv_get '-n', '--md5sum', @@foo_manifest_locator + '/', 'tmp/'
-    end
-    assert_equal "#{Digest::MD5.hexdigest('foo')}  ./foo\n", err
-    assert_equal '', out
-    assert_equal false, File.exists?('tmp/foo')
+  # Runs 'arv --format json get <varargs>' with given arguments. Returns whether
+  # the exit status was 0 (i.e. success). Use $? to obtain more details on
+  # failure.
+  def arv_get_json(*args)
+    return system("arv", "--format", "json", "get", *args)
   end
 
-  def test_sha1_nowrite
-    remove_tmp_foo
-    out, err = capture_subprocess_io do
-      assert_arv_get '-n', '-r', '--hash', 'sha1', @@foo_manifest_locator+'/', 'tmp/'
-    end
-    assert_equal "#{Digest::SHA1.hexdigest('foo')}  ./foo\n", err
-    assert_equal '', out
-    assert_equal false, File.exists?('tmp/foo')
+  # Runs 'arv --format yaml get <varargs>' with given arguments. Returns whether
+  # the exit status was 0 (i.e. success). Use $? to obtain more details on
+  # failure.
+  def arv_get_yaml(*args)
+    return system("arv", "--format", "yaml", "get", *args)
   end
 
-  def test_block_to_file
-    remove_tmp_foo
+  # Creates an Arvados object that stores a given value. Returns the uuid of the
+  # created object.
+  def create_arv_object_with_value(value)
     out, err = capture_subprocess_io do
-      assert_arv_get @@foo_manifest_locator, 'tmp/foo'
+      system("arv", "tag", "add", value, "--object", "testing")
+      assert $?.success?, "Command failure running `arv tag`: #{$?}"
     end
     assert_equal '', err
-    assert_equal '', out
-
-    digest = Digest::MD5.hexdigest('foo')
-    !(IO.read('tmp/foo')).gsub!( /^(. #{digest}+3)(.*)( 0:3:foo)$/).nil?
+    assert_operator 0, :<, out.strip.length
+    out.strip
   end
 
-  def test_create_directory_tree
-    `rm -rf ./tmp/arv-get-test/`
-    Dir.mkdir './tmp/arv-get-test'
-    out, err = capture_subprocess_io do
-      assert_arv_get @@multilevel_manifest_locator + '/', 'tmp/arv-get-test/'
+  # Parses the given JSON representation of an Arvados object, returning
+  # an equivalent Ruby representation (a hash map).
+  def parse_json_arv_object(arvObjectAsJson)
+    begin
+      parsed = JSON.parse(arvObjectAsJson)
+      assert(parsed.instance_of?(Hash))
+      return parsed
+    rescue JSON::ParserError => e
+      raise "Invalid JSON representation of Arvados object.\n" \
+            "Parse error: '#{e}'\n" \
+            "JSON: '#{arvObjectAsJson}'\n"
     end
-    assert_equal '', err
-    assert_equal '', out
-    assert_equal 'baz', IO.read('tmp/arv-get-test/foo/bar/baz')
   end
 
-  def test_create_partial_directory_tree
-    `rm -rf ./tmp/arv-get-test/`
-    Dir.mkdir './tmp/arv-get-test'
-    out, err = capture_subprocess_io do
-      assert_arv_get(@@multilevel_manifest_locator + '/foo/',
-                     'tmp/arv-get-test/')
+  # Parses the given YAML representation of an Arvados object, returning
+  # an equivalent Ruby representation (a hash map).
+  def parse_yaml_arv_object(arvObjectAsYaml)
+    begin
+      parsed = YAML.load(arvObjectAsYaml)
+      assert(parsed.instance_of?(Hash))
+      return parsed
+    rescue
+      raise "Invalid YAML representation of Arvados object.\n" \
+            "YAML: '#{arvObjectAsYaml}'\n"
     end
-    assert_equal '', err
-    assert_equal '', out
-    assert_equal 'baz', IO.read('tmp/arv-get-test/bar/baz')
   end
 
-  protected
-  def assert_arv_get(*args)
-    expect = case args.first
-             when true, false
-               args.shift
-             else
-               true
-             end
-    assert_equal(expect,
-                 system(['./bin/arv-get', 'arv-get'], *args),
-                 "`arv-get #{args.join ' '}` " +
-                 "should exit #{if expect then 0 else 'non-zero' end}")
-  end
-
-  def remove_tmp_foo
-    begin
-      File.unlink('tmp/foo')
-    rescue Errno::ENOENT
+  # Checks whether the given Arvados object has the given expected value for the
+  # specified field.
+  def has_field_with_value(arvObjectAsHash, fieldName, expectedValue)
+    if !arvObjectAsHash.has_key?(fieldName)
+      return false
     end
+    return (arvObjectAsHash[fieldName] == expectedValue)
   end
 end
diff --git a/sdk/cli/test/test_arv-keep-get.rb b/sdk/cli/test/test_arv-keep-get.rb
new file mode 100644 (file)
index 0000000..0e578b8
--- /dev/null
@@ -0,0 +1,251 @@
+require 'minitest/autorun'
+require 'digest/md5'
+
+class TestArvKeepGet < Minitest::Test
+  def setup
+    begin
+      Dir.mkdir './tmp'
+    rescue Errno::EEXIST
+    end
+    @@foo_manifest_locator ||= `echo -n foo | ./bin/arv-put --filename foo --no-progress -`.strip
+    @@baz_locator ||= `echo -n baz | ./bin/arv-put --as-raw --no-progress -`.strip
+    @@multilevel_manifest_locator ||= `echo ./foo/bar #{@@baz_locator} 0:3:baz | ./bin/arv-put --as-raw --no-progress -`.strip
+  end
+
+  def test_no_args
+    out, err = capture_subprocess_io do
+      assert_arv_get false
+    end
+    assert_equal '', out
+    assert_match /^usage:/, err
+  end
+
+  def test_help
+    out, err = capture_subprocess_io do
+      assert_arv_get '-h'
+    end
+    $stderr.write err
+    assert_equal '', err
+    assert_match /^usage:/, out
+  end
+
+  def test_file_to_dev_stdout
+    test_file_to_stdout('/dev/stdout')
+  end
+
+  def test_file_to_stdout(specify_stdout_as='-')
+    out, err = capture_subprocess_io do
+      assert_arv_get @@foo_manifest_locator + '/foo', specify_stdout_as
+    end
+    assert_equal '', err
+    assert_equal 'foo', out
+  end
+
+  def test_file_to_file
+    remove_tmp_foo
+    out, err = capture_subprocess_io do
+      assert_arv_get @@foo_manifest_locator + '/foo', 'tmp/foo'
+    end
+    assert_equal '', err
+    assert_equal '', out
+    assert_equal 'foo', IO.read('tmp/foo')
+  end
+
+  def test_file_to_file_no_overwrite_file
+    File.open './tmp/foo', 'wb' do |f|
+      f.write 'baz'
+    end
+    out, err = capture_subprocess_io do
+      assert_arv_get false, @@foo_manifest_locator + '/foo', 'tmp/foo'
+    end
+    assert_match /Local file tmp\/foo already exists/, err
+    assert_equal '', out
+    assert_equal 'baz', IO.read('tmp/foo')
+  end
+
+  def test_file_to_file_no_overwrite_file_in_dir
+    File.open './tmp/foo', 'wb' do |f|
+      f.write 'baz'
+    end
+    out, err = capture_subprocess_io do
+      assert_arv_get false, @@foo_manifest_locator + '/', 'tmp/'
+    end
+    assert_match /Local file tmp\/foo already exists/, err
+    assert_equal '', out
+    assert_equal 'baz', IO.read('tmp/foo')
+  end
+
+  def test_file_to_file_force_overwrite
+    File.open './tmp/foo', 'wb' do |f|
+      f.write 'baz'
+    end
+    assert_equal 'baz', IO.read('tmp/foo')
+    out, err = capture_subprocess_io do
+      assert_arv_get '-f', @@foo_manifest_locator + '/', 'tmp/'
+    end
+    assert_match '', err
+    assert_equal '', out
+    assert_equal 'foo', IO.read('tmp/foo')
+  end
+
+  def test_file_to_file_skip_existing
+    File.open './tmp/foo', 'wb' do |f|
+      f.write 'baz'
+    end
+    assert_equal 'baz', IO.read('tmp/foo')
+    out, err = capture_subprocess_io do
+      assert_arv_get '--skip-existing', @@foo_manifest_locator + '/', 'tmp/'
+    end
+    assert_match '', err
+    assert_equal '', out
+    assert_equal 'baz', IO.read('tmp/foo')
+  end
+
+  def test_file_to_dir
+    remove_tmp_foo
+    out, err = capture_subprocess_io do
+      assert_arv_get @@foo_manifest_locator + '/foo', 'tmp/'
+    end
+    assert_equal '', err
+    assert_equal '', out
+    assert_equal 'foo', IO.read('tmp/foo')
+  end
+
+  def test_dir_to_file
+    out, err = capture_subprocess_io do
+      assert_arv_get false, @@foo_manifest_locator + '/', 'tmp/foo'
+    end
+    assert_equal '', out
+    assert_match /^usage:/, err
+  end
+
+  def test_dir_to_empty_string
+    out, err = capture_subprocess_io do
+      assert_arv_get false, @@foo_manifest_locator + '/', ''
+    end
+    assert_equal '', out
+    assert_match /^usage:/, err
+  end
+
+  def test_nonexistent_block
+    out, err = capture_subprocess_io do
+      assert_arv_get false, 'e796ab2294f3e48ec709ffa8d6daf58c'
+    end
+    assert_equal '', out
+    assert_match /Error:/, err
+  end
+
+  def test_nonexistent_manifest
+    out, err = capture_subprocess_io do
+      assert_arv_get false, 'acbd18db4cc2f85cedef654fccc4a4d8/', 'tmp/'
+    end
+    assert_equal '', out
+    assert_match /Error:/, err
+  end
+
+  def test_manifest_root_to_dir
+    remove_tmp_foo
+    out, err = capture_subprocess_io do
+      assert_arv_get '-r', @@foo_manifest_locator + '/', 'tmp/'
+    end
+    assert_equal '', err
+    assert_equal '', out
+    assert_equal 'foo', IO.read('tmp/foo')
+  end
+
+  def test_manifest_root_to_dir_noslash
+    remove_tmp_foo
+    out, err = capture_subprocess_io do
+      assert_arv_get '-r', @@foo_manifest_locator + '/', 'tmp'
+    end
+    assert_equal '', err
+    assert_equal '', out
+    assert_equal 'foo', IO.read('tmp/foo')
+  end
+
+  def test_display_md5sum
+    remove_tmp_foo
+    out, err = capture_subprocess_io do
+      assert_arv_get '-r', '--md5sum', @@foo_manifest_locator + '/', 'tmp/'
+    end
+    assert_equal "#{Digest::MD5.hexdigest('foo')}  ./foo\n", err
+    assert_equal '', out
+    assert_equal 'foo', IO.read('tmp/foo')
+  end
+
+  def test_md5sum_nowrite
+    remove_tmp_foo
+    out, err = capture_subprocess_io do
+      assert_arv_get '-n', '--md5sum', @@foo_manifest_locator + '/', 'tmp/'
+    end
+    assert_equal "#{Digest::MD5.hexdigest('foo')}  ./foo\n", err
+    assert_equal '', out
+    assert_equal false, File.exists?('tmp/foo')
+  end
+
+  def test_sha1_nowrite
+    remove_tmp_foo
+    out, err = capture_subprocess_io do
+      assert_arv_get '-n', '-r', '--hash', 'sha1', @@foo_manifest_locator+'/', 'tmp/'
+    end
+    assert_equal "#{Digest::SHA1.hexdigest('foo')}  ./foo\n", err
+    assert_equal '', out
+    assert_equal false, File.exists?('tmp/foo')
+  end
+
+  def test_block_to_file
+    remove_tmp_foo
+    out, err = capture_subprocess_io do
+      assert_arv_get @@foo_manifest_locator, 'tmp/foo'
+    end
+    assert_equal '', err
+    assert_equal '', out
+
+    digest = Digest::MD5.hexdigest('foo')
+    # Fetching the raw locator should produce the collection's manifest text.
+    assert_match(/^\. #{digest}\+3(\+\S+)* 0:3:foo$/, IO.read('tmp/foo'))
+  end
+
+  def test_create_directory_tree
+    `rm -rf ./tmp/arv-get-test/`
+    Dir.mkdir './tmp/arv-get-test'
+    out, err = capture_subprocess_io do
+      assert_arv_get @@multilevel_manifest_locator + '/', 'tmp/arv-get-test/'
+    end
+    assert_equal '', err
+    assert_equal '', out
+    assert_equal 'baz', IO.read('tmp/arv-get-test/foo/bar/baz')
+  end
+
+  def test_create_partial_directory_tree
+    `rm -rf ./tmp/arv-get-test/`
+    Dir.mkdir './tmp/arv-get-test'
+    out, err = capture_subprocess_io do
+      assert_arv_get(@@multilevel_manifest_locator + '/foo/',
+                     'tmp/arv-get-test/')
+    end
+    assert_equal '', err
+    assert_equal '', out
+    assert_equal 'baz', IO.read('tmp/arv-get-test/bar/baz')
+  end
+
+  protected
+  def assert_arv_get(*args)
+    expect = case args.first
+             when true, false
+               args.shift
+             else
+               true
+             end
+    assert_equal(expect,
+                 system(['./bin/arv-get', 'arv-get'], *args),
+                 "`arv-get #{args.join ' '}` " +
+                 "should exit #{if expect then 0 else 'non-zero' end}")
+  end
+
+  def remove_tmp_foo
+    begin
+      File.unlink('tmp/foo')
+    rescue Errno::ENOENT
+    end
+  end
+end
similarity index 99%
rename from sdk/cli/test/test_arv-put.rb
rename to sdk/cli/test/test_arv-keep-put.rb
index 2f20e18440a2ff61dde6b748d3b327587530b142..fefbc2729875e70cb890f69d56fe1d7f1c614b8d 100644 (file)
@@ -1,7 +1,7 @@
 require 'minitest/autorun'
 require 'digest/md5'
 
-class TestArvPut < Minitest::Test
+class TestArvKeepPut < Minitest::Test
   def setup
     begin Dir.mkdir './tmp' rescue Errno::EEXIST end
     begin Dir.mkdir './tmp/empty_dir' rescue Errno::EEXIST end
index f3298ec4fa30627be96216f82b5609bc9619cd07..4198c34482ccd0f6fa54daa9e5a9d1d143db2ee1 100644 (file)
@@ -14,6 +14,7 @@ import fnmatch
 import logging
 import re
 import os
+
 from cwltool.process import get_feature
 
 logger = logging.getLogger('arvados.cwl-runner')
@@ -40,24 +41,37 @@ def arv_docker_get_image(api_client, dockerRequirement, pull_image):
 
     return dockerRequirement["dockerImageId"]
 
-class CollectionFsAccess(cwltool.draft2tool.StdFsAccess):
+
+class CollectionFsAccess(cwltool.process.StdFsAccess):
     def __init__(self, basedir):
         self.collections = {}
         self.basedir = basedir
 
     def get_collection(self, path):
         p = path.split("/")
-        if arvados.util.keep_locator_pattern.match(p[0]):
-            if p[0] not in self.collections:
-                self.collections[p[0]] = arvados.collection.CollectionReader(p[0])
-            return (self.collections[p[0]], "/".join(p[1:]))
+        if p[0].startswith("keep:") and arvados.util.keep_locator_pattern.match(p[0][5:]):
+            pdh = p[0][5:]
+            if pdh not in self.collections:
+                self.collections[pdh] = arvados.collection.CollectionReader(pdh)
+            return (self.collections[pdh], "/".join(p[1:]))
         else:
             return (None, path)
 
     def _match(self, collection, patternsegments, parent):
+        if not patternsegments:
+            return []
+
+        if not isinstance(collection, arvados.collection.RichCollectionBase):
+            return []
+
         ret = []
+        # iterate over the files and subcollections in 'collection'
         for filename in collection:
-            if fnmatch.fnmatch(filename, patternsegments[0]):
+            if patternsegments[0] == '.':
+                # Pattern contains something like "./foo" so just shift
+                # past the "./"
+                ret.extend(self._match(collection, patternsegments[1:], parent))
+            elif fnmatch.fnmatch(filename, patternsegments[0]):
                 cur = os.path.join(parent, filename)
                 if len(patternsegments) == 1:
                     ret.append(cur)
@@ -68,7 +82,7 @@ class CollectionFsAccess(cwltool.draft2tool.StdFsAccess):
     def glob(self, pattern):
         collection, rest = self.get_collection(pattern)
         patternsegments = rest.split("/")
-        return self._match(collection, patternsegments, collection.manifest_locator())
+        return self._match(collection, patternsegments, "keep:" + collection.manifest_locator())
 
     def open(self, fn, mode):
         collection, rest = self.get_collection(fn)
@@ -97,15 +111,17 @@ class ArvadosJob(object):
 
         if self.generatefiles:
             vwd = arvados.collection.Collection()
+            script_parameters["task.vwd"] = {}
             for t in self.generatefiles:
                 if isinstance(self.generatefiles[t], dict):
-                    src, rest = self.arvrunner.fs_access.get_collection(self.generatefiles[t]["path"][6:])
+                    src, rest = self.arvrunner.fs_access.get_collection(self.generatefiles[t]["path"].replace("$(task.keep)/", "keep:"))
                     vwd.copy(rest, t, source_collection=src)
                 else:
                     with vwd.open(t, "w") as f:
                         f.write(self.generatefiles[t])
             vwd.save_new()
-            script_parameters["task.vwd"] = vwd.portable_data_hash()
+            for t in self.generatefiles:
+                script_parameters["task.vwd"][t] = "$(task.keep)/%s/%s" % (vwd.portable_data_hash(), t)
 
         script_parameters["task.env"] = {"TMPDIR": "$(task.tmpdir)"}
         if self.environment:
@@ -120,22 +136,26 @@ class ArvadosJob(object):
         (docker_req, docker_is_req) = get_feature(self, "DockerRequirement")
         if docker_req and kwargs.get("use_container") is not False:
             runtime_constraints["docker_image"] = arv_docker_get_image(self.arvrunner.api, docker_req, pull_image)
-            runtime_constraints["arvados_sdk_version"] = "master"
 
-        response = self.arvrunner.api.jobs().create(body={
-            "script": "run-command",
-            "repository": "arvados",
-            "script_version": "master",
-            "script_parameters": script_parameters,
-            "runtime_constraints": runtime_constraints
-        }, find_or_create=kwargs.get("enable_reuse", True)).execute()
+        try:
+            response = self.arvrunner.api.jobs().create(body={
+                "script": "crunchrunner",
+                "repository": kwargs["repository"],
+                "script_version": "master",
+                "script_parameters": {"tasks": [script_parameters]},
+                "runtime_constraints": runtime_constraints
+            }, find_or_create=kwargs.get("enable_reuse", True)).execute()
+
+            self.arvrunner.jobs[response["uuid"]] = self
 
-        self.arvrunner.jobs[response["uuid"]] = self
+            logger.info("Job %s is %s", response["uuid"], response["state"])
 
-        logger.info("Job %s is %s", response["uuid"], response["state"])
+            if response["state"] in ("Complete", "Failed", "Cancelled"):
+                self.done(response)
+        except Exception as e:
+            logger.error("Got error %s" % str(e))
+            self.output_callback({}, "permanentFail")
 
-        if response["state"] in ("Complete", "Failed", "Cancelled"):
-            self.done(response)
 
     def done(self, record):
         try:
@@ -146,27 +166,28 @@ class ArvadosJob(object):
 
             try:
                 outputs = {}
-                outputs = self.collect_outputs(record["output"])
+                outputs = self.collect_outputs("keep:" + record["output"])
             except Exception as e:
-                logger.warn(str(e))
+                logger.exception("Got exception while collecting job outputs:")
                 processStatus = "permanentFail"
 
             self.output_callback(outputs, processStatus)
         finally:
             del self.arvrunner.jobs[record["uuid"]]
 
+
 class ArvPathMapper(cwltool.pathmapper.PathMapper):
     def __init__(self, arvrunner, referenced_files, basedir, **kwargs):
-        self._pathmap = {}
+        self._pathmap = arvrunner.get_uploaded()
         uploadfiles = []
 
-        pdh_path = re.compile(r'^[0-9a-f]{32}\+\d+/.+')
+        pdh_path = re.compile(r'^keep:[0-9a-f]{32}\+\d+/.+')
 
         for src in referenced_files:
             if isinstance(src, basestring) and pdh_path.match(src):
-                self._pathmap[src] = (src, "/keep/%s" % src)
-            else:
-                ab = src if os.path.isabs(src) else os.path.join(basedir, src)
+                self._pathmap[src] = (src, "$(task.keep)/%s" % src[5:])
+            if src not in self._pathmap:
+                ab = cwltool.pathmapper.abspath(src, basedir)
                 st = arvados.commands.run.statfile("", ab)
                 if kwargs.get("conformance_test"):
                     self._pathmap[src] = (src, ab)
@@ -178,16 +199,21 @@ class ArvPathMapper(cwltool.pathmapper.PathMapper):
                     raise cwltool.workflow.WorkflowException("Input file path '%s' is invalid" % st)
 
         if uploadfiles:
-            arvados.commands.run.uploadfiles([u[2] for u in uploadfiles], arvrunner.api, dry_run=kwargs.get("dry_run"), num_retries=3)
+            arvados.commands.run.uploadfiles([u[2] for u in uploadfiles],
+                                             arvrunner.api,
+                                             dry_run=kwargs.get("dry_run"),
+                                             num_retries=3,
+                                             fnPattern="$(task.keep)/%s/%s")
 
         for src, ab, st in uploadfiles:
+            arvrunner.add_uploaded(src, (ab, st.fn))
             self._pathmap[src] = (ab, st.fn)
 
 
 
 class ArvadosCommandTool(cwltool.draft2tool.CommandLineTool):
     def __init__(self, arvrunner, toolpath_object, **kwargs):
-        super(ArvadosCommandTool, self).__init__(toolpath_object, **kwargs)
+        super(ArvadosCommandTool, self).__init__(toolpath_object, outdir="$(task.outdir)", tmpdir="$(task.tmpdir)", **kwargs)
         self.arvrunner = arvrunner
 
     def makeJobRunner(self):
@@ -204,6 +230,7 @@ class ArvCwlRunner(object):
         self.lock = threading.Lock()
         self.cond = threading.Condition(self.lock)
         self.final_output = None
+        self.uploaded = {}
 
     def arvMakeTool(self, toolpath_object, **kwargs):
         if "class" in toolpath_object and toolpath_object["class"] == "CommandLineTool":
@@ -234,6 +261,12 @@ class ArvCwlRunner(object):
                         finally:
                             self.cond.release()
 
+    def get_uploaded(self):
+        return self.uploaded.copy()
+
+    def add_uploaded(self, src, pair):
+        self.uploaded[src] = pair
+
     def arvExecutor(self, tool, job_order, input_basedir, args, **kwargs):
         events = arvados.events.subscribe(arvados.api('v1'), [["object_uuid", "is_a", "arvados#job"]], self.on_message)
 
@@ -241,6 +274,7 @@ class ArvCwlRunner(object):
 
         kwargs["fs_access"] = self.fs_access
         kwargs["enable_reuse"] = args.enable_reuse
+        kwargs["repository"] = args.repository
 
         if kwargs.get("conformance_test"):
             return cwltool.main.single_job_executor(tool, job_order, input_basedir, args, **kwargs)
@@ -282,7 +316,7 @@ class ArvCwlRunner(object):
 
 def main(args, stdout, stderr, api_client=None):
     runner = ArvCwlRunner(api_client=arvados.api('v1'))
-    args.append("--leave-outputs")
+    args.insert(0, "--leave-outputs")
     parser = cwltool.main.arg_parser()
     exgroup = parser.add_mutually_exclusive_group()
     exgroup.add_argument("--enable-reuse", action="store_true",
@@ -292,4 +326,6 @@ def main(args, stdout, stderr, api_client=None):
                         default=False, dest="enable_reuse",
                         help="")
 
+    parser.add_argument('--repository', type=str, default="peter/crunchrunner", help="Repository containing the 'crunchrunner' program.")
+
     return cwltool.main.main(args, executor=runner.arvExecutor, makeTool=runner.arvMakeTool, parser=parser)
diff --git a/sdk/cwl/bin/arvados-cwl-runner b/sdk/cwl/bin/arvados-cwl-runner
new file mode 100755 (executable)
index 0000000..f31aefd
--- /dev/null
@@ -0,0 +1,7 @@
+#!/usr/bin/env python
+
+import sys
+
+from arvados_cwl import main
+
+sys.exit(main(sys.argv[1:], sys.stdout, sys.stderr))
index 2fd03f754aed6b62ffe5a5e9a2f0fbee35300909..bcf6b963830aca8570545045ab112ee79aa8216d 100644 (file)
@@ -26,11 +26,12 @@ setup(name='arvados-cwl-runner',
       license='Apache 2.0',
       packages=find_packages(),
       scripts=[
-          'bin/cwl-runner'
+          'bin/cwl-runner',
+          'bin/arvados-cwl-runner'
       ],
       install_requires=[
-          'cwltool',
-          'arvados-python-client'
+          'cwltool>=1.0.20151026181844',
+          'arvados-python-client>=0.1.20151023214338'
       ],
       zip_safe=True,
       cmdclass={'egg_info': tagger},
index 1cce0a7fc92d24e21fa694add86c75c63952eb46..b67eaa59a6749fb7e9b1a3da5ad2617344fc799d 100644 (file)
@@ -14,6 +14,7 @@ import (
        "os"
        "regexp"
        "strings"
+       "time"
 )
 
 type StringMatcher func(string) bool
@@ -25,6 +26,14 @@ var MissingArvadosApiHost = errors.New("Missing required environment variable AR
 var MissingArvadosApiToken = errors.New("Missing required environment variable ARVADOS_API_TOKEN")
 var ErrInvalidArgument = errors.New("Invalid argument")
 
+// A common failure mode is to reuse a keepalive connection that has been
+// terminated (in a way that we can't detect) for being idle too long.
+// Non-idempotent requests such as POST are not safe to retry
+// automatically, so we minimize such failures by always using a new or
+// recently active socket for them.
+var MaxIdleConnectionDuration = 30 * time.Second
+
+var RetryDelay = 2 * time.Second
+
 // Indicates an error that was returned by the API server.
 type APIServerError struct {
        // Address of server returning error, of the form "host:port".
@@ -58,6 +67,9 @@ type Dict map[string]interface{}
 
 // Information about how to contact the Arvados server
 type ArvadosClient struct {
+	// URL scheme used to contact the API server: "https" (default) or "http"
+       Scheme string
+
        // Arvados API server, form "host:port"
        ApiServer string
 
@@ -76,6 +88,11 @@ type ArvadosClient struct {
 
        // Discovery document
        DiscoveryDoc Dict
+
+	// Time when idle keepalive connections were last closed; see
+	// MaxIdleConnectionDuration.
+	lastClosedIdlesAt time.Time
+
+	// Number of times to automatically retry a failed, retryable request
+       Retries int
 }
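
Callers can tune the retry knobs introduced above: Retries is a per-client field, while RetryDelay and MaxIdleConnectionDuration are package-level variables. A minimal sketch; the values shown are illustrative, not recommendations:

    package main

    import (
        "log"
        "time"

        "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
    )

    func main() {
        arv, err := arvadosclient.MakeArvadosClient()
        if err != nil {
            log.Fatal(err)
        }
        arv.Retries = 5 // per-request retry budget (default 2)
        // Pause between attempts (default 2s):
        arvadosclient.RetryDelay = 500 * time.Millisecond
        // Idle keepalive sockets older than this are closed before
        // non-retryable requests such as POST (default 30s):
        arvadosclient.MaxIdleConnectionDuration = 10 * time.Second
    }
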
 
 // Create a new ArvadosClient, initialized with standard Arvados environment
@@ -87,12 +104,14 @@ func MakeArvadosClient() (ac ArvadosClient, err error) {
        external := matchTrue.MatchString(os.Getenv("ARVADOS_EXTERNAL_CLIENT"))
 
        ac = ArvadosClient{
+               Scheme:      "https",
                ApiServer:   os.Getenv("ARVADOS_API_HOST"),
                ApiToken:    os.Getenv("ARVADOS_API_TOKEN"),
                ApiInsecure: insecure,
                Client: &http.Client{Transport: &http.Transport{
                        TLSClientConfig: &tls.Config{InsecureSkipVerify: insecure}}},
-               External: external}
+               External: external,
+               Retries:  2}
 
        if ac.ApiServer == "" {
                return ac, MissingArvadosApiHost
@@ -101,16 +120,20 @@ func MakeArvadosClient() (ac ArvadosClient, err error) {
                return ac, MissingArvadosApiToken
        }
 
+       ac.lastClosedIdlesAt = time.Now()
+
        return ac, err
 }
 
 // CallRaw is the same as Call() but returns a Reader that reads the
 // response body, instead of taking an output object.
 func (c ArvadosClient) CallRaw(method string, resourceType string, uuid string, action string, parameters Dict) (reader io.ReadCloser, err error) {
-       var req *http.Request
-
+       scheme := c.Scheme
+       if scheme == "" {
+               scheme = "https"
+       }
        u := url.URL{
-               Scheme: "https",
+               Scheme: scheme,
                Host:   c.ApiServer}
 
        if resourceType != API_DISCOVERY_RESOURCE {
@@ -140,36 +163,73 @@ func (c ArvadosClient) CallRaw(method string, resourceType string, uuid string,
                }
        }
 
-       if method == "GET" || method == "HEAD" {
-               u.RawQuery = vals.Encode()
-               if req, err = http.NewRequest(method, u.String(), nil); err != nil {
-                       return nil, err
-               }
-       } else {
-               if req, err = http.NewRequest(method, u.String(), bytes.NewBufferString(vals.Encode())); err != nil {
-                       return nil, err
-               }
-               req.Header.Add("Content-Type", "application/x-www-form-urlencoded")
+       retryable := false
+       switch method {
+       case "GET", "HEAD", "PUT", "OPTIONS", "DELETE":
+               retryable = true
        }
 
-       // Add api token header
-       req.Header.Add("Authorization", fmt.Sprintf("OAuth2 %s", c.ApiToken))
-       if c.External {
-               req.Header.Add("X-External-Client", "1")
+       // Non-retryable methods such as POST are not safe to retry automatically,
+       // so we minimize such failures by always using a new or recently active socket
+       if !retryable {
+               if time.Since(c.lastClosedIdlesAt) > MaxIdleConnectionDuration {
+                       c.lastClosedIdlesAt = time.Now()
+                       c.Client.Transport.(*http.Transport).CloseIdleConnections()
+               }
        }
 
        // Make the request
+       var req *http.Request
        var resp *http.Response
-       if resp, err = c.Client.Do(req); err != nil {
-               return nil, err
-       }
 
-       if resp.StatusCode == http.StatusOK {
-               return resp.Body, nil
+       for attempt := 0; attempt <= c.Retries; attempt++ {
+               if method == "GET" || method == "HEAD" {
+                       u.RawQuery = vals.Encode()
+                       if req, err = http.NewRequest(method, u.String(), nil); err != nil {
+                               return nil, err
+                       }
+               } else {
+                       if req, err = http.NewRequest(method, u.String(), bytes.NewBufferString(vals.Encode())); err != nil {
+                               return nil, err
+                       }
+                       req.Header.Add("Content-Type", "application/x-www-form-urlencoded")
+               }
+
+               // Add api token header
+               req.Header.Add("Authorization", fmt.Sprintf("OAuth2 %s", c.ApiToken))
+               if c.External {
+                       req.Header.Add("X-External-Client", "1")
+               }
+
+               resp, err = c.Client.Do(req)
+               if err != nil {
+                       if retryable {
+                               time.Sleep(RetryDelay)
+                               continue
+                       } else {
+                               return nil, err
+                       }
+               }
+
+               if resp.StatusCode == http.StatusOK {
+                       return resp.Body, nil
+               }
+
+               defer resp.Body.Close()
+
+               switch resp.StatusCode {
+               case 408, 409, 422, 423, 500, 502, 503, 504:
+                       time.Sleep(RetryDelay)
+                       continue
+               default:
+                       return nil, newAPIServerError(c.ApiServer, resp)
+               }
        }
 
-       defer resp.Body.Close()
-       return nil, newAPIServerError(c.ApiServer, resp)
+       if resp != nil {
+               return nil, newAPIServerError(c.ApiServer, resp)
+       }
+       return nil, err
 }
 
 func newAPIServerError(ServerAddress string, resp *http.Response) APIServerError {
index d35f6dacb72b632e18718b503d0a2f4ff55e7a17..8e32efe4f987adccbe77b573a15ab27dbdfcc707 100644 (file)
@@ -1,11 +1,14 @@
 package arvadosclient
 
 import (
+       "fmt"
        "git.curoverse.com/arvados.git/sdk/go/arvadostest"
        . "gopkg.in/check.v1"
+       "net"
        "net/http"
        "os"
        "testing"
+       "time"
 )
 
 // Gocheck boilerplate
@@ -15,13 +18,20 @@ func Test(t *testing.T) {
 
 var _ = Suite(&ServerRequiredSuite{})
 var _ = Suite(&UnitSuite{})
+var _ = Suite(&MockArvadosServerSuite{})
 
 // Tests that require the Keep server running
 type ServerRequiredSuite struct{}
 
 func (s *ServerRequiredSuite) SetUpSuite(c *C) {
        arvadostest.StartAPI()
-       arvadostest.StartKeep()
+       arvadostest.StartKeep(2, false)
+       RetryDelay = 0
+}
+
+func (s *ServerRequiredSuite) TearDownSuite(c *C) {
+       arvadostest.StopKeep(2)
+       arvadostest.StopAPI()
 }
 
 func (s *ServerRequiredSuite) SetUpTest(c *C) {
@@ -102,39 +112,50 @@ func (s *ServerRequiredSuite) TestInvalidResourceType(c *C) {
 func (s *ServerRequiredSuite) TestCreatePipelineTemplate(c *C) {
        arv, err := MakeArvadosClient()
 
-       getback := make(Dict)
-       err = arv.Create("pipeline_templates",
-               Dict{"pipeline_template": Dict{
-                       "name": "tmp",
-                       "components": Dict{
-                               "c1": map[string]string{"script": "script1"},
-                               "c2": map[string]string{"script": "script2"}}}},
-               &getback)
-       c.Assert(err, Equals, nil)
-       c.Assert(getback["name"], Equals, "tmp")
-       c.Assert(getback["components"].(map[string]interface{})["c2"].(map[string]interface{})["script"], Equals, "script2")
-
-       uuid := getback["uuid"].(string)
-
-       getback = make(Dict)
-       err = arv.Get("pipeline_templates", uuid, nil, &getback)
-       c.Assert(err, Equals, nil)
-       c.Assert(getback["name"], Equals, "tmp")
-       c.Assert(getback["components"].(map[string]interface{})["c1"].(map[string]interface{})["script"], Equals, "script1")
-
-       getback = make(Dict)
-       err = arv.Update("pipeline_templates", uuid,
-               Dict{
-                       "pipeline_template": Dict{"name": "tmp2"}},
-               &getback)
-       c.Assert(err, Equals, nil)
-       c.Assert(getback["name"], Equals, "tmp2")
-
-       c.Assert(getback["uuid"].(string), Equals, uuid)
-       getback = make(Dict)
-       err = arv.Delete("pipeline_templates", uuid, nil, &getback)
-       c.Assert(err, Equals, nil)
-       c.Assert(getback["name"], Equals, "tmp2")
+       for _, idleConnections := range []bool{
+               false,
+               true,
+       } {
+               if idleConnections {
+                       arv.lastClosedIdlesAt = time.Now().Add(-time.Minute)
+               } else {
+                       arv.lastClosedIdlesAt = time.Now()
+               }
+
+               getback := make(Dict)
+               err = arv.Create("pipeline_templates",
+                       Dict{"pipeline_template": Dict{
+                               "name": "tmp",
+                               "components": Dict{
+                                       "c1": map[string]string{"script": "script1"},
+                                       "c2": map[string]string{"script": "script2"}}}},
+                       &getback)
+               c.Assert(err, Equals, nil)
+               c.Assert(getback["name"], Equals, "tmp")
+               c.Assert(getback["components"].(map[string]interface{})["c2"].(map[string]interface{})["script"], Equals, "script2")
+
+               uuid := getback["uuid"].(string)
+
+               getback = make(Dict)
+               err = arv.Get("pipeline_templates", uuid, nil, &getback)
+               c.Assert(err, Equals, nil)
+               c.Assert(getback["name"], Equals, "tmp")
+               c.Assert(getback["components"].(map[string]interface{})["c1"].(map[string]interface{})["script"], Equals, "script1")
+
+               getback = make(Dict)
+               err = arv.Update("pipeline_templates", uuid,
+                       Dict{
+                               "pipeline_template": Dict{"name": "tmp2"}},
+                       &getback)
+               c.Assert(err, Equals, nil)
+               c.Assert(getback["name"], Equals, "tmp2")
+
+               c.Assert(getback["uuid"].(string), Equals, uuid)
+               getback = make(Dict)
+               err = arv.Delete("pipeline_templates", uuid, nil, &getback)
+               c.Assert(err, Equals, nil)
+               c.Assert(getback["name"], Equals, "tmp2")
+       }
 }
 
 func (s *ServerRequiredSuite) TestErrorResponse(c *C) {
@@ -206,3 +227,160 @@ func (s *UnitSuite) TestPDHMatch(c *C) {
        c.Assert(PDHMatch("+12345"), Equals, false)
        c.Assert(PDHMatch(""), Equals, false)
 }
+
+// Tests that use a mock Arvados server
+type MockArvadosServerSuite struct{}
+
+func (s *MockArvadosServerSuite) SetUpSuite(c *C) {
+       RetryDelay = 0
+}
+
+func (s *MockArvadosServerSuite) SetUpTest(c *C) {
+       arvadostest.ResetEnv()
+}
+
+type APIServer struct {
+       listener net.Listener
+       url      string
+}
+
+func RunFakeArvadosServer(st http.Handler) (api APIServer, err error) {
+       api.listener, err = net.ListenTCP("tcp", &net.TCPAddr{Port: 0})
+       if err != nil {
+               return
+       }
+       api.url = api.listener.Addr().String()
+       go http.Serve(api.listener, st)
+       return
+}
+
+type APIStub struct {
+       method        string
+       retryAttempts int
+       expected      int
+       respStatus    []int
+       responseBody  []string
+}
+
+func (h *APIStub) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
+       if req.URL.Path == "/redirect-loop" {
+               http.Redirect(resp, req, "/redirect-loop", http.StatusFound)
+               return
+       }
+       if h.respStatus[h.retryAttempts] < 0 {
+               // Fail the client's Do() by starting a redirect loop
+               http.Redirect(resp, req, "/redirect-loop", http.StatusFound)
+       } else {
+               resp.WriteHeader(h.respStatus[h.retryAttempts])
+               resp.Write([]byte(h.responseBody[h.retryAttempts]))
+       }
+       h.retryAttempts++
+}
+
+func (s *MockArvadosServerSuite) TestWithRetries(c *C) {
+       for _, stub := range []APIStub{
+               {
+                       "get", 0, 200, []int{200, 500}, []string{`{"ok":"ok"}`, ``},
+               },
+               {
+                       "create", 0, 200, []int{200, 500}, []string{`{"ok":"ok"}`, ``},
+               },
+               {
+                       "get", 0, 500, []int{500, 500, 500, 200}, []string{``, ``, ``, `{"ok":"ok"}`},
+               },
+               {
+                       "create", 0, 500, []int{500, 500, 500, 200}, []string{``, ``, ``, `{"ok":"ok"}`},
+               },
+               {
+                       "update", 0, 500, []int{500, 500, 500, 200}, []string{``, ``, ``, `{"ok":"ok"}`},
+               },
+               {
+                       "delete", 0, 500, []int{500, 500, 500, 200}, []string{``, ``, ``, `{"ok":"ok"}`},
+               },
+               {
+                       "get", 0, 502, []int{500, 500, 502, 200}, []string{``, ``, ``, `{"ok":"ok"}`},
+               },
+               {
+                       "create", 0, 502, []int{500, 500, 502, 200}, []string{``, ``, ``, `{"ok":"ok"}`},
+               },
+               {
+                       "get", 0, 200, []int{500, 500, 200}, []string{``, ``, `{"ok":"ok"}`},
+               },
+               {
+                       "create", 0, 200, []int{500, 500, 200}, []string{``, ``, `{"ok":"ok"}`},
+               },
+               {
+                       "delete", 0, 200, []int{500, 500, 200}, []string{``, ``, `{"ok":"ok"}`},
+               },
+               {
+                       "update", 0, 200, []int{500, 500, 200}, []string{``, ``, `{"ok":"ok"}`},
+               },
+               {
+                       "get", 0, 401, []int{401, 200}, []string{``, `{"ok":"ok"}`},
+               },
+               {
+                       "create", 0, 401, []int{401, 200}, []string{``, `{"ok":"ok"}`},
+               },
+               {
+                       "get", 0, 404, []int{404, 200}, []string{``, `{"ok":"ok"}`},
+               },
+               {
+                       "get", 0, 401, []int{500, 401, 200}, []string{``, ``, `{"ok":"ok"}`},
+               },
+
+               // Response code -1 simulates an HTTP/network error
+               // (i.e., Do() returns an error; there is no HTTP
+               // response status code).
+
+               // Succeed on second retry
+               {
+                       "get", 0, 200, []int{-1, -1, 200}, []string{``, ``, `{"ok":"ok"}`},
+               },
+               // "POST" is not safe to retry: fail after one error
+               {
+                       "create", 0, -1, []int{-1, 200}, []string{``, `{"ok":"ok"}`},
+               },
+       } {
+               api, err := RunFakeArvadosServer(&stub)
+               c.Check(err, IsNil)
+
+               defer api.listener.Close()
+
+               arv := ArvadosClient{
+                       Scheme:      "http",
+                       ApiServer:   api.url,
+                       ApiToken:    "abc123",
+                       ApiInsecure: true,
+                       Client:      &http.Client{Transport: &http.Transport{}},
+                       Retries:     2}
+
+               getback := make(Dict)
+               switch stub.method {
+               case "get":
+                       err = arv.Get("collections", "zzzzz-4zz18-znfnqtbbv4spc3w", nil, &getback)
+               case "create":
+                       err = arv.Create("collections",
+                               Dict{"collection": Dict{"name": "testing"}},
+                               &getback)
+               case "update":
+                       err = arv.Update("collections", "zzzzz-4zz18-znfnqtbbv4spc3w",
+                               Dict{"collection": Dict{"name": "testing"}},
+                               &getback)
+               case "delete":
+                       err = arv.Delete("pipeline_templates", "zzzzz-4zz18-znfnqtbbv4spc3w", nil, &getback)
+               }
+
+               switch stub.expected {
+               case 200:
+                       c.Check(err, IsNil)
+                       c.Check(getback["ok"], Equals, "ok")
+               case -1:
+                       c.Check(err, NotNil)
+                       c.Check(err, ErrorMatches, `.*stopped after \d+ redirects`)
+               default:
+                       c.Check(err, NotNil)
+                       c.Check(err, ErrorMatches, fmt.Sprintf("arvados API server error: %d.*", stub.expected))
+                       c.Check(err.(APIServerError).HttpStatusCode, Equals, stub.expected)
+               }
+       }
+}
diff --git a/sdk/go/arvadostest/fixtures.go b/sdk/go/arvadostest/fixtures.go
new file mode 100644 (file)
index 0000000..47b75b3
--- /dev/null
@@ -0,0 +1,35 @@
+package arvadostest
+
+// IDs of API server's test fixtures
+const (
+       SpectatorToken        = "zw2f4gwx8hw8cjre7yp6v1zylhrhn3m5gvjq73rtpwhmknrybu"
+       ActiveToken           = "3kg6k6lzmp9kj5cpkcoxie963cmvjahbt2fod9zru30k1jqdmi"
+       AdminToken            = "4axaw8zxe0qm22wa6urpp5nskcne8z88cvbupv653y1njyi05h"
+       AnonymousToken        = "4kg6k6lzmp9kj4cpkcoxie964cmvjahbt4fod9zru44k4jqdmi"
+       DataManagerToken      = "320mkve8qkswstz7ff61glpk3mhgghmg67wmic7elw4z41pke1"
+       FooCollection         = "zzzzz-4zz18-fy296fx3hot09f7"
+       NonexistentCollection = "zzzzz-4zz18-totallynotexist"
+       HelloWorldCollection  = "zzzzz-4zz18-4en62shvi99lxd4"
+       FooBarDirCollection   = "zzzzz-4zz18-foonbarfilesdir"
+       FooPdh                = "1f4b0bc7583c2a7f9102c395f4ffc5e3+45"
+       HelloWorldPdh         = "55713e6a34081eb03609e7ad5fcad129+62"
+)
+
+// A valid manifest designed to test various edge cases and parsing
+// requirements
+const PathologicalManifest = ". acbd18db4cc2f85cedef654fccc4a4d8+3 37b51d194a7513e45b56f6524f2d51f2+3 73feffa4b7f6bb68e44cf984c85f6e88+3+Z+K@xyzzy acbd18db4cc2f85cedef654fccc4a4d8+3 0:0:zero@0 0:1:f 1:0:zero@1 1:4:ooba 4:0:zero@4 5:1:r 5:4:rbaz 9:0:zero@9\n" +
+       "./overlapReverse acbd18db4cc2f85cedef654fccc4a4d8+3 acbd18db4cc2f85cedef654fccc4a4d8+3 5:1:o 4:2:oo 2:4:ofoo\n" +
+       "./segmented acbd18db4cc2f85cedef654fccc4a4d8+3 37b51d194a7513e45b56f6524f2d51f2+3 0:1:frob 5:1:frob 1:1:frob 1:2:oof 0:1:oof 5:0:frob 3:1:frob\n" +
+       `./foo\040b\141r acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:baz` + "\n" +
+       `./foo\040b\141r acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:b\141z\040w\141z` + "\n" +
+       "./foo acbd18db4cc2f85cedef654fccc4a4d8+3 0:0:zero 0:3:foo\n" +
+       ". acbd18db4cc2f85cedef654fccc4a4d8+3 0:0:foo/zero 0:3:foo/foo\n"
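Each line above follows the Keep manifest grammar used throughout this SDK: a stream name, one or more block locators (md5+size, optionally with hints such as +A… or +K@…), then file tokens of the form start:length:name. A minimal well-formed example for orientation (foo.txt is illustrative):

	// ". acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:foo.txt\n"
	//   stream "."; one 3-byte block (md5 of "foo"); bytes 0-3 exposed as foo.txt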
+
+// An MD5 collision.
+var (
+       MD5CollisionData = [][]byte{
+               []byte("\x0e0eaU\x9a\xa7\x87\xd0\x0b\xc6\xf7\x0b\xbd\xfe4\x04\xcf\x03e\x9epO\x854\xc0\x0f\xfbe\x9cL\x87@\xcc\x94/\xeb-\xa1\x15\xa3\xf4\x15\\\xbb\x86\x07Is\x86em}\x1f4\xa4 Y\xd7\x8fZ\x8d\xd1\xef"),
+               []byte("\x0e0eaU\x9a\xa7\x87\xd0\x0b\xc6\xf7\x0b\xbd\xfe4\x04\xcf\x03e\x9etO\x854\xc0\x0f\xfbe\x9cL\x87@\xcc\x94/\xeb-\xa1\x15\xa3\xf4\x15\xdc\xbb\x86\x07Is\x86em}\x1f4\xa4 Y\xd7\x8fZ\x8d\xd1\xef"),
+       }
+       MD5CollisionMD5 = "cee9a457e790cf20d4bdaa6d69f01e41"
+)
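A quick verification sketch of the collision claim (standalone snippet; per the fixture, both iterations print MD5CollisionMD5):

	for _, data := range arvadostest.MD5CollisionData {
		fmt.Printf("%x\n", md5.Sum(data)) // cee9a457e790cf20d4bdaa6d69f01e41
	}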
index cad16917dba286504f6693cac3a3fbd4d05a741e..c61b68b319fbe50b5e71ea97e5751da57af687d9 100644 (file)
@@ -3,12 +3,10 @@ package arvadostest
 import (
        "bufio"
        "bytes"
-       "fmt"
-       "io"
-       "io/ioutil"
        "log"
        "os"
        "os/exec"
+       "strconv"
        "strings"
 )
 
@@ -67,24 +65,12 @@ func StartAPI() {
        chdirToPythonTests()
 
        cmd := exec.Command("python", "run_test_server.py", "start", "--auth", "admin")
-       stderr, err := cmd.StderrPipe()
-       if err != nil {
-               log.Fatal(err)
-       }
-       go io.Copy(os.Stderr, stderr)
-       stdout, err := cmd.StdoutPipe()
+       cmd.Stdin = nil
+       cmd.Stderr = os.Stderr
+
+       authScript, err := cmd.Output()
        if err != nil {
-               log.Fatal(err)
-       }
-       if err = cmd.Start(); err != nil {
-               log.Fatal(err)
-       }
-       var authScript []byte
-       if authScript, err = ioutil.ReadAll(stdout); err != nil {
-               log.Fatal(err)
-       }
-       if err = cmd.Wait(); err != nil {
-               log.Fatal(err)
+               log.Fatalf("%+v: %s", cmd.Args, err)
        }
        ParseAuthSettings(authScript)
        ResetEnv()
@@ -95,29 +81,54 @@ func StopAPI() {
        defer os.Chdir(cwd)
        chdirToPythonTests()
 
-       exec.Command("python", "run_test_server.py", "stop").Run()
+       bgRun(exec.Command("python", "run_test_server.py", "stop"))
 }
 
-func StartKeep() {
+// StartKeep starts the given number of keep servers,
+// optionally with -enforce-permissions enabled.
+// Use numKeepServers = 2 and enforcePermissions = false under all normal circumstances.
+func StartKeep(numKeepServers int, enforcePermissions bool) {
        cwd, _ := os.Getwd()
        defer os.Chdir(cwd)
        chdirToPythonTests()
 
-       cmd := exec.Command("python", "run_test_server.py", "start_keep")
-       stderr, err := cmd.StderrPipe()
-       if err != nil {
-               log.Fatalf("Setting up stderr pipe: %s", err)
-       }
-       go io.Copy(os.Stderr, stderr)
-       if err := cmd.Run(); err != nil {
-               panic(fmt.Sprintf("'python run_test_server.py start_keep' returned error %s", err))
+       cmdArgs := []string{"run_test_server.py", "start_keep", "--num-keep-servers", strconv.Itoa(numKeepServers)}
+       if enforcePermissions {
+               cmdArgs = append(cmdArgs, "--keep-enforce-permissions")
        }
+
+       bgRun(exec.Command("python", cmdArgs...))
 }
 
-func StopKeep() {
+// StopKeep stops keep servers that were started with StartKeep.
+// numKeepServers should be the same value that was passed to StartKeep,
+// which is 2 under all normal circumstances.
+func StopKeep(numKeepServers int) {
        cwd, _ := os.Getwd()
        defer os.Chdir(cwd)
        chdirToPythonTests()
 
-       exec.Command("python", "run_test_server.py", "stop_keep").Run()
+       cmd := exec.Command("python", "run_test_server.py", "stop_keep", "--num-keep-servers", strconv.Itoa(numKeepServers))
+       cmd.Stdin = nil
+       cmd.Stderr = os.Stderr
+       cmd.Stdout = os.Stderr
+       if err := cmd.Run(); err != nil {
+               log.Fatalf("%+v: %s", cmd.Args, err)
+       }
+}
+
+// Start cmd, with stderr and stdout redirected to our own
+// stderr. Return when the process exits, but do not wait for its
+// stderr and stdout to close: any grandchild processes will continue
+// writing to our stderr.
+func bgRun(cmd *exec.Cmd) {
+       cmd.Stdin = nil
+       cmd.Stderr = os.Stderr
+       cmd.Stdout = os.Stderr
+       if err := cmd.Start(); err != nil {
+               log.Fatalf("%+v: %s", cmd.Args, err)
+       }
+       if _, err := cmd.Process.Wait(); err != nil {
+               log.Fatalf("%+v: %s", cmd.Args, err)
+       }
 }
diff --git a/sdk/go/arvadostest/stub.go b/sdk/go/arvadostest/stub.go
new file mode 100644 (file)
index 0000000..15a0254
--- /dev/null
@@ -0,0 +1,35 @@
+package arvadostest
+
+import (
+       "net/http"
+)
+
+// StubResponse defines a canned response status and body for a stubbed request path
+type StubResponse struct {
+       Status int
+       Body   string
+}
+
+// ServerStub is an http.Handler that serves canned StubResponses keyed by request path.
+// Ex:  /arvados/v1/keep_services = arvadostest.StubResponse{200, string(`{}`)}
+type ServerStub struct {
+       Responses map[string]StubResponse
+}
+
+func (stub *ServerStub) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
+       if req.URL.Path == "/redirect-loop" {
+               http.Redirect(resp, req, "/redirect-loop", http.StatusFound)
+               return
+       }
+
+       pathResponse := stub.Responses[req.URL.Path]
+       if pathResponse.Status == -1 {
+               http.Redirect(resp, req, "/redirect-loop", http.StatusFound)
+       } else if pathResponse.Body != "" {
+               resp.WriteHeader(pathResponse.Status)
+               resp.Write([]byte(pathResponse.Body))
+       } else {
+               resp.WriteHeader(500)
+               resp.Write([]byte(``))
+       }
+}
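A sketch of serving this stub, mirroring the RunFakeArvadosServer pattern in the client tests above (the listener setup here is illustrative, not part of this diff):

	stub := &arvadostest.ServerStub{Responses: map[string]arvadostest.StubResponse{
		"/arvados/v1/collections/zzzzz-4zz18-znfnqtbbv4spc3w": {200, `{"ok":"ok"}`},
	}}
	listener, _ := net.Listen("tcp", "127.0.0.1:0")
	go http.Serve(listener, stub)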
index 4a719e922dd8a98bdc88445f413ea88788b01a53..ca4eb948b1220c982ef25fa03c9175f40148a797 100644 (file)
@@ -1,6 +1,7 @@
 package auth
 
 import (
+       "encoding/base64"
        "net/http"
        "net/url"
        "strings"
@@ -20,6 +21,15 @@ func NewCredentialsFromHTTPRequest(r *http.Request) *Credentials {
        return c
 }
 
+// EncodeTokenCookie accepts a token byte slice and returns a string
+// suitable for use as a cookie value, such that it will be decoded correctly
+// by LoadTokensFromHTTPRequest.
+var EncodeTokenCookie func([]byte) string = base64.URLEncoding.EncodeToString
+
+// DecodeTokenCookie accepts a cookie value and returns the decoded
+// token.
+var DecodeTokenCookie func(string) ([]byte, error) = base64.URLEncoding.DecodeString
+
 // LoadTokensFromHTTPRequest loads all tokens it can find in the
 // headers and query string of an HTTP request.
 func (a *Credentials) LoadTokensFromHTTPRequest(r *http.Request) {
@@ -51,10 +61,24 @@ func (a *Credentials) LoadTokensFromHTTPRequest(r *http.Request) {
                a.Tokens = append(a.Tokens, val...)
        }
 
+       a.loadTokenFromCookie(r)
+
        // TODO: Load token from Rails session cookie (if Rails site
        // secret is known)
 }
 
+func (a *Credentials) loadTokenFromCookie(r *http.Request) {
+       cookie, err := r.Cookie("arvados_api_token")
+       if err != nil || len(cookie.Value) == 0 {
+               return
+       }
+       token, err := DecodeTokenCookie(cookie.Value)
+       if err != nil {
+               return
+       }
+       a.Tokens = append(a.Tokens, string(token))
+}
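For context, a sketch of the server side that would issue such a cookie (w and apiToken are illustrative handler variables, not names from this diff):

	http.SetCookie(w, &http.Cookie{
		Name:  "arvados_api_token",
		Value: auth.EncodeTokenCookie([]byte(apiToken)),
	})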
+
 // TODO: LoadTokensFromHttpRequestBody(). We can't assume in
 // LoadTokensFromHttpRequest() that [or how] we should read and parse
 // the request body. This has to be requested explicitly by the
index d2f1c60ba93889614de18756dcf3ed6608f0b5fa..a5a668f940f37b18418a29d28d02019e938f7a79 100644 (file)
@@ -3,7 +3,6 @@ package blockdigest
 
 import (
        "fmt"
-       "log"
        "regexp"
        "strconv"
        "strings"
@@ -58,15 +57,6 @@ func FromString(s string) (dig BlockDigest, err error) {
        return
 }
 
-// Will fatal with the error if an error is encountered
-func AssertFromString(s string) BlockDigest {
-       d, err := FromString(s)
-       if err != nil {
-               log.Fatalf("Error creating BlockDigest from %s: %v", s, err)
-       }
-       return d
-}
-
 func IsBlockLocator(s string) bool {
        return LocatorPattern.MatchString(s)
 }
index 017aaa47101c9a2e1971e7694abac24bc77f1e06..e520deefe85315d14294e7bcce8683d8eaf6a299 100644 (file)
@@ -88,13 +88,20 @@ func TestInvalidDigestStrings(t *testing.T) {
 
 func TestBlockDigestWorksAsMapKey(t *testing.T) {
        m := make(map[BlockDigest]int)
-       bd := AssertFromString("01234567890123456789abcdefabcdef")
+       bd, err := FromString("01234567890123456789abcdefabcdef")
+       if err != nil {
+               t.Fatalf("Unexpected error during FromString for block: %v", err)
+       }
        m[bd] = 5
 }
 
 func TestBlockDigestGetsPrettyPrintedByPrintf(t *testing.T) {
        input := "01234567890123456789abcdefabcdef"
-       prettyPrinted := fmt.Sprintf("%v", AssertFromString(input))
+       fromString, err := FromString(input)
+       if err != nil {
+               t.Fatalf("Unexpected error during FromString: %v", err)
+       }
+       prettyPrinted := fmt.Sprintf("%v", fromString)
        if prettyPrinted != input {
                t.Fatalf("Expected blockDigest produced from \"%s\" to be printed as "+
                        "\"%s\", but instead it was printed as %s",
@@ -103,7 +110,10 @@ func TestBlockDigestGetsPrettyPrintedByPrintf(t *testing.T) {
 }
 
 func TestBlockDigestGetsPrettyPrintedByPrintfInNestedStructs(t *testing.T) {
-       input := "01234567890123456789abcdefabcdef"
+       input, err := FromString("01234567890123456789abcdefabcdef")
+       if err != nil {
+               t.Fatalf("Unexpected error during FromString for block: %v", err)
+       }
        value := 42
        nested := struct {
                // Fun trivia fact: If this field was called "digest" instead of
@@ -113,7 +123,7 @@ func TestBlockDigestGetsPrettyPrintedByPrintfInNestedStructs(t *testing.T) {
                Digest BlockDigest
                value  int
        }{
-               AssertFromString(input),
+               input,
                value,
        }
        prettyPrinted := fmt.Sprintf("%+v", nested)
@@ -155,7 +165,11 @@ func TestParseBlockLocatorSimple(t *testing.T) {
        if err != nil {
                t.Fatalf("Unexpected error parsing block locator: %v", err)
        }
-       expectBlockLocator(t, b, BlockLocator{Digest: AssertFromString("365f83f5f808896ec834c8b595288735"),
+       d, err := FromString("365f83f5f808896ec834c8b595288735")
+       if err != nil {
+               t.Fatalf("Unexpected error during FromString for block: %v", err)
+       }
+       expectBlockLocator(t, b, BlockLocator{Digest: d,
                Size: 2310,
                Hints: []string{"K@qr1hi",
                        "Af0c9a66381f3b028677411926f0be1c6282fe67c@542b5ddf"}})
diff --git a/sdk/go/crunchrunner/crunchrunner.go b/sdk/go/crunchrunner/crunchrunner.go
new file mode 100644 (file)
index 0000000..8e24e18
--- /dev/null
@@ -0,0 +1,356 @@
+package main
+
+import (
+       "fmt"
+       "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
+       "git.curoverse.com/arvados.git/sdk/go/keepclient"
+       "log"
+       "os"
+       "os/exec"
+       "os/signal"
+       "strings"
+       "syscall"
+)
+
+type TaskDef struct {
+       Command            []string          `json:"command"`
+       Env                map[string]string `json:"task.env"`
+       Stdin              string            `json:"task.stdin"`
+       Stdout             string            `json:"task.stdout"`
+       Vwd                map[string]string `json:"task.vwd"`
+       SuccessCodes       []int             `json:"task.successCodes"`
+       PermanentFailCodes []int             `json:"task.permanentFailCodes"`
+       TemporaryFailCodes []int             `json:"task.temporaryFailCodes"`
+}
+
+type Tasks struct {
+       Tasks []TaskDef `json:"tasks"`
+}
+
+type Job struct {
+       Script_parameters Tasks `json:"script_parameters"`
+}
+
+type Task struct {
+       Job_uuid                 string  `json:"job_uuid"`
+       Created_by_job_task_uuid string  `json:"created_by_job_task_uuid"`
+       Parameters               TaskDef `json:"parameters"`
+       Sequence                 int     `json:"sequence"`
+       Output                   string  `json:"output"`
+       Success                  bool    `json:"success"`
+       Progress                 float32 `json:"progress"`
+}
+
+type IArvadosClient interface {
+       Create(resourceType string, parameters arvadosclient.Dict, output interface{}) error
+       Update(resourceType string, uuid string, parameters arvadosclient.Dict, output interface{}) (err error)
+}
+
+func setupDirectories(crunchtmpdir, taskUuid string) (tmpdir, outdir string, err error) {
+       tmpdir = crunchtmpdir + "/tmpdir"
+       err = os.Mkdir(tmpdir, 0700)
+       if err != nil {
+               return "", "", err
+       }
+
+       outdir = crunchtmpdir + "/outdir"
+       err = os.Mkdir(outdir, 0700)
+       if err != nil {
+               return "", "", err
+       }
+
+       return tmpdir, outdir, nil
+}
+
+func checkOutputFilename(outdir, fn string) error {
+       if strings.HasPrefix(fn, "/") || strings.HasSuffix(fn, "/") {
+               return fmt.Errorf("Path must not start or end with '/'")
+       }
+       if strings.Contains(fn, "../") {
+               return fmt.Errorf("Path must not contain '../'")
+       }
+
+       sl := strings.LastIndex(fn, "/")
+       if sl != -1 {
+               os.MkdirAll(outdir+"/"+fn[0:sl], 0777)
+       }
+       return nil
+}
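Illustrative behavior of the validator above (hypothetical inputs):

	// checkOutputFilename(outdir, "sub/dir/out.txt") -> nil; creates outdir/sub/dir
	// checkOutputFilename(outdir, "/abs.txt")        -> error: leading '/'
	// checkOutputFilename(outdir, "../escape.txt")   -> error: contains "../"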
+
+func setupCommand(cmd *exec.Cmd, taskp TaskDef, outdir string, replacements map[string]string) (stdin, stdout string, err error) {
+       if taskp.Vwd != nil {
+               for k, v := range taskp.Vwd {
+                       v = substitute(v, replacements)
+                       err = checkOutputFilename(outdir, k)
+                       if err != nil {
+                               return "", "", err
+                       }
+                       os.Symlink(v, outdir+"/"+k)
+               }
+       }
+
+       if taskp.Stdin != "" {
+               // Set up stdin redirection
+               stdin = substitute(taskp.Stdin, replacements)
+               cmd.Stdin, err = os.Open(stdin)
+               if err != nil {
+                       return "", "", err
+               }
+       }
+
+       if taskp.Stdout != "" {
+               err = checkOutputFilename(outdir, taskp.Stdout)
+               if err != nil {
+                       return "", "", err
+               }
+               // Set up stdout redirection
+               stdout = outdir + "/" + taskp.Stdout
+               cmd.Stdout, err = os.Create(stdout)
+               if err != nil {
+                       return "", "", err
+               }
+       } else {
+               cmd.Stdout = os.Stdout
+       }
+
+       if taskp.Env != nil {
+               // Set up subprocess environment
+               cmd.Env = os.Environ()
+               for k, v := range taskp.Env {
+                       v = substitute(v, replacements)
+                       cmd.Env = append(cmd.Env, k+"="+v)
+               }
+       }
+       return stdin, stdout, nil
+}
+
+// Set up signal handlers.  Go sends signal notifications to a "signal
+// channel".
+func setupSignals(cmd *exec.Cmd) chan os.Signal {
+       sigChan := make(chan os.Signal, 1)
+       signal.Notify(sigChan, syscall.SIGTERM)
+       signal.Notify(sigChan, syscall.SIGINT)
+       signal.Notify(sigChan, syscall.SIGQUIT)
+       return sigChan
+}
+
+func inCodes(code int, codes []int) bool {
+       if codes != nil {
+               for _, c := range codes {
+                       if code == c {
+                               return true
+                       }
+               }
+       }
+       return false
+}
+
+const TASK_TEMPFAIL = 111
+
+type TempFail struct{ error }
+type PermFail struct{}
+
+func (s PermFail) Error() string {
+       return "PermFail"
+}
+
+func substitute(inp string, subst map[string]string) string {
+       for k, v := range subst {
+               inp = strings.Replace(inp, k, v, -1)
+       }
+       return inp
+}
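runner() below builds the replacement map ($(task.tmpdir), $(task.outdir), $(task.keep)); a worked example of this helper under that map:

	subst := map[string]string{"$(task.keep)": "/keep"}
	out := substitute("cat $(task.keep)/file1.txt", subst) // "cat /keep/file1.txt"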
+
+func runner(api IArvadosClient,
+       kc IKeepClient,
+       jobUuid, taskUuid, crunchtmpdir, keepmount string,
+       jobStruct Job, taskStruct Task) error {
+
+       var err error
+       taskp := taskStruct.Parameters
+
+       // If this is task 0 and there are multiple tasks, dispatch subtasks
+       // and exit.
+       if taskStruct.Sequence == 0 {
+               if len(jobStruct.Script_parameters.Tasks) == 1 {
+                       taskp = jobStruct.Script_parameters.Tasks[0]
+               } else {
+                       for _, task := range jobStruct.Script_parameters.Tasks {
+                               err := api.Create("job_tasks",
+                                       map[string]interface{}{
+                                               "job_task": Task{Job_uuid: jobUuid,
+                                                       Created_by_job_task_uuid: taskUuid,
+                                                       Sequence:                 1,
+                                                       Parameters:               task}},
+                                       nil)
+                               if err != nil {
+                                       return TempFail{err}
+                               }
+                       }
+                       err = api.Update("job_tasks", taskUuid,
+                               map[string]interface{}{
+                                       "job_task": Task{
+                                               Output:   "",
+                                               Success:  true,
+                                               Progress: 1.0}},
+                               nil)
+                       if err != nil {
+                               return TempFail{err}
+                       }
+                       return nil
+               }
+       }
+
+       var tmpdir, outdir string
+       tmpdir, outdir, err = setupDirectories(crunchtmpdir, taskUuid)
+       if err != nil {
+               return TempFail{err}
+       }
+
+       replacements := map[string]string{
+               "$(task.tmpdir)": tmpdir,
+               "$(task.outdir)": outdir,
+               "$(task.keep)":   keepmount}
+
+       // Set up subprocess
+       for k, v := range taskp.Command {
+               taskp.Command[k] = substitute(v, replacements)
+       }
+
+       cmd := exec.Command(taskp.Command[0], taskp.Command[1:]...)
+
+       cmd.Dir = outdir
+
+       var stdin, stdout string
+       stdin, stdout, err = setupCommand(cmd, taskp, outdir, replacements)
+       if err != nil {
+               return err
+       }
+
+       // Run subprocess and wait for it to complete
+       if stdin != "" {
+               stdin = " < " + stdin
+       }
+       if stdout != "" {
+               stdout = " > " + stdout
+       }
+       log.Printf("Running %v%v%v", cmd.Args, stdin, stdout)
+
+       var caughtSignal os.Signal
+       sigChan := setupSignals(cmd)
+
+       err = cmd.Start()
+       if err != nil {
+               signal.Stop(sigChan)
+               return TempFail{err}
+       }
+
+       finishedSignalNotify := make(chan struct{})
+       go func(sig <-chan os.Signal) {
+               for sig := range sig {
+                       caughtSignal = sig
+                       cmd.Process.Signal(caughtSignal)
+               }
+               close(finishedSignalNotify)
+       }(sigChan)
+
+       err = cmd.Wait()
+       signal.Stop(sigChan)
+
+       close(sigChan)
+       <-finishedSignalNotify
+
+       if caughtSignal != nil {
+               log.Printf("Caught signal %v", caughtSignal)
+               return PermFail{}
+       }
+
+       if err != nil {
+               // Wait() returns ExitError on non-zero exit code, but we handle
+               // that down below.  So only return if it's not ExitError.
+               if _, ok := err.(*exec.ExitError); !ok {
+                       return TempFail{err}
+               }
+       }
+
+       var success bool
+
+       exitCode := cmd.ProcessState.Sys().(syscall.WaitStatus).ExitStatus()
+
+       log.Printf("Completed with exit code %v", exitCode)
+
+       if inCodes(exitCode, taskp.PermanentFailCodes) {
+               success = false
+       } else if inCodes(exitCode, taskp.TemporaryFailCodes) {
+               return TempFail{fmt.Errorf("Process tempfail with exit code %v", exitCode)}
+       } else if inCodes(exitCode, taskp.SuccessCodes) || cmd.ProcessState.Success() {
+               success = true
+       } else {
+               success = false
+       }
+
+       // Upload output directory
+       manifest, err := WriteTree(kc, outdir)
+       if err != nil {
+               return TempFail{err}
+       }
+
+       // Set status
+       err = api.Update("job_tasks", taskUuid,
+               map[string]interface{}{
+                       "job_task": Task{
+                               Output:   manifest,
+                               Success:  success,
+                               Progress: 1}},
+               nil)
+       if err != nil {
+               return TempFail{err}
+       }
+
+       if success {
+               return nil
+       } else {
+               return PermFail{}
+       }
+}
+
+func main() {
+       api, err := arvadosclient.MakeArvadosClient()
+       if err != nil {
+               log.Fatal(err)
+       }
+
+       jobUuid := os.Getenv("JOB_UUID")
+       taskUuid := os.Getenv("TASK_UUID")
+       tmpdir := os.Getenv("TASK_WORK")
+       keepmount := os.Getenv("TASK_KEEPMOUNT")
+
+       var jobStruct Job
+       var taskStruct Task
+
+       err = api.Get("jobs", jobUuid, nil, &jobStruct)
+       if err != nil {
+               log.Fatal(err)
+       }
+       err = api.Get("job_tasks", taskUuid, nil, &taskStruct)
+       if err != nil {
+               log.Fatal(err)
+       }
+
+       var kc IKeepClient
+       kc, err = keepclient.MakeKeepClient(&api)
+       if err != nil {
+               log.Fatal(err)
+       }
+
+       syscall.Umask(0022)
+       err = runner(api, kc, jobUuid, taskUuid, tmpdir, keepmount, jobStruct, taskStruct)
+
+       if err == nil {
+               os.Exit(0)
+       } else if _, ok := err.(TempFail); ok {
+               log.Print(err)
+               os.Exit(TASK_TEMPFAIL)
+       } else if _, ok := err.(PermFail); ok {
+               os.Exit(1)
+       } else {
+               log.Fatal(err)
+       }
+}
diff --git a/sdk/go/crunchrunner/crunchrunner_test.go b/sdk/go/crunchrunner/crunchrunner_test.go
new file mode 100644 (file)
index 0000000..52d5c1a
--- /dev/null
@@ -0,0 +1,445 @@
+package main
+
+import (
+       "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
+       . "gopkg.in/check.v1"
+       "io"
+       "io/ioutil"
+       "log"
+       "os"
+       "syscall"
+       "testing"
+       "time"
+)
+
+// Gocheck boilerplate
+func Test(t *testing.T) {
+       TestingT(t)
+}
+
+type TestSuite struct{}
+
+// Gocheck boilerplate
+var _ = Suite(&TestSuite{})
+
+type ArvTestClient struct {
+       c        *C
+       manifest string
+       success  bool
+}
+
+func (t ArvTestClient) Create(resourceType string, parameters arvadosclient.Dict, output interface{}) error {
+       return nil
+}
+
+func (t ArvTestClient) Update(resourceType string, uuid string, parameters arvadosclient.Dict, output interface{}) (err error) {
+       t.c.Check(resourceType, Equals, "job_tasks")
+       t.c.Check(parameters, DeepEquals, arvadosclient.Dict{"job_task": Task{
+               Output:   t.manifest,
+               Success:  t.success,
+               Progress: 1}})
+       return nil
+}
+
+func (s *TestSuite) TestSimpleRun(c *C) {
+       tmpdir, _ := ioutil.TempDir("", "")
+       defer func() {
+               os.RemoveAll(tmpdir)
+       }()
+
+       err := runner(ArvTestClient{c, "", true},
+               KeepTestClient{},
+               "zzzz-8i9sb-111111111111111",
+               "zzzz-ot0gb-111111111111111",
+               tmpdir,
+               "",
+               Job{Script_parameters: Tasks{[]TaskDef{TaskDef{
+                       Command: []string{"echo", "foo"}}}}},
+               Task{Sequence: 0})
+       c.Check(err, IsNil)
+}
+
+func checkOutput(c *C, tmpdir string) {
+       file, err := os.Open(tmpdir + "/outdir/output.txt")
+       c.Assert(err, IsNil)
+
+       data := make([]byte, 100)
+       var count int
+       err = nil
+       offset := 0
+       for err == nil {
+               count, err = file.Read(data[offset:])
+               offset += count
+       }
+       c.Assert(err, Equals, io.EOF)
+       c.Check(string(data[0:offset]), Equals, "foo\n")
+}
+
+func (s *TestSuite) TestSimpleRunSubtask(c *C) {
+       tmpdir, _ := ioutil.TempDir("", "")
+       defer func() {
+               os.RemoveAll(tmpdir)
+       }()
+
+       err := runner(ArvTestClient{c,
+               ". d3b07384d113edec49eaa6238ad5ff00+4 0:4:output.txt\n", true},
+               KeepTestClient{},
+               "zzzz-8i9sb-111111111111111",
+               "zzzz-ot0gb-111111111111111",
+               tmpdir,
+               "",
+               Job{Script_parameters: Tasks{[]TaskDef{
+                       TaskDef{Command: []string{"echo", "bar"}},
+                       TaskDef{Command: []string{"echo", "foo"}}}}},
+               Task{Parameters: TaskDef{
+                       Command: []string{"echo", "foo"},
+                       Stdout:  "output.txt"},
+                       Sequence: 1})
+       c.Check(err, IsNil)
+
+       checkOutput(c, tmpdir)
+}
+
+func (s *TestSuite) TestRedirect(c *C) {
+       tmpfile, _ := ioutil.TempFile("", "")
+       tmpfile.Write([]byte("foo\n"))
+       tmpfile.Close()
+       defer os.Remove(tmpfile.Name())
+
+       tmpdir, _ := ioutil.TempDir("", "")
+       defer func() {
+               os.RemoveAll(tmpdir)
+       }()
+
+       err := runner(ArvTestClient{c,
+               ". d3b07384d113edec49eaa6238ad5ff00+4 0:4:output.txt\n", true},
+               KeepTestClient{},
+               "zzzz-8i9sb-111111111111111",
+               "zzzz-ot0gb-111111111111111",
+               tmpdir,
+               "",
+               Job{Script_parameters: Tasks{[]TaskDef{TaskDef{
+                       Command: []string{"cat"},
+                       Stdout:  "output.txt",
+                       Stdin:   tmpfile.Name()}}}},
+               Task{Sequence: 0})
+       c.Check(err, IsNil)
+
+       checkOutput(c, tmpdir)
+}
+
+func (s *TestSuite) TestEnv(c *C) {
+       tmpdir, _ := ioutil.TempDir("", "")
+       defer func() {
+               os.RemoveAll(tmpdir)
+       }()
+
+       err := runner(ArvTestClient{c, ". d3b07384d113edec49eaa6238ad5ff00+4 0:4:output.txt\n", true},
+               KeepTestClient{},
+               "zzzz-8i9sb-111111111111111",
+               "zzzz-ot0gb-111111111111111",
+               tmpdir,
+               "",
+               Job{Script_parameters: Tasks{[]TaskDef{TaskDef{
+                       Command: []string{"/bin/sh", "-c", "echo $BAR"},
+                       Stdout:  "output.txt",
+                       Env:     map[string]string{"BAR": "foo"}}}}},
+               Task{Sequence: 0})
+       c.Check(err, IsNil)
+       checkOutput(c, tmpdir)
+}
+
+func (s *TestSuite) TestEnvSubstitute(c *C) {
+       tmpdir, _ := ioutil.TempDir("", "")
+       defer func() {
+               os.RemoveAll(tmpdir)
+       }()
+
+       err := runner(ArvTestClient{c, ". d3b07384d113edec49eaa6238ad5ff00+4 0:4:output.txt\n", true},
+               KeepTestClient{},
+               "zzzz-8i9sb-111111111111111",
+               "zzzz-ot0gb-111111111111111",
+               tmpdir,
+               "foo\n",
+               Job{Script_parameters: Tasks{[]TaskDef{TaskDef{
+                       Command: []string{"/bin/sh", "-c", "echo $BAR"},
+                       Stdout:  "output.txt",
+                       Env:     map[string]string{"BAR": "$(task.keep)"}}}}},
+               Task{Sequence: 0})
+       c.Check(err, IsNil)
+       checkOutput(c, tmpdir)
+}
+
+func (s *TestSuite) TestEnvReplace(c *C) {
+       tmpdir, _ := ioutil.TempDir("", "")
+       defer func() {
+               os.RemoveAll(tmpdir)
+       }()
+
+       err := runner(ArvTestClient{c, ". d3b07384d113edec49eaa6238ad5ff00+4 0:4:output.txt\n", true},
+               KeepTestClient{},
+               "zzzz-8i9sb-111111111111111",
+               "zzzz-ot0gb-111111111111111",
+               tmpdir,
+               "",
+               Job{Script_parameters: Tasks{[]TaskDef{TaskDef{
+                       Command: []string{"/bin/sh", "-c", "echo $PATH"},
+                       Stdout:  "output.txt",
+                       Env:     map[string]string{"PATH": "foo"}}}}},
+               Task{Sequence: 0})
+       c.Check(err, IsNil)
+       checkOutput(c, tmpdir)
+}
+
+type SubtaskTestClient struct {
+       c     *C
+       parms []Task
+       i     int
+}
+
+func (t *SubtaskTestClient) Create(resourceType string, parameters arvadosclient.Dict, output interface{}) error {
+       t.c.Check(resourceType, Equals, "job_tasks")
+       t.c.Check(parameters, DeepEquals, arvadosclient.Dict{"job_task": t.parms[t.i]})
+       t.i++
+       return nil
+}
+
+func (t SubtaskTestClient) Update(resourceType string, uuid string, parameters arvadosclient.Dict, output interface{}) (err error) {
+       return nil
+}
+
+func (s *TestSuite) TestScheduleSubtask(c *C) {
+
+       api := SubtaskTestClient{c, []Task{
+               Task{Job_uuid: "zzzz-8i9sb-111111111111111",
+                       Created_by_job_task_uuid: "zzzz-ot0gb-111111111111111",
+                       Sequence:                 1,
+                       Parameters: TaskDef{
+                               Command: []string{"echo", "bar"}}},
+               Task{Job_uuid: "zzzz-8i9sb-111111111111111",
+                       Created_by_job_task_uuid: "zzzz-ot0gb-111111111111111",
+                       Sequence:                 1,
+                       Parameters: TaskDef{
+                               Command: []string{"echo", "foo"}}}},
+               0}
+
+       tmpdir, _ := ioutil.TempDir("", "")
+       defer func() {
+               os.RemoveAll(tmpdir)
+       }()
+
+       err := runner(&api, KeepTestClient{},
+               "zzzz-8i9sb-111111111111111",
+               "zzzz-ot0gb-111111111111111",
+               tmpdir,
+               "",
+               Job{Script_parameters: Tasks{[]TaskDef{
+                       TaskDef{Command: []string{"echo", "bar"}},
+                       TaskDef{Command: []string{"echo", "foo"}}}}},
+               Task{Sequence: 0})
+       c.Check(err, IsNil)
+
+}
+
+func (s *TestSuite) TestRunFail(c *C) {
+       tmpdir, _ := ioutil.TempDir("", "")
+       defer func() {
+               os.RemoveAll(tmpdir)
+       }()
+
+       err := runner(ArvTestClient{c, "", false}, KeepTestClient{},
+               "zzzz-8i9sb-111111111111111",
+               "zzzz-ot0gb-111111111111111",
+               tmpdir,
+               "",
+               Job{Script_parameters: Tasks{[]TaskDef{TaskDef{
+                       Command: []string{"/bin/sh", "-c", "exit 1"}}}}},
+               Task{Sequence: 0})
+       c.Check(err, FitsTypeOf, PermFail{})
+}
+
+func (s *TestSuite) TestRunSuccessCode(c *C) {
+       tmpdir, _ := ioutil.TempDir("", "")
+       defer func() {
+               os.RemoveAll(tmpdir)
+       }()
+
+       err := runner(ArvTestClient{c, "", true}, KeepTestClient{},
+               "zzzz-8i9sb-111111111111111",
+               "zzzz-ot0gb-111111111111111",
+               tmpdir,
+               "",
+               Job{Script_parameters: Tasks{[]TaskDef{TaskDef{
+                       Command:      []string{"/bin/sh", "-c", "exit 1"},
+                       SuccessCodes: []int{0, 1}}}}},
+               Task{Sequence: 0})
+       c.Check(err, IsNil)
+}
+
+func (s *TestSuite) TestRunFailCode(c *C) {
+       tmpdir, _ := ioutil.TempDir("", "")
+       defer func() {
+               os.RemoveAll(tmpdir)
+       }()
+
+       err := runner(ArvTestClient{c, "", false}, KeepTestClient{},
+               "zzzz-8i9sb-111111111111111",
+               "zzzz-ot0gb-111111111111111",
+               tmpdir,
+               "",
+               Job{Script_parameters: Tasks{[]TaskDef{TaskDef{
+                       Command:            []string{"/bin/sh", "-c", "exit 0"},
+                       PermanentFailCodes: []int{0, 1}}}}},
+               Task{Sequence: 0})
+       c.Check(err, FitsTypeOf, PermFail{})
+}
+
+func (s *TestSuite) TestRunTempFailCode(c *C) {
+       tmpdir, _ := ioutil.TempDir("", "")
+       defer func() {
+               os.RemoveAll(tmpdir)
+       }()
+
+       err := runner(ArvTestClient{c, "", false}, KeepTestClient{},
+               "zzzz-8i9sb-111111111111111",
+               "zzzz-ot0gb-111111111111111",
+               tmpdir,
+               "",
+               Job{Script_parameters: Tasks{[]TaskDef{TaskDef{
+                       Command:            []string{"/bin/sh", "-c", "exit 1"},
+                       TemporaryFailCodes: []int{1}}}}},
+               Task{Sequence: 0})
+       c.Check(err, FitsTypeOf, TempFail{})
+}
+
+func (s *TestSuite) TestVwd(c *C) {
+       tmpfile, _ := ioutil.TempFile("", "")
+       tmpfile.Write([]byte("foo\n"))
+       tmpfile.Close()
+       defer os.Remove(tmpfile.Name())
+
+       tmpdir, _ := ioutil.TempDir("", "")
+       defer func() {
+               os.RemoveAll(tmpdir)
+       }()
+
+       err := runner(ArvTestClient{c, ". d3b07384d113edec49eaa6238ad5ff00+4 0:4:output.txt\n", true},
+               KeepTestClient{},
+               "zzzz-8i9sb-111111111111111",
+               "zzzz-ot0gb-111111111111111",
+               tmpdir,
+               "",
+               Job{Script_parameters: Tasks{[]TaskDef{TaskDef{
+                       Command: []string{"ls", "output.txt"},
+                       Vwd: map[string]string{
+                               "output.txt": tmpfile.Name()}}}}},
+               Task{Sequence: 0})
+       c.Check(err, IsNil)
+       checkOutput(c, tmpdir)
+}
+
+func (s *TestSuite) TestSubstitutionStdin(c *C) {
+       keepmount, _ := ioutil.TempDir("", "")
+       ioutil.WriteFile(keepmount+"/"+"file1.txt", []byte("foo\n"), 0600)
+       defer func() {
+               os.RemoveAll(keepmount)
+       }()
+
+       log.Print("Keepmount is ", keepmount)
+
+       tmpdir, _ := ioutil.TempDir("", "")
+       defer func() {
+               os.RemoveAll(tmpdir)
+       }()
+
+       log.Print("tmpdir is ", tmpdir)
+
+       err := runner(ArvTestClient{c,
+               ". d3b07384d113edec49eaa6238ad5ff00+4 0:4:output.txt\n", true},
+               KeepTestClient{},
+               "zzzz-8i9sb-111111111111111",
+               "zzzz-ot0gb-111111111111111",
+               tmpdir,
+               keepmount,
+               Job{Script_parameters: Tasks{[]TaskDef{TaskDef{
+                       Command: []string{"cat"},
+                       Stdout:  "output.txt",
+                       Stdin:   "$(task.keep)/file1.txt"}}}},
+               Task{Sequence: 0})
+       c.Check(err, IsNil)
+       checkOutput(c, tmpdir)
+}
+
+func (s *TestSuite) TestSubstitutionCommandLine(c *C) {
+       keepmount, _ := ioutil.TempDir("", "")
+       ioutil.WriteFile(keepmount+"/"+"file1.txt", []byte("foo\n"), 0600)
+       defer func() {
+               os.RemoveAll(keepmount)
+       }()
+
+       tmpdir, _ := ioutil.TempDir("", "")
+       defer func() {
+               os.RemoveAll(tmpdir)
+       }()
+
+       err := runner(ArvTestClient{c,
+               ". d3b07384d113edec49eaa6238ad5ff00+4 0:4:output.txt\n", true},
+               KeepTestClient{},
+               "zzzz-8i9sb-111111111111111",
+               "zzzz-ot0gb-111111111111111",
+               tmpdir,
+               keepmount,
+               Job{Script_parameters: Tasks{[]TaskDef{TaskDef{
+                       Command: []string{"cat", "$(task.keep)/file1.txt"},
+                       Stdout:  "output.txt"}}}},
+               Task{Sequence: 0})
+       c.Check(err, IsNil)
+
+       checkOutput(c, tmpdir)
+}
+
+func (s *TestSuite) TestSignal(c *C) {
+       tmpdir, _ := ioutil.TempDir("", "")
+       defer func() {
+               os.RemoveAll(tmpdir)
+       }()
+
+       go func() {
+               time.Sleep(1 * time.Second)
+               self, _ := os.FindProcess(os.Getpid())
+               self.Signal(syscall.SIGINT)
+       }()
+
+       err := runner(ArvTestClient{c,
+               "", false},
+               KeepTestClient{},
+               "zzzz-8i9sb-111111111111111",
+               "zzzz-ot0gb-111111111111111",
+               tmpdir,
+               "",
+               Job{Script_parameters: Tasks{[]TaskDef{TaskDef{
+                       Command: []string{"sleep", "4"}}}}},
+               Task{Sequence: 0})
+       c.Check(err, FitsTypeOf, PermFail{})
+
+}
+
+func (s *TestSuite) TestQuoting(c *C) {
+       tmpdir, _ := ioutil.TempDir("", "")
+       defer func() {
+               os.RemoveAll(tmpdir)
+       }()
+
+       err := runner(ArvTestClient{c,
+               "./s\\040ub:dir d3b07384d113edec49eaa6238ad5ff00+4 0:4::e\\040vil\n", true},
+               KeepTestClient{},
+               "zzzz-8i9sb-111111111111111",
+               "zzzz-ot0gb-111111111111111",
+               tmpdir,
+               "",
+               Job{Script_parameters: Tasks{[]TaskDef{TaskDef{
+                       Command: []string{"echo", "foo"},
+                       Stdout:  "s ub:dir/:e vi\nl"}}}},
+               Task{Sequence: 0})
+       c.Check(err, IsNil)
+}
diff --git a/sdk/go/crunchrunner/upload.go b/sdk/go/crunchrunner/upload.go
new file mode 100644 (file)
index 0000000..06a6678
--- /dev/null
@@ -0,0 +1,217 @@
+package main
+
+import (
+       "bytes"
+       "crypto/md5"
+       "errors"
+       "fmt"
+       "git.curoverse.com/arvados.git/sdk/go/keepclient"
+       "git.curoverse.com/arvados.git/sdk/go/manifest"
+       "io"
+       "log"
+       "os"
+       "path/filepath"
+       "sort"
+       "strings"
+)
+
+type Block struct {
+       data   []byte
+       offset int64
+}
+
+type ManifestStreamWriter struct {
+       *ManifestWriter
+       *manifest.ManifestStream
+       offset int64
+       *Block
+       uploader chan *Block
+       finish   chan []error
+}
+
+type IKeepClient interface {
+       PutHB(hash string, buf []byte) (string, int, error)
+}
+
+func (m *ManifestStreamWriter) Write(p []byte) (int, error) {
+       n, err := m.ReadFrom(bytes.NewReader(p))
+       return int(n), err
+}
+
+func (m *ManifestStreamWriter) ReadFrom(r io.Reader) (n int64, err error) {
+       var total int64
+       var count int
+
+       for err == nil {
+               if m.Block == nil {
+                       m.Block = &Block{make([]byte, keepclient.BLOCKSIZE), 0}
+               }
+               count, err = r.Read(m.Block.data[m.Block.offset:])
+               total += int64(count)
+               m.Block.offset += int64(count)
+               if m.Block.offset == keepclient.BLOCKSIZE {
+                       m.uploader <- m.Block
+                       m.Block = nil
+               }
+       }
+
+       if err == io.EOF {
+               return total, nil
+       }
+       return total, err
+
+}
+
+func (m *ManifestStreamWriter) goUpload() {
+       var errors []error
+       uploader := m.uploader
+       finish := m.finish
+       for block := range uploader {
+               hash := fmt.Sprintf("%x", md5.Sum(block.data[0:block.offset]))
+               signedHash, _, err := m.ManifestWriter.IKeepClient.PutHB(hash, block.data[0:block.offset])
+               if err != nil {
+                       errors = append(errors, err)
+               } else {
+                       m.ManifestStream.Blocks = append(m.ManifestStream.Blocks, signedHash)
+               }
+       }
+       finish <- errors
+}
+
+type ManifestWriter struct {
+       IKeepClient
+       stripPrefix string
+       Streams     map[string]*ManifestStreamWriter
+}
+
+func (m *ManifestWriter) WalkFunc(path string, info os.FileInfo, err error) error {
+       if info.IsDir() {
+               return nil
+       }
+
+       var dir string
+       if len(path) > (len(m.stripPrefix) + len(info.Name()) + 1) {
+               dir = path[len(m.stripPrefix)+1 : (len(path) - len(info.Name()) - 1)]
+       }
+       if dir == "" {
+               dir = "."
+       }
+
+       fn := path[(len(path) - len(info.Name())):]
+
+       if m.Streams[dir] == nil {
+               m.Streams[dir] = &ManifestStreamWriter{
+                       m,
+                       &manifest.ManifestStream{StreamName: dir},
+                       0,
+                       nil,
+                       make(chan *Block),
+                       make(chan []error)}
+               go m.Streams[dir].goUpload()
+       }
+
+       stream := m.Streams[dir]
+
+       fileStart := stream.offset
+
+       file, err := os.Open(path)
+       if err != nil {
+               return err
+       }
+
+       log.Printf("Uploading %v/%v (%v bytes)", dir, fn, info.Size())
+
+       var count int64
+       count, err = io.Copy(stream, file)
+       if err != nil {
+               return err
+       }
+
+       stream.offset += count
+
+       stream.ManifestStream.FileTokens = append(stream.ManifestStream.FileTokens,
+               fmt.Sprintf("%v:%v:%v", fileStart, count, fn))
+
+       return nil
+}
+
+func (m *ManifestWriter) Finish() error {
+       var errstring string
+       for _, stream := range m.Streams {
+               if stream.uploader == nil {
+                       continue
+               }
+               if stream.Block != nil {
+                       stream.uploader <- stream.Block
+               }
+               close(stream.uploader)
+               stream.uploader = nil
+
+               errors := <-stream.finish
+               close(stream.finish)
+               stream.finish = nil
+
+               for _, r := range errors {
+                       errstring = fmt.Sprintf("%v%v\n", errstring, r.Error())
+               }
+       }
+       if errstring != "" {
+               return errors.New(errstring)
+       } else {
+               return nil
+       }
+}
+
+func (m *ManifestWriter) ManifestText() string {
+       m.Finish()
+       var buf bytes.Buffer
+
+       dirs := make([]string, len(m.Streams))
+       i := 0
+       for k := range m.Streams {
+               dirs[i] = k
+               i++
+       }
+       sort.Strings(dirs)
+
+       for _, k := range dirs {
+               v := m.Streams[k]
+
+               if k == "." {
+                       buf.WriteString(".")
+               } else {
+                       k = strings.Replace(k, " ", "\\040", -1)
+                       k = strings.Replace(k, "\n", "", -1)
+                       buf.WriteString("./" + k)
+               }
+               for _, b := range v.Blocks {
+                       buf.WriteString(" ")
+                       buf.WriteString(b)
+               }
+               for _, f := range v.FileTokens {
+                       buf.WriteString(" ")
+                       f = strings.Replace(f, " ", "\\040", -1)
+                       f = strings.Replace(f, "\n", "", -1)
+                       buf.WriteString(f)
+               }
+               buf.WriteString("\n")
+       }
+       return buf.String()
+}
+
+func WriteTree(kc IKeepClient, root string) (manifest string, err error) {
+       mw := ManifestWriter{kc, root, map[string]*ManifestStreamWriter{}}
+       err = filepath.Walk(root, mw.WalkFunc)
+
+       if err != nil {
+               return "", err
+       }
+
+       err = mw.Finish()
+       if err != nil {
+               return "", err
+       }
+
+       return mw.ManifestText(), nil
+}
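Usage sketch, matching how the crunchrunner task harness and the tests below call it (kc is any IKeepClient implementation):

	manifest, err := WriteTree(kc, outdir)
	if err != nil {
		// a block failed to upload; callers treat this as a temporary failure
	}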
diff --git a/sdk/go/crunchrunner/upload_test.go b/sdk/go/crunchrunner/upload_test.go
new file mode 100644 (file)
index 0000000..a2bf0ac
--- /dev/null
@@ -0,0 +1,140 @@
+package main
+
+import (
+       "crypto/md5"
+       "errors"
+       "fmt"
+       . "gopkg.in/check.v1"
+       "io/ioutil"
+       "os"
+)
+
+type UploadTestSuite struct{}
+
+// Gocheck boilerplate
+var _ = Suite(&UploadTestSuite{})
+
+type KeepTestClient struct {
+}
+
+func (k KeepTestClient) PutHB(hash string, buf []byte) (string, int, error) {
+       return fmt.Sprintf("%x+%v", md5.Sum(buf), len(buf)), len(buf), nil
+}
+
+func (s *TestSuite) TestSimpleUpload(c *C) {
+       tmpdir, _ := ioutil.TempDir("", "")
+       defer func() {
+               os.RemoveAll(tmpdir)
+       }()
+
+       ioutil.WriteFile(tmpdir+"/"+"file1.txt", []byte("foo"), 0600)
+
+       str, err := WriteTree(KeepTestClient{}, tmpdir)
+       c.Check(err, IsNil)
+       c.Check(str, Equals, ". acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:file1.txt\n")
+}
+
+func (s *UploadTestSuite) TestSimpleUploadTwofiles(c *C) {
+       tmpdir, _ := ioutil.TempDir("", "")
+       defer func() {
+               os.RemoveAll(tmpdir)
+       }()
+
+       ioutil.WriteFile(tmpdir+"/"+"file1.txt", []byte("foo"), 0600)
+       ioutil.WriteFile(tmpdir+"/"+"file2.txt", []byte("bar"), 0600)
+
+       str, err := WriteTree(KeepTestClient{}, tmpdir)
+       c.Check(err, IsNil)
+       c.Check(str, Equals, ". 3858f62230ac3c915f300c664312c63f+6 0:3:file1.txt 3:3:file2.txt\n")
+}
+
+func (s *UploadTestSuite) TestSimpleUploadSubdir(c *C) {
+       tmpdir, _ := ioutil.TempDir("", "")
+       defer func() {
+               os.RemoveAll(tmpdir)
+       }()
+
+       os.Mkdir(tmpdir+"/subdir", 0700)
+
+       ioutil.WriteFile(tmpdir+"/"+"file1.txt", []byte("foo"), 0600)
+       ioutil.WriteFile(tmpdir+"/subdir/file2.txt", []byte("bar"), 0600)
+
+       str, err := WriteTree(KeepTestClient{}, tmpdir)
+       c.Check(err, IsNil)
+       c.Check(str, Equals, `. acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:file1.txt
+./subdir 37b51d194a7513e45b56f6524f2d51f2+3 0:3:file2.txt
+`)
+}
+
+func (s *UploadTestSuite) TestSimpleUploadLarge(c *C) {
+       tmpdir, _ := ioutil.TempDir("", "")
+       defer func() {
+               os.RemoveAll(tmpdir)
+       }()
+
+       file, _ := os.Create(tmpdir + "/" + "file1.txt")
+       data := make([]byte, 1024*1024-1)
+       for i := range data {
+               data[i] = byte(i % 10)
+       }
+       for i := 0; i < 65; i++ {
+               file.Write(data)
+       }
+       file.Close()
+
+       ioutil.WriteFile(tmpdir+"/"+"file2.txt", []byte("bar"), 0600)
+
+       str, err := WriteTree(KeepTestClient{}, tmpdir)
+       c.Check(err, IsNil)
+       c.Check(str, Equals, ". 00ecf01e0d93385115c9f8bed757425d+67108864 485cd630387b6b1846fe429f261ea05f+1048514 0:68157375:file1.txt 68157375:3:file2.txt\n")
+}
+
+func (s *UploadTestSuite) TestUploadEmptySubdir(c *C) {
+       tmpdir, _ := ioutil.TempDir("", "")
+       defer func() {
+               os.RemoveAll(tmpdir)
+       }()
+
+       os.Mkdir(tmpdir+"/subdir", 0700)
+
+       ioutil.WriteFile(tmpdir+"/"+"file1.txt", []byte("foo"), 0600)
+
+       str, err := WriteTree(KeepTestClient{}, tmpdir)
+       c.Check(err, IsNil)
+       c.Check(str, Equals, `. acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:file1.txt
+`)
+}
+
+func (s *UploadTestSuite) TestUploadEmptyFile(c *C) {
+       tmpdir, _ := ioutil.TempDir("", "")
+       defer func() {
+               os.RemoveAll(tmpdir)
+       }()
+
+       ioutil.WriteFile(tmpdir+"/"+"file1.txt", []byte(""), 0600)
+
+       str, err := WriteTree(KeepTestClient{}, tmpdir)
+       c.Check(err, IsNil)
+       c.Check(str, Equals, `. d41d8cd98f00b204e9800998ecf8427e+0 0:0:file1.txt
+`)
+}
+
+type KeepErrorTestClient struct {
+}
+
+func (k KeepErrorTestClient) PutHB(hash string, buf []byte) (string, int, error) {
+       return "", 0, errors.New("Failed!")
+}
+
+func (s *UploadTestSuite) TestUploadError(c *C) {
+       tmpdir, _ := ioutil.TempDir("", "")
+       defer func() {
+               os.RemoveAll(tmpdir)
+       }()
+
+       ioutil.WriteFile(tmpdir+"/"+"file1.txt", []byte("foo"), 0600)
+
+       str, err := WriteTree(KeepErrorTestClient{}, tmpdir)
+       c.Check(err, NotNil)
+       c.Check(str, Equals, "")
+}
diff --git a/sdk/go/keepclient/collectionreader.go b/sdk/go/keepclient/collectionreader.go
new file mode 100644 (file)
index 0000000..b532a16
--- /dev/null
@@ -0,0 +1,252 @@
+package keepclient
+
+import (
+       "errors"
+       "io"
+       "os"
+
+       "git.curoverse.com/arvados.git/sdk/go/manifest"
+)
+
+// ReadCloserWithLen extends io.ReadCloser with a Len() method that
+// returns the total number of bytes available to read.
+type ReadCloserWithLen interface {
+       io.ReadCloser
+       Len() uint64
+}
+
+const (
+       // After reading a data block from Keep, cfReader slices it up
+       // and sends the slices to a buffered channel to be consumed
+       // by the caller via Read().
+       //
+       // dataSliceSize is the maximum size of the slices, and
+       // therefore the maximum number of bytes that will be returned
+       // by a single call to Read().
+       dataSliceSize = 1 << 20
+)
+
+// ErrNoManifest indicates the given collection has no manifest
+// information (e.g., manifest_text was excluded by a "select"
+// parameter when retrieving the collection record).
+var ErrNoManifest = errors.New("Collection has no manifest")
+
+// CollectionFileReader returns a ReadCloserWithLen that reads file
+// content from a collection. The filename must be given relative to
+// the root of the collection, without a leading "./".
+func (kc *KeepClient) CollectionFileReader(collection map[string]interface{}, filename string) (ReadCloserWithLen, error) {
+       mText, ok := collection["manifest_text"].(string)
+       if !ok {
+               return nil, ErrNoManifest
+       }
+       m := manifest.Manifest{Text: mText}
+       rdrChan := make(chan *cfReader)
+       go kc.queueSegmentsToGet(m, filename, rdrChan)
+       r, ok := <-rdrChan
+       if !ok {
+               return nil, os.ErrNotExist
+       }
+       return r, nil
+}
+
+// queueSegmentsToGet sends segments for the specified file to
+// r.toGet, sends a *cfReader to rdrChan if the specified file is
+// found (even if it's empty), and then closes rdrChan.
+func (kc *KeepClient) queueSegmentsToGet(m manifest.Manifest, filename string, rdrChan chan *cfReader) {
+       defer close(rdrChan)
+
+       // q is a queue of FileSegments that we have received but
+       // haven't yet been able to send to toGet.
+       var q []*manifest.FileSegment
+       var r *cfReader
+       for seg := range m.FileSegmentIterByName(filename) {
+               if r == nil {
+                       // We've just discovered that the requested
+                       // filename does appear in the manifest, so we
+                       // can return a real reader (not nil) from
+                       // CollectionFileReader().
+                       r = newCFReader(kc)
+                       rdrChan <- r
+               }
+               q = append(q, seg)
+               r.totalSize += uint64(seg.Len)
+               // Send toGet as many segments as we can until it
+               // blocks.
+       Q:
+               for len(q) > 0 {
+                       select {
+                       case r.toGet <- q[0]:
+                               q = q[1:]
+                       default:
+                               break Q
+                       }
+               }
+       }
+       if r == nil {
+               // File not found.
+               return
+       }
+       close(r.countDone)
+       for _, seg := range q {
+               r.toGet <- seg
+       }
+       close(r.toGet)
+}
+
+type cfReader struct {
+       keepClient *KeepClient
+
+       // doGet() reads FileSegments from toGet, gets the data from
+       // Keep, and sends byte slices to toRead to be consumed by
+       // Read().
+       toGet chan *manifest.FileSegment
+
+       // toRead is a buffered channel, sized to fit one full Keep
+       // block. This lets us verify checksums without having a
+       // store-and-forward delay between blocks: by the time the
+       // caller starts receiving data from block N, cfReader is
+       // starting to fetch block N+1. A larger buffer would be
+       // useful for a caller whose read speed varies a lot.
+       toRead chan []byte
+
+       // bytes ready to send next time someone calls Read()
+       buf []byte
+
+       // Total size of the file being read. Not safe to read this
+       // until countDone is closed.
+       totalSize uint64
+       countDone chan struct{}
+
+       // First error encountered.
+       err error
+
+       // errNotNil is closed IFF err contains a non-nil error.
+       // Receiving from it will block until an error occurs.
+       errNotNil chan struct{}
+
+       // rdrClosed is closed IFF the reader's Close() method has
+       // been called. Any goroutines associated with the reader will
+       // stop and free up resources when they notice this channel is
+       // closed.
+       rdrClosed chan struct{}
+}
+
+func (r *cfReader) Read(outbuf []byte) (int, error) {
+       if r.Error() != nil {
+               // Short circuit: the caller might as well find out
+               // now that we hit an error, even if there's buffered
+               // data we could return.
+               return 0, r.Error()
+       }
+       for len(r.buf) == 0 {
+               // Private buffer was emptied out by the last Read()
+               // (or this is the first Read() and r.buf is nil).
+               // Read from r.toRead until we get a non-empty slice
+               // or hit an error.
+               var ok bool
+               r.buf, ok = <-r.toRead
+               if r.Error() != nil {
+                       // Error encountered while waiting for bytes
+                       return 0, r.Error()
+               } else if !ok {
+                       // No more bytes to read, no error encountered
+                       return 0, io.EOF
+               }
+       }
+       // Copy as much as possible from our private buffer to the
+       // caller's buffer
+       n := len(r.buf)
+       if len(r.buf) > len(outbuf) {
+               n = len(outbuf)
+       }
+       copy(outbuf[:n], r.buf[:n])
+
+       // Next call to Read() will continue where we left off
+       r.buf = r.buf[n:]
+
+       return n, nil
+}
+
+// Close releases resources. It returns a non-nil error if an error
+// was encountered by the reader.
+func (r *cfReader) Close() error {
+       close(r.rdrClosed)
+       return r.Error()
+}
+
+// Error returns an error if one has been encountered, otherwise
+// nil. It is safe to call from any goroutine.
+func (r *cfReader) Error() error {
+       select {
+       case <-r.errNotNil:
+               return r.err
+       default:
+               return nil
+       }
+}
+
+// Len returns the total number of bytes in the file being read. If
+// necessary, it waits for manifest parsing to finish.
+func (r *cfReader) Len() uint64 {
+       // Wait for all segments to be counted
+       <-r.countDone
+       return r.totalSize
+}
+
+func (r *cfReader) doGet() {
+       defer close(r.toRead)
+GET:
+       for fs := range r.toGet {
+               rdr, _, _, err := r.keepClient.Get(fs.Locator)
+               if err != nil {
+                       r.err = err
+                       close(r.errNotNil)
+                       return
+               }
+               buf := make([]byte, fs.Offset+fs.Len)
+               _, err = io.ReadFull(rdr, buf)
+               // Close the block reader now that we have the bytes
+               // we need; otherwise the underlying HTTP response
+               // body would leak.
+               rdr.Close()
+               if err != nil {
+                       r.err = err
+                       close(r.errNotNil)
+                       return
+               }
+               for bOff, bLen := fs.Offset, dataSliceSize; bOff < fs.Offset+fs.Len && bLen > 0; bOff += bLen {
+                       if bOff+bLen > fs.Offset+fs.Len {
+                               bLen = fs.Offset + fs.Len - bOff
+                       }
+                       select {
+                       case r.toRead <- buf[bOff : bOff+bLen]:
+                       case <-r.rdrClosed:
+                               // Reader is closed: no point sending
+                               // anything more to toRead.
+                               break GET
+                       }
+               }
+               // It is possible that r.rdrClosed is closed but we
+               // never noticed because r.toRead was also ready in
+               // every select{} above. Here we check before wasting
+               // a keepclient.Get() call.
+               select {
+               case <-r.rdrClosed:
+                       break GET
+               default:
+               }
+       }
+       // In case we exited the above loop early: before returning,
+       // drain the toGet channel so its sender doesn't sit around
+       // blocking forever.
+       for range r.toGet {
+       }
+}
+
+func newCFReader(kc *KeepClient) (r *cfReader) {
+       r = new(cfReader)
+       r.keepClient = kc
+       r.rdrClosed = make(chan struct{})
+       r.errNotNil = make(chan struct{})
+       r.toGet = make(chan *manifest.FileSegment, 2)
+       r.toRead = make(chan []byte, (BLOCKSIZE+dataSliceSize-1)/dataSliceSize)
+       r.countDone = make(chan struct{})
+       go r.doGet()
+       return
+}
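To illustrate the intended call pattern, a brief caller-side sketch (shown as if in package keepclient; the helper name is made up). It assumes kc is a configured *KeepClient and the collection record was fetched with manifest_text included:

// readFileFromCollection is a hypothetical helper that reads one
// file out of a collection record and returns its contents.
func readFileFromCollection(kc *KeepClient, collection map[string]interface{}, filename string) ([]byte, error) {
        rdr, err := kc.CollectionFileReader(collection, filename)
        if err != nil {
                // os.ErrNotExist if the file is not in the manifest;
                // ErrNoManifest if manifest_text was missing.
                return nil, err
        }
        defer rdr.Close()
        buf := make([]byte, rdr.Len()) // Len() blocks until segment counting is done
        _, err = io.ReadFull(rdr, buf)
        return buf, err
}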
diff --git a/sdk/go/keepclient/collectionreader_test.go b/sdk/go/keepclient/collectionreader_test.go
new file mode 100644 (file)
index 0000000..58a047c
--- /dev/null
@@ -0,0 +1,223 @@
+package keepclient
+
+import (
+       "crypto/md5"
+       "crypto/rand"
+       "fmt"
+       "io"
+       "io/ioutil"
+       "net/http"
+       "os"
+       "strconv"
+       "strings"
+
+       "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
+       "git.curoverse.com/arvados.git/sdk/go/arvadostest"
+       check "gopkg.in/check.v1"
+)
+
+var _ = check.Suite(&CollectionReaderUnit{})
+
+type CollectionReaderUnit struct {
+       arv     arvadosclient.ArvadosClient
+       kc      *KeepClient
+       handler SuccessHandler
+}
+
+func (s *CollectionReaderUnit) SetUpTest(c *check.C) {
+       var err error
+       s.arv, err = arvadosclient.MakeArvadosClient()
+       c.Assert(err, check.IsNil)
+       s.arv.ApiToken = arvadostest.ActiveToken
+
+       s.kc, err = MakeKeepClient(&s.arv)
+       c.Assert(err, check.IsNil)
+
+       s.handler = SuccessHandler{
+               disk: make(map[string][]byte),
+               lock: make(chan struct{}, 1),
+               ops:  new(int),
+       }
+       localRoots := make(map[string]string)
+       for i, k := range RunSomeFakeKeepServers(s.handler, 4) {
+               localRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
+       }
+       s.kc.SetServiceRoots(localRoots, localRoots, nil)
+}
+
+type SuccessHandler struct {
+       disk map[string][]byte
+       lock chan struct{} // channel with buffer==1: full when an operation is in progress.
+       ops  *int          // number of operations completed
+}
+
+func (h SuccessHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
+       switch req.Method {
+       case "PUT":
+               buf, err := ioutil.ReadAll(req.Body)
+               if err != nil {
+                       resp.WriteHeader(500)
+                       return
+               }
+               pdh := fmt.Sprintf("%x+%d", md5.Sum(buf), len(buf))
+               h.lock <- struct{}{}
+               h.disk[pdh] = buf
+               if h.ops != nil {
+                       (*h.ops)++
+               }
+               <-h.lock
+               resp.Write([]byte(pdh))
+       case "GET":
+               pdh := req.URL.Path[1:]
+               h.lock <- struct{}{}
+               buf, ok := h.disk[pdh]
+               if h.ops != nil {
+                       (*h.ops)++
+               }
+               <-h.lock
+               if !ok {
+                       resp.WriteHeader(http.StatusNotFound)
+               } else {
+                       resp.Write(buf)
+               }
+       default:
+               resp.WriteHeader(http.StatusMethodNotAllowed)
+       }
+}
+
+type rdrTest struct {
+       mt   string      // manifest text
+       f    string      // filename
+       want interface{} // error or string to expect
+}
+
+func (s *CollectionReaderUnit) TestCollectionReaderContent(c *check.C) {
+       s.kc.PutB([]byte("foo"))
+       s.kc.PutB([]byte("bar"))
+       s.kc.PutB([]byte("Hello world\n"))
+       s.kc.PutB([]byte(""))
+
+       mt := arvadostest.PathologicalManifest
+
+       for _, testCase := range []rdrTest{
+               {mt: mt, f: "zzzz", want: os.ErrNotExist},
+               {mt: mt, f: "frob", want: os.ErrNotExist},
+               {mt: mt, f: "/segmented/frob", want: os.ErrNotExist},
+               {mt: mt, f: "./segmented/frob", want: os.ErrNotExist},
+               {mt: mt, f: "/f", want: os.ErrNotExist},
+               {mt: mt, f: "./f", want: os.ErrNotExist},
+               {mt: mt, f: "foo bar//baz", want: os.ErrNotExist},
+               {mt: mt, f: "foo/zero", want: ""},
+               {mt: mt, f: "zero@0", want: ""},
+               {mt: mt, f: "zero@1", want: ""},
+               {mt: mt, f: "zero@4", want: ""},
+               {mt: mt, f: "zero@9", want: ""},
+               {mt: mt, f: "f", want: "f"},
+               {mt: mt, f: "ooba", want: "ooba"},
+               {mt: mt, f: "overlapReverse/o", want: "o"},
+               {mt: mt, f: "overlapReverse/oo", want: "oo"},
+               {mt: mt, f: "overlapReverse/ofoo", want: "ofoo"},
+               {mt: mt, f: "foo bar/baz", want: "foo"},
+               {mt: mt, f: "segmented/frob", want: "frob"},
+               {mt: mt, f: "segmented/oof", want: "oof"},
+       } {
+               rdr, err := s.kc.CollectionFileReader(map[string]interface{}{"manifest_text": testCase.mt}, testCase.f)
+               switch want := testCase.want.(type) {
+               case error:
+                       c.Check(rdr, check.IsNil)
+                       c.Check(err, check.Equals, want)
+               case string:
+                       buf := make([]byte, len(want))
+                       n, err := io.ReadFull(rdr, buf)
+                       c.Check(err, check.IsNil)
+                       for i := 0; i < 4; i++ {
+                               c.Check(string(buf), check.Equals, want)
+                               n, err = rdr.Read(buf)
+                               c.Check(n, check.Equals, 0)
+                               c.Check(err, check.Equals, io.EOF)
+                       }
+                       c.Check(rdr.Close(), check.Equals, nil)
+               }
+       }
+}
+
+func (s *CollectionReaderUnit) TestCollectionReaderManyBlocks(c *check.C) {
+       h := md5.New()
+       buf := make([]byte, 4096)
+       locs := make([]string, len(buf))
+       filesize := 0
+       for i := 0; i < len(locs); i++ {
+               _, err := io.ReadFull(rand.Reader, buf[:i])
+               c.Assert(err, check.IsNil)
+               h.Write(buf[:i])
+               locs[i], _, err = s.kc.PutB(buf[:i])
+               c.Assert(err, check.IsNil)
+               filesize += i
+       }
+       manifest := "./random " + strings.Join(locs, " ") + " 0:" + strconv.Itoa(filesize) + ":bytes.bin\n"
+       dataMD5 := h.Sum(nil)
+
+       checkMD5 := md5.New()
+       rdr, err := s.kc.CollectionFileReader(map[string]interface{}{"manifest_text": manifest}, "random/bytes.bin")
+       c.Check(err, check.IsNil)
+       _, err = io.Copy(checkMD5, rdr)
+       c.Check(err, check.IsNil)
+       _, err = rdr.Read(make([]byte, 1))
+       c.Check(err, check.Equals, io.EOF)
+       c.Check(checkMD5.Sum(nil), check.DeepEquals, dataMD5)
+}
+
+func (s *CollectionReaderUnit) TestCollectionReaderCloseEarly(c *check.C) {
+       s.kc.PutB([]byte("foo"))
+
+       mt := ". "
+       for i := 0; i < 1000; i++ {
+               mt += "acbd18db4cc2f85cedef654fccc4a4d8+3 "
+       }
+       mt += "0:3000:foo1000.txt\n"
+
+       // Grab the stub server's lock, ensuring our cfReader doesn't
+       // get anything back from its first call to kc.Get() before we
+       // have a chance to call Close().
+       s.handler.lock <- struct{}{}
+       opsBeforeRead := *s.handler.ops
+
+       rdr, err := s.kc.CollectionFileReader(map[string]interface{}{"manifest_text": mt}, "foo1000.txt")
+       c.Assert(err, check.IsNil)
+
+       firstReadDone := make(chan struct{})
+       go func() {
+               rdr.Read(make([]byte, 6))
+               firstReadDone <- struct{}{}
+       }()
+       err = rdr.Close()
+       c.Assert(err, check.IsNil)
+       c.Assert(rdr.(*cfReader).Error(), check.IsNil)
+
+       // Release the stub server's lock. The first GET operation will proceed.
+       <-s.handler.lock
+
+       // Make sure our first read operation consumes the data
+       // received from the first GET.
+       <-firstReadDone
+
+       // doGet() should close toRead before sending any more bufs to it.
+       if what, ok := <-rdr.(*cfReader).toRead; ok {
+               c.Errorf("Got %q, expected toRead to be closed", what)
+       }
+
+       // Stub should have handled exactly one GET request.
+       c.Assert(*s.handler.ops, check.Equals, opsBeforeRead+1)
+}
+
+func (s *CollectionReaderUnit) TestCollectionReaderDataError(c *check.C) {
+       manifest := ". ffffffffffffffffffffffffffffffff+1 0:1:notfound.txt\n"
+       buf := make([]byte, 1)
+       rdr, err := s.kc.CollectionFileReader(map[string]interface{}{"manifest_text": manifest}, "notfound.txt")
+       c.Check(err, check.IsNil)
+       for i := 0; i < 2; i++ {
+               _, err = io.ReadFull(rdr, buf)
+               c.Check(err, check.NotNil)
+               c.Check(err, check.Not(check.Equals), io.EOF)
+       }
+}
diff --git a/sdk/go/keepclient/discover.go b/sdk/go/keepclient/discover.go
new file mode 100644 (file)
index 0000000..f039c21
--- /dev/null
@@ -0,0 +1,129 @@
+package keepclient
+
+import (
+       "encoding/json"
+       "fmt"
+       "log"
+       "os"
+       "os/signal"
+       "reflect"
+       "strings"
+       "syscall"
+       "time"
+)
+
+// DiscoverKeepServers gets the list of available Keep services from
+// the API server.
+func (kc *KeepClient) DiscoverKeepServers() error {
+       var list svcList
+
+       // Get keep services from the API server
+       err := kc.Arvados.Call("GET", "keep_services", "", "accessible", nil, &list)
+       if err != nil {
+               return err
+       }
+
+       return kc.loadKeepServers(list)
+}
+
+// LoadKeepServicesFromJSON loads the list of available Keep services
+// from the given JSON instead of contacting the API server.
+func (kc *KeepClient) LoadKeepServicesFromJSON(services string) error {
+       var list svcList
+
+       // Decode the given JSON into a service list
+       dec := json.NewDecoder(strings.NewReader(services))
+       if err := dec.Decode(&list); err != nil {
+               return err
+       }
+
+       return kc.loadKeepServers(list)
+}
+
+// RefreshServices calls DiscoverKeepServers to refresh the Keep
+// service list whenever SIGHUP is received, whenever the given
+// interval has elapsed since the last refresh, and (if the last
+// refresh failed) whenever the given errInterval has elapsed.
+func (kc *KeepClient) RefreshServices(interval, errInterval time.Duration) {
+       previousRoots := []map[string]string{}
+
+       timer := time.NewTimer(interval)
+       gotHUP := make(chan os.Signal, 1)
+       signal.Notify(gotHUP, syscall.SIGHUP)
+
+       for {
+               select {
+               case <-gotHUP:
+               case <-timer.C:
+               }
+               timer.Reset(interval)
+
+               if err := kc.DiscoverKeepServers(); err != nil {
+                       log.Printf("WARNING: Error retrieving services list: %v (retrying in %v)", err, errInterval)
+                       timer.Reset(errInterval)
+                       continue
+               }
+               newRoots := []map[string]string{kc.LocalRoots(), kc.GatewayRoots()}
+
+               if !reflect.DeepEqual(previousRoots, newRoots) {
+                       DebugPrintf("DEBUG: Updated services list: locals %v gateways %v", newRoots[0], newRoots[1])
+                       previousRoots = newRoots
+               }
+
+               if len(newRoots[0]) == 0 {
+                       log.Printf("WARNING: No local services (retrying in %v)", errInterval)
+                       timer.Reset(errInterval)
+               }
+       }
+}
+
+// loadKeepServers populates the client's local, writable, and
+// gateway service lists from the given svcList.
+func (kc *KeepClient) loadKeepServers(list svcList) error {
+       listed := make(map[string]bool)
+       localRoots := make(map[string]string)
+       gatewayRoots := make(map[string]string)
+       writableLocalRoots := make(map[string]string)
+
+       // replicasPerService is 1 for disks; unknown or unlimited otherwise
+       kc.replicasPerService = 1
+
+       for _, service := range list.Items {
+               scheme := "http"
+               if service.SSL {
+                       scheme = "https"
+               }
+               url := fmt.Sprintf("%s://%s:%d", scheme, service.Hostname, service.Port)
+
+               // Skip duplicates
+               if listed[url] {
+                       continue
+               }
+               listed[url] = true
+
+               localRoots[service.Uuid] = url
+               if !service.ReadOnly {
+                       writableLocalRoots[service.Uuid] = url
+                       if service.SvcType != "disk" {
+                               kc.replicasPerService = 0
+                       }
+               }
+
+               if service.SvcType != "disk" {
+                       kc.foundNonDiskSvc = true
+               }
+
+               // Gateway services are only used when specified by
+               // UUID, so there's nothing to gain by filtering them
+               // by service type. Including all accessible services
+               // (gateway and otherwise) merely accommodates more
+               // service configurations.
+               gatewayRoots[service.Uuid] = url
+       }
+
+       if kc.foundNonDiskSvc {
+               kc.setClientSettingsNonDisk()
+       } else {
+               kc.setClientSettingsDisk()
+       }
+
+       kc.SetServiceRoots(localRoots, writableLocalRoots, gatewayRoots)
+       return nil
+}
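A sketch of bootstrapping a client from a static service list rather than API discovery. The JSON field names are an assumption based on the keep_services API schema (the svcList struct is defined elsewhere in this package), and the UUID and hostname are placeholders:

func newClientFromStaticServices(arv *arvadosclient.ArvadosClient) (*KeepClient, error) {
        kc := New(arv) // New() deliberately skips service discovery
        services := `{"items": [{
                "uuid": "zzzzz-bi6l4-000000000000000",
                "service_host": "keep0.example.com",
                "service_port": 25107,
                "service_ssl_flag": false,
                "service_type": "disk",
                "read_only": false}]}`
        if err := kc.LoadKeepServicesFromJSON(services); err != nil {
                return nil, err
        }
        return kc, nil
}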
diff --git a/sdk/go/keepclient/discover_test.go b/sdk/go/keepclient/discover_test.go
new file mode 100644 (file)
index 0000000..43a984e
--- /dev/null
@@ -0,0 +1,21 @@
+package keepclient
+
+import (
+       "fmt"
+       "time"
+
+       "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
+)
+
+func ExampleRefreshServices() {
+       arv, err := arvadosclient.MakeArvadosClient()
+       if err != nil {
+               panic(err)
+       }
+       kc, err := MakeKeepClient(&arv)
+       if err != nil {
+               panic(err)
+       }
+       go kc.RefreshServices(5*time.Minute, 3*time.Second)
+       fmt.Printf("LocalRoots: %#v\n", kc.LocalRoots())
+}
index 70aa3746bcc117932beba6313ed1b3d9a0ac0a3d..26aa7177e05c3aa64d3e956f183379614093d341 100644 (file)
@@ -2,6 +2,7 @@
 package keepclient
 
 import (
+       "bytes"
        "crypto/md5"
        "crypto/tls"
        "errors"
@@ -10,9 +11,7 @@ import (
        "git.curoverse.com/arvados.git/sdk/go/streamer"
        "io"
        "io/ioutil"
-       "log"
        "net/http"
-       "os"
        "regexp"
        "strconv"
        "strings"
@@ -22,13 +21,45 @@ import (
 // A Keep "block" is 64MB.
 const BLOCKSIZE = 64 * 1024 * 1024
 
-var BlockNotFound = errors.New("Block not found")
+// Error is an error that also reports whether the failure is temporary.
+type Error interface {
+       error
+       Temporary() bool
+}
+
+// multipleResponseError implements the Error interface.
+type multipleResponseError struct {
+       error
+       isTemp bool
+}
+
+func (e *multipleResponseError) Temporary() bool {
+       return e.isTemp
+}
+
+// BlockNotFound is an ErrNotFound with isTemp set to false: every
+// server reported the block missing, so retrying will not help.
+var BlockNotFound = &ErrNotFound{multipleResponseError{
+       error:  errors.New("Block not found"),
+       isTemp: false,
+}}
+
+// ErrNotFound is returned when a block could not be retrieved; its
+// Temporary() method reports whether retrying might succeed.
+type ErrNotFound struct {
+       multipleResponseError
+}
+
 var InsufficientReplicasError = errors.New("Could not write sufficient replicas")
 var OversizeBlockError = errors.New("Exceeded maximum block size (" + strconv.Itoa(BLOCKSIZE) + ")")
 var MissingArvadosApiHost = errors.New("Missing required environment variable ARVADOS_API_HOST")
 var MissingArvadosApiToken = errors.New("Missing required environment variable ARVADOS_API_TOKEN")
 var InvalidLocatorError = errors.New("Invalid locator")
 
+// ErrNoSuchKeepServer is returned when GetIndex is invoked with a UUID that matches no known keep server
+var ErrNoSuchKeepServer = errors.New("No keep server matching the given UUID is found")
+
+// ErrIncompleteIndex is returned when the Index response does not end with a blank line
+var ErrIncompleteIndex = errors.New("Got incomplete index")
+
 const X_Keep_Desired_Replicas = "X-Keep-Desired-Replicas"
 const X_Keep_Replicas_Stored = "X-Keep-Replicas-Stored"
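Because Get() and Ask() return errors satisfying this interface (see getOrHead below), callers can decide whether a failure is worth retrying. A minimal caller-side sketch, with a made-up helper name and backoff, assuming the io and time packages are imported:

// fetchWithRetryHint retries a Get only while keepclient reports the
// failure as temporary (network errors, timeouts, 5xx responses).
func fetchWithRetryHint(kc *KeepClient, locator string) (io.ReadCloser, error) {
        for {
                rdr, _, _, err := kc.Get(locator)
                if err == nil {
                        return rdr, nil
                }
                if ke, ok := err.(Error); ok && ke.Temporary() {
                        time.Sleep(time.Second) // transient; wait and try again
                        continue
                }
                return nil, err // permanent, e.g. BlockNotFound
        }
}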
 
@@ -36,30 +67,46 @@ const X_Keep_Replicas_Stored = "X-Keep-Replicas-Stored"
 type KeepClient struct {
        Arvados            *arvadosclient.ArvadosClient
        Want_replicas      int
-       Using_proxy        bool
        localRoots         *map[string]string
        writableLocalRoots *map[string]string
        gatewayRoots       *map[string]string
        lock               sync.RWMutex
        Client             *http.Client
+       Retries            int
 
        // set to 1 if all writable services are of disk type, otherwise 0
        replicasPerService int
+
+       // Any non-disk typed services found in the list of keepservers?
+       foundNonDiskSvc bool
 }
 
-// Create a new KeepClient.  This will contact the API server to discover Keep
-// servers.
+// MakeKeepClient creates a new KeepClient by contacting the API server to discover Keep servers.
 func MakeKeepClient(arv *arvadosclient.ArvadosClient) (*KeepClient, error) {
-       var matchTrue = regexp.MustCompile("^(?i:1|yes|true)$")
-       insecure := matchTrue.MatchString(os.Getenv("ARVADOS_API_HOST_INSECURE"))
+       kc := New(arv)
+       return kc, kc.DiscoverKeepServers()
+}
+
+// New creates a new KeepClient. It does not discover Keep services;
+// that is the caller's responsibility (see DiscoverKeepServers and
+// LoadKeepServicesFromJSON).
+func New(arv *arvadosclient.ArvadosClient) *KeepClient {
+       defaultReplicationLevel := 2
+       value, err := arv.Discovery("defaultCollectionReplication")
+       if err == nil {
+               v, ok := value.(float64)
+               if ok && v > 0 {
+                       defaultReplicationLevel = int(v)
+               }
+       }
+
        kc := &KeepClient{
                Arvados:       arv,
-               Want_replicas: 2,
-               Using_proxy:   false,
+               Want_replicas: defaultReplicationLevel,
                Client: &http.Client{Transport: &http.Transport{
-                       TLSClientConfig: &tls.Config{InsecureSkipVerify: insecure}}},
+                       TLSClientConfig: &tls.Config{InsecureSkipVerify: arv.ApiInsecure}}},
+               Retries: 2,
        }
-       return kc, kc.DiscoverKeepServers()
+       return kc
 }
 
 // Put a block given the block hash, a reader, and the number of bytes
@@ -120,6 +167,85 @@ func (kc *KeepClient) PutR(r io.Reader) (locator string, replicas int, err error
        }
 }
 
+func (kc *KeepClient) getOrHead(method string, locator string) (io.ReadCloser, int64, string, error) {
+       var errs []string
+
+       triesRemaining := 1 + kc.Retries
+
+       serversToTry := kc.getSortedRoots(locator)
+
+       numServers := len(serversToTry)
+       count404 := 0
+
+       var retryList []string
+
+       for triesRemaining > 0 {
+               triesRemaining--
+               retryList = nil
+
+               for _, host := range serversToTry {
+                       url := host + "/" + locator
+
+                       req, err := http.NewRequest(method, url, nil)
+                       if err != nil {
+                               errs = append(errs, fmt.Sprintf("%s: %v", url, err))
+                               continue
+                       }
+                       req.Header.Add("Authorization", fmt.Sprintf("OAuth2 %s", kc.Arvados.ApiToken))
+                       resp, err := kc.Client.Do(req)
+                       if err != nil {
+                               // Probably a network error, may be transient,
+                               // can try again.
+                               errs = append(errs, fmt.Sprintf("%s: %v", url, err))
+                               retryList = append(retryList, host)
+                       } else if resp.StatusCode != http.StatusOK {
+                               var respbody []byte
+                               respbody, _ = ioutil.ReadAll(&io.LimitedReader{R: resp.Body, N: 4096})
+                               resp.Body.Close()
+                               errs = append(errs, fmt.Sprintf("%s: HTTP %d %q",
+                                       url, resp.StatusCode, bytes.TrimSpace(respbody)))
+
+                               if resp.StatusCode == 408 ||
+                                       resp.StatusCode == 429 ||
+                                       resp.StatusCode >= 500 {
+                                       // Timeout, too many requests, or other
+                                       // server side failure, transient
+                                       // error, can try again.
+                                       retryList = append(retryList, host)
+                               } else if resp.StatusCode == 404 {
+                                       count404++
+                               }
+                       } else {
+                               // Success.
+                               if method == "GET" {
+                                       return HashCheckingReader{
+                                               Reader: resp.Body,
+                                               Hash:   md5.New(),
+                                               Check:  locator[0:32],
+                                       }, resp.ContentLength, url, nil
+                               } else {
+                                       resp.Body.Close()
+                                       return nil, resp.ContentLength, url, nil
+                               }
+                       }
+
+               }
+               serversToTry = retryList
+       }
+       DebugPrintf("DEBUG: %s %s failed: %v", method, locator, errs)
+
+       var err error
+       if count404 == numServers {
+               err = BlockNotFound
+       } else {
+               err = &ErrNotFound{multipleResponseError{
+                       error:  fmt.Errorf("%s %s failed: %v", method, locator, errs),
+                       isTemp: len(serversToTry) > 0,
+               }}
+       }
+       return nil, 0, "", err
+}
+
 // Get() retrieves a block, given a locator. Returns a reader, the
 // expected data length, the URL the block is being fetched from, and
 // an error.
@@ -128,36 +254,7 @@ func (kc *KeepClient) PutR(r io.Reader) (locator string, replicas int, err error
 // reader returned by this method will return a BadChecksum error
 // instead of EOF.
 func (kc *KeepClient) Get(locator string) (io.ReadCloser, int64, string, error) {
-       var errs []string
-       for _, host := range kc.getSortedRoots(locator) {
-               url := host + "/" + locator
-               req, err := http.NewRequest("GET", url, nil)
-               if err != nil {
-                       continue
-               }
-               req.Header.Add("Authorization", fmt.Sprintf("OAuth2 %s", kc.Arvados.ApiToken))
-               resp, err := kc.Client.Do(req)
-               if err != nil || resp.StatusCode != http.StatusOK {
-                       if resp != nil {
-                               var respbody []byte
-                               if resp.Body != nil {
-                                       respbody, _ = ioutil.ReadAll(&io.LimitedReader{resp.Body, 4096})
-                               }
-                               errs = append(errs, fmt.Sprintf("%s: %d %s",
-                                       url, resp.StatusCode, strings.TrimSpace(string(respbody))))
-                       } else {
-                               errs = append(errs, fmt.Sprintf("%s: %v", url, err))
-                       }
-                       continue
-               }
-               return HashCheckingReader{
-                       Reader: resp.Body,
-                       Hash:   md5.New(),
-                       Check:  locator[0:32],
-               }, resp.ContentLength, url, nil
-       }
-       log.Printf("DEBUG: GET %s failed: %v", locator, errs)
-       return nil, 0, "", BlockNotFound
+       return kc.getOrHead("GET", locator)
 }
 
 // Ask() verifies that a block with the given hash is available and
@@ -168,18 +265,60 @@ func (kc *KeepClient) Get(locator string) (io.ReadCloser, int64, string, error)
 // Returns the data size (content length) reported by the Keep service
 // and the URI reporting the data size.
 func (kc *KeepClient) Ask(locator string) (int64, string, error) {
-       for _, host := range kc.getSortedRoots(locator) {
-               url := host + "/" + locator
-               req, err := http.NewRequest("HEAD", url, nil)
-               if err != nil {
-                       continue
-               }
-               req.Header.Add("Authorization", fmt.Sprintf("OAuth2 %s", kc.Arvados.ApiToken))
-               if resp, err := kc.Client.Do(req); err == nil && resp.StatusCode == http.StatusOK {
-                       return resp.ContentLength, url, nil
-               }
+       _, size, url, err := kc.getOrHead("HEAD", locator)
+       return size, url, err
+}
+
+// GetIndex retrieves a list of blocks stored on the given server whose hashes
+// begin with the given prefix. The returned reader will return an error (other
+// than EOF) if the complete index cannot be retrieved.
+//
+// This is meant to be used only by system components and admin tools.
+// It will return an error unless the client is using a "data manager token"
+// recognized by the Keep services.
+func (kc *KeepClient) GetIndex(keepServiceUUID, prefix string) (io.Reader, error) {
+       url := kc.LocalRoots()[keepServiceUUID]
+       if url == "" {
+               return nil, ErrNoSuchKeepServer
+       }
+
+       url += "/index"
+       if prefix != "" {
+               url += "/" + prefix
+       }
+
+       req, err := http.NewRequest("GET", url, nil)
+       if err != nil {
+               return nil, err
+       }
+
+       req.Header.Add("Authorization", fmt.Sprintf("OAuth2 %s", kc.Arvados.ApiToken))
+       resp, err := kc.Client.Do(req)
+       if err != nil {
+               return nil, err
        }
-       return 0, "", BlockNotFound
+
+       defer resp.Body.Close()
+
+       if resp.StatusCode != http.StatusOK {
+               return nil, fmt.Errorf("Got http status code: %d", resp.StatusCode)
+       }
+
+       var respBody []byte
+       respBody, err = ioutil.ReadAll(resp.Body)
+       if err != nil {
+               return nil, err
+       }
+
+       // Got the index; verify that it is complete. The response
+       // should be "\n" if no locators matched the prefix;
+       // otherwise, it should be a list of locators followed by a
+       // blank line.
+       if !bytes.Equal(respBody, []byte("\n")) && !bytes.HasSuffix(respBody, []byte("\n\n")) {
+               return nil, ErrIncompleteIndex
+       }
+
+       // Got complete index; strip the trailing newline and send
+       return bytes.NewReader(respBody[0 : len(respBody)-1]), nil
 }
 
 // LocalRoots() returns the map of local (i.e., disk and proxy) Keep
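For completeness, a sketch of how an admin tool might consume GetIndex. It assumes the client is authorized with a data manager token and that the keepstore UUID below is a placeholder; each line of the returned index is a locator plus a timestamp:

func dumpIndex(kc *KeepClient) error {
        rdr, err := kc.GetIndex("zzzzz-bi6l4-000000000000000", "")
        if err != nil {
                return err // e.g. ErrNoSuchKeepServer or ErrIncompleteIndex
        }
        scanner := bufio.NewScanner(rdr)
        for scanner.Scan() {
                fmt.Println(scanner.Text())
        }
        return scanner.Err()
}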
index e4e459e83a51c408141c7127a098f69e955a005e..4ba1d7c245c272ae5a017e513eca4d8adcd292ee 100644 (file)
@@ -14,7 +14,9 @@ import (
        "net"
        "net/http"
        "os"
+       "strings"
        "testing"
+       "time"
 )
 
 // Gocheck boilerplate
@@ -45,14 +47,14 @@ func (s *ServerRequiredSuite) SetUpSuite(c *C) {
                return
        }
        arvadostest.StartAPI()
-       arvadostest.StartKeep()
+       arvadostest.StartKeep(2, false)
 }
 
 func (s *ServerRequiredSuite) TearDownSuite(c *C) {
        if *no_server {
                return
        }
-       arvadostest.StopKeep()
+       arvadostest.StopKeep(2)
        arvadostest.StopAPI()
 }
 
@@ -69,6 +71,22 @@ func (s *ServerRequiredSuite) TestMakeKeepClient(c *C) {
        }
 }
 
+func (s *ServerRequiredSuite) TestDefaultReplications(c *C) {
+       arv, err := arvadosclient.MakeArvadosClient()
+       c.Assert(err, Equals, nil)
+
+       kc, err := MakeKeepClient(&arv)
+       c.Assert(kc.Want_replicas, Equals, 2)
+
+       arv.DiscoveryDoc["defaultCollectionReplication"] = 3.0
+       kc, err = MakeKeepClient(&arv)
+       c.Assert(kc.Want_replicas, Equals, 3)
+
+       arv.DiscoveryDoc["defaultCollectionReplication"] = 1.0
+       kc, err = MakeKeepClient(&arv)
+       c.Assert(kc.Want_replicas, Equals, 1)
+}
+
 type StubPutHandler struct {
        c              *C
        expectPath     string
@@ -126,10 +144,9 @@ func (s *StandaloneSuite) TestUploadToStubKeepServer(c *C) {
                make(chan string)}
 
        UploadToStubHelper(c, st,
-               func(kc *KeepClient, url string, reader io.ReadCloser,
-                       writer io.WriteCloser, upload_status chan uploadStatus) {
+               func(kc *KeepClient, url string, reader io.ReadCloser, writer io.WriteCloser, upload_status chan uploadStatus) {
 
-                       go kc.uploadToKeepServer(url, st.expectPath, reader, upload_status, int64(len("foo")), "TestUploadToStubKeepServer")
+                       go kc.uploadToKeepServer(url, st.expectPath, reader, upload_status, int64(len("foo")), 0)
 
                        writer.Write([]byte("foo"))
                        writer.Close()
@@ -138,13 +155,9 @@ func (s *StandaloneSuite) TestUploadToStubKeepServer(c *C) {
                        status := <-upload_status
                        c.Check(status, DeepEquals, uploadStatus{nil, fmt.Sprintf("%s/%s", url, st.expectPath), 200, 1, ""})
                })
-
-       log.Printf("TestUploadToStubKeepServer done")
 }
 
 func (s *StandaloneSuite) TestUploadToStubKeepServerBufferReader(c *C) {
-       log.Printf("TestUploadToStubKeepServerBufferReader")
-
        st := StubPutHandler{
                c,
                "acbd18db4cc2f85cedef654fccc4a4d8",
@@ -161,7 +174,7 @@ func (s *StandaloneSuite) TestUploadToStubKeepServerBufferReader(c *C) {
 
                        br1 := tr.MakeStreamReader()
 
-                       go kc.uploadToKeepServer(url, st.expectPath, br1, upload_status, 3, "TestUploadToStubKeepServerBufferReader")
+                       go kc.uploadToKeepServer(url, st.expectPath, br1, upload_status, 3, 0)
 
                        writer.Write([]byte("foo"))
                        writer.Close()
@@ -171,8 +184,6 @@ func (s *StandaloneSuite) TestUploadToStubKeepServerBufferReader(c *C) {
                        status := <-upload_status
                        c.Check(status, DeepEquals, uploadStatus{nil, fmt.Sprintf("%s/%s", url, st.expectPath), 200, 1, ""})
                })
-
-       log.Printf("TestUploadToStubKeepServerBufferReader done")
 }
 
 type FailHandler struct {
@@ -184,9 +195,32 @@ func (fh FailHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
        fh.handled <- fmt.Sprintf("http://%s", req.Host)
 }
 
-func (s *StandaloneSuite) TestFailedUploadToStubKeepServer(c *C) {
-       log.Printf("TestFailedUploadToStubKeepServer")
+type FailThenSucceedHandler struct {
+       handled        chan string
+       count          int
+       successhandler StubGetHandler
+}
+
+func (fh *FailThenSucceedHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
+       if fh.count == 0 {
+               resp.WriteHeader(500)
+               fh.count++
+               fh.handled <- fmt.Sprintf("http://%s", req.Host)
+       } else {
+               fh.successhandler.ServeHTTP(resp, req)
+       }
+}
 
+type Error404Handler struct {
+       handled chan string
+}
+
+func (fh Error404Handler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
+       resp.WriteHeader(404)
+       fh.handled <- fmt.Sprintf("http://%s", req.Host)
+}
+
+func (s *StandaloneSuite) TestFailedUploadToStubKeepServer(c *C) {
        st := FailHandler{
                make(chan string)}
 
@@ -196,7 +230,7 @@ func (s *StandaloneSuite) TestFailedUploadToStubKeepServer(c *C) {
                func(kc *KeepClient, url string, reader io.ReadCloser,
                        writer io.WriteCloser, upload_status chan uploadStatus) {
 
-                       go kc.uploadToKeepServer(url, hash, reader, upload_status, 3, "TestFailedUploadToStubKeepServer")
+                       go kc.uploadToKeepServer(url, hash, reader, upload_status, 3, 0)
 
                        writer.Write([]byte("foo"))
                        writer.Close()
@@ -207,7 +241,6 @@ func (s *StandaloneSuite) TestFailedUploadToStubKeepServer(c *C) {
                        c.Check(status.url, Equals, fmt.Sprintf("%s/%s", url, hash))
                        c.Check(status.statusCode, Equals, 500)
                })
-       log.Printf("TestFailedUploadToStubKeepServer done")
 }
 
 type KeepServer struct {
@@ -226,8 +259,6 @@ func RunSomeFakeKeepServers(st http.Handler, n int) (ks []KeepServer) {
 }
 
 func (s *StandaloneSuite) TestPutB(c *C) {
-       log.Printf("TestPutB")
-
        hash := Md5String("foo")
 
        st := StubPutHandler{
@@ -266,13 +297,9 @@ func (s *StandaloneSuite) TestPutB(c *C) {
                (s1 == shuff[1] && s2 == shuff[0]),
                Equals,
                true)
-
-       log.Printf("TestPutB done")
 }
 
 func (s *StandaloneSuite) TestPutHR(c *C) {
-       log.Printf("TestPutHR")
-
        hash := fmt.Sprintf("%x", md5.Sum([]byte("foo")))
 
        st := StubPutHandler{
@@ -310,7 +337,6 @@ func (s *StandaloneSuite) TestPutHR(c *C) {
        kc.PutHR(hash, reader, 3)
 
        shuff := NewRootSorter(kc.LocalRoots(), hash).GetSortedRoots()
-       log.Print(shuff)
 
        s1 := <-st.handled
        s2 := <-st.handled
@@ -319,13 +345,9 @@ func (s *StandaloneSuite) TestPutHR(c *C) {
                (s1 == shuff[1] && s2 == shuff[0]),
                Equals,
                true)
-
-       log.Printf("TestPutHR done")
 }
 
 func (s *StandaloneSuite) TestPutWithFail(c *C) {
-       log.Printf("TestPutWithFail")
-
        hash := fmt.Sprintf("%x", md5.Sum([]byte("foo")))
 
        st := StubPutHandler{
@@ -364,6 +386,7 @@ func (s *StandaloneSuite) TestPutWithFail(c *C) {
 
        shuff := NewRootSorter(
                kc.LocalRoots(), Md5String("foo")).GetSortedRoots()
+       c.Logf("%+v", shuff)
 
        phash, replicas, err := kc.PutB([]byte("foo"))
 
@@ -383,8 +406,6 @@ func (s *StandaloneSuite) TestPutWithFail(c *C) {
 }
 
 func (s *StandaloneSuite) TestPutWithTooManyFail(c *C) {
-       log.Printf("TestPutWithTooManyFail")
-
        hash := fmt.Sprintf("%x", md5.Sum([]byte("foo")))
 
        st := StubPutHandler{
@@ -401,6 +422,7 @@ func (s *StandaloneSuite) TestPutWithTooManyFail(c *C) {
        kc, _ := MakeKeepClient(&arv)
 
        kc.Want_replicas = 2
+       kc.Retries = 0
        arv.ApiToken = "abc123"
        localRoots := make(map[string]string)
        writableLocalRoots := make(map[string]string)
@@ -426,8 +448,6 @@ func (s *StandaloneSuite) TestPutWithTooManyFail(c *C) {
        c.Check(err, Equals, InsufficientReplicasError)
        c.Check(replicas, Equals, 1)
        c.Check(<-st.handled, Equals, ks1[0].url)
-
-       log.Printf("TestPutWithTooManyFail done")
 }
 
 type StubGetHandler struct {
@@ -447,8 +467,6 @@ func (sgh StubGetHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request)
 }
 
 func (s *StandaloneSuite) TestGet(c *C) {
-       log.Printf("TestGet")
-
        hash := fmt.Sprintf("%x", md5.Sum([]byte("foo")))
 
        st := StubGetHandler{
@@ -464,7 +482,7 @@ func (s *StandaloneSuite) TestGet(c *C) {
        arv, err := arvadosclient.MakeArvadosClient()
        kc, _ := MakeKeepClient(&arv)
        arv.ApiToken = "abc123"
-       kc.SetServiceRoots(map[string]string{"x": ks.url}, map[string]string{ks.url: ""}, nil)
+       kc.SetServiceRoots(map[string]string{"x": ks.url}, nil, nil)
 
        r, n, url2, err := kc.Get(hash)
        defer r.Close()
@@ -475,8 +493,26 @@ func (s *StandaloneSuite) TestGet(c *C) {
        content, err2 := ioutil.ReadAll(r)
        c.Check(err2, Equals, nil)
        c.Check(content, DeepEquals, []byte("foo"))
+}
+
+func (s *StandaloneSuite) TestGet404(c *C) {
+       hash := fmt.Sprintf("%x", md5.Sum([]byte("foo")))
+
+       st := Error404Handler{make(chan string, 1)}
+
+       ks := RunFakeKeepServer(st)
+       defer ks.listener.Close()
+
+       arv, err := arvadosclient.MakeArvadosClient()
+       kc, _ := MakeKeepClient(&arv)
+       arv.ApiToken = "abc123"
+       kc.SetServiceRoots(map[string]string{"x": ks.url}, nil, nil)
 
-       log.Printf("TestGet done")
+       r, n, url2, err := kc.Get(hash)
+       c.Check(err, Equals, BlockNotFound)
+       c.Check(n, Equals, int64(0))
+       c.Check(url2, Equals, "")
+       c.Check(r, Equals, nil)
 }
 
 func (s *StandaloneSuite) TestGetFail(c *C) {
@@ -490,10 +526,62 @@ func (s *StandaloneSuite) TestGetFail(c *C) {
        arv, err := arvadosclient.MakeArvadosClient()
        kc, _ := MakeKeepClient(&arv)
        arv.ApiToken = "abc123"
-       kc.SetServiceRoots(map[string]string{"x": ks.url}, map[string]string{ks.url: ""}, nil)
+       kc.SetServiceRoots(map[string]string{"x": ks.url}, nil, nil)
+       kc.Retries = 0
 
        r, n, url2, err := kc.Get(hash)
-       c.Check(err, Equals, BlockNotFound)
+       errNotFound, _ := err.(*ErrNotFound)
+       c.Check(errNotFound, NotNil)
+       c.Check(strings.Contains(errNotFound.Error(), "HTTP 500"), Equals, true)
+       c.Check(errNotFound.Temporary(), Equals, true)
+       c.Check(n, Equals, int64(0))
+       c.Check(url2, Equals, "")
+       c.Check(r, Equals, nil)
+}
+
+func (s *StandaloneSuite) TestGetFailRetry(c *C) {
+       hash := fmt.Sprintf("%x", md5.Sum([]byte("foo")))
+
+       st := &FailThenSucceedHandler{make(chan string, 1), 0,
+               StubGetHandler{
+                       c,
+                       hash,
+                       "abc123",
+                       http.StatusOK,
+                       []byte("foo")}}
+
+       ks := RunFakeKeepServer(st)
+       defer ks.listener.Close()
+
+       arv, err := arvadosclient.MakeArvadosClient()
+       kc, _ := MakeKeepClient(&arv)
+       arv.ApiToken = "abc123"
+       kc.SetServiceRoots(map[string]string{"x": ks.url}, nil, nil)
+
+       r, n, url2, err := kc.Get(hash)
+       defer r.Close()
+       c.Check(err, Equals, nil)
+       c.Check(n, Equals, int64(3))
+       c.Check(url2, Equals, fmt.Sprintf("%s/%s", ks.url, hash))
+
+       content, err2 := ioutil.ReadAll(r)
+       c.Check(err2, Equals, nil)
+       c.Check(content, DeepEquals, []byte("foo"))
+}
+
+func (s *StandaloneSuite) TestGetNetError(c *C) {
+       hash := fmt.Sprintf("%x", md5.Sum([]byte("foo")))
+
+       arv, err := arvadosclient.MakeArvadosClient()
+       kc, _ := MakeKeepClient(&arv)
+       arv.ApiToken = "abc123"
+       kc.SetServiceRoots(map[string]string{"x": "http://localhost:62222"}, nil, nil)
+
+       r, n, url2, err := kc.Get(hash)
+       errNotFound, _ := err.(*ErrNotFound)
+       c.Check(errNotFound, NotNil)
+       c.Check(strings.Contains(errNotFound.Error(), "connection refused"), Equals, true)
+       c.Check(errNotFound.Temporary(), Equals, true)
        c.Check(n, Equals, int64(0))
        c.Check(url2, Equals, "")
        c.Check(r, Equals, nil)
@@ -525,7 +613,7 @@ func (s *StandaloneSuite) TestGetWithServiceHint(c *C) {
        arv.ApiToken = "abc123"
        kc.SetServiceRoots(
                map[string]string{"x": ks0.url},
-               map[string]string{"x": ks0.url},
+               nil,
                map[string]string{uuid: ks.url})
 
        r, n, uri, err := kc.Get(hash + "+K@" + uuid)
@@ -572,11 +660,7 @@ func (s *StandaloneSuite) TestGetWithLocalServiceHint(c *C) {
                        "zzzzz-bi6l4-xxxxxxxxxxxxxxx": ks0.url,
                        "zzzzz-bi6l4-wwwwwwwwwwwwwww": ks0.url,
                        uuid: ks.url},
-               map[string]string{
-                       "zzzzz-bi6l4-yyyyyyyyyyyyyyy": ks0.url,
-                       "zzzzz-bi6l4-xxxxxxxxxxxxxxx": ks0.url,
-                       "zzzzz-bi6l4-wwwwwwwwwwwwwww": ks0.url,
-                       uuid: ks.url},
+               nil,
                map[string]string{
                        "zzzzz-bi6l4-yyyyyyyyyyyyyyy": ks0.url,
                        "zzzzz-bi6l4-xxxxxxxxxxxxxxx": ks0.url,
@@ -619,7 +703,7 @@ func (s *StandaloneSuite) TestGetWithServiceHintFailoverToLocals(c *C) {
        arv.ApiToken = "abc123"
        kc.SetServiceRoots(
                map[string]string{"zzzzz-bi6l4-keepdisk0000000": ksLocal.url},
-               map[string]string{"zzzzz-bi6l4-keepdisk0000000": ksLocal.url},
+               nil,
                map[string]string{uuid: ksGateway.url})
 
        r, n, uri, err := kc.Get(hash + "+K@" + uuid)
@@ -654,7 +738,7 @@ func (s *StandaloneSuite) TestChecksum(c *C) {
        arv, err := arvadosclient.MakeArvadosClient()
        kc, _ := MakeKeepClient(&arv)
        arv.ApiToken = "abc123"
-       kc.SetServiceRoots(map[string]string{"x": ks.url}, map[string]string{ks.url: ""}, nil)
+       kc.SetServiceRoots(map[string]string{"x": ks.url}, nil, nil)
 
        r, n, _, err := kc.Get(barhash)
        _, err = ioutil.ReadAll(r)
@@ -675,7 +759,7 @@ func (s *StandaloneSuite) TestGetWithFailures(c *C) {
        content := []byte("waz")
        hash := fmt.Sprintf("%x", md5.Sum(content))
 
-       fh := FailHandler{
+       fh := Error404Handler{
                make(chan string, 4)}
 
        st := StubGetHandler{
@@ -706,6 +790,7 @@ func (s *StandaloneSuite) TestGetWithFailures(c *C) {
        }
 
        kc.SetServiceRoots(localRoots, writableLocalRoots, nil)
+       kc.Retries = 0
 
        // This test works only if one of the failing services is
        // attempted before the succeeding service. Otherwise,
@@ -775,15 +860,12 @@ func (this StubProxyHandler) ServeHTTP(resp http.ResponseWriter, req *http.Reque
 }
 
 func (s *StandaloneSuite) TestPutProxy(c *C) {
-       log.Printf("TestPutProxy")
-
        st := StubProxyHandler{make(chan string, 1)}
 
        arv, err := arvadosclient.MakeArvadosClient()
        kc, _ := MakeKeepClient(&arv)
 
        kc.Want_replicas = 2
-       kc.Using_proxy = true
        arv.ApiToken = "abc123"
        localRoots := make(map[string]string)
        writableLocalRoots := make(map[string]string)
@@ -803,20 +885,15 @@ func (s *StandaloneSuite) TestPutProxy(c *C) {
 
        c.Check(err, Equals, nil)
        c.Check(replicas, Equals, 2)
-
-       log.Printf("TestPutProxy done")
 }
 
 func (s *StandaloneSuite) TestPutProxyInsufficientReplicas(c *C) {
-       log.Printf("TestPutProxy")
-
        st := StubProxyHandler{make(chan string, 1)}
 
        arv, err := arvadosclient.MakeArvadosClient()
        kc, _ := MakeKeepClient(&arv)
 
        kc.Want_replicas = 3
-       kc.Using_proxy = true
        arv.ApiToken = "abc123"
        localRoots := make(map[string]string)
        writableLocalRoots := make(map[string]string)
@@ -835,8 +912,6 @@ func (s *StandaloneSuite) TestPutProxyInsufficientReplicas(c *C) {
 
        c.Check(err, Equals, InsufficientReplicasError)
        c.Check(replicas, Equals, 2)
-
-       log.Printf("TestPutProxy done")
 }
 
 func (s *StandaloneSuite) TestMakeLocator(c *C) {
@@ -948,3 +1023,234 @@ func (s *StandaloneSuite) TestPutBWithNoWritableLocalRoots(c *C) {
        c.Check(err, Equals, InsufficientReplicasError)
        c.Check(replicas, Equals, 0)
 }
+
+type StubGetIndexHandler struct {
+       c              *C
+       expectPath     string
+       expectAPIToken string
+       httpStatus     int
+       body           []byte
+}
+
+func (h StubGetIndexHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
+       h.c.Check(req.URL.Path, Equals, h.expectPath)
+       h.c.Check(req.Header.Get("Authorization"), Equals, fmt.Sprintf("OAuth2 %s", h.expectAPIToken))
+       resp.Header().Set("Content-Length", fmt.Sprintf("%d", len(h.body)))
+       resp.WriteHeader(h.httpStatus)
+       resp.Write(h.body)
+}
+
+func (s *StandaloneSuite) TestGetIndexWithNoPrefix(c *C) {
+       hash := fmt.Sprintf("%x", md5.Sum([]byte("foo")))
+
+       st := StubGetIndexHandler{
+               c,
+               "/index",
+               "abc123",
+               http.StatusOK,
+               []byte(hash + "+3 1443559274\n\n")}
+
+       ks := RunFakeKeepServer(st)
+       defer ks.listener.Close()
+
+       arv, err := arvadosclient.MakeArvadosClient()
+       kc, _ := MakeKeepClient(&arv)
+       arv.ApiToken = "abc123"
+       kc.SetServiceRoots(map[string]string{"x": ks.url}, nil, nil)
+
+       r, err := kc.GetIndex("x", "")
+       c.Check(err, Equals, nil)
+
+       content, err2 := ioutil.ReadAll(r)
+       c.Check(err2, Equals, nil)
+       c.Check(content, DeepEquals, st.body[0:len(st.body)-1])
+}
+
+func (s *StandaloneSuite) TestGetIndexWithPrefix(c *C) {
+       hash := fmt.Sprintf("%x", md5.Sum([]byte("foo")))
+
+       st := StubGetIndexHandler{
+               c,
+               "/index/" + hash[0:3],
+               "abc123",
+               http.StatusOK,
+               []byte(hash + "+3 1443559274\n\n")}
+
+       ks := RunFakeKeepServer(st)
+       defer ks.listener.Close()
+
+       arv, err := arvadosclient.MakeArvadosClient()
+       kc, _ := MakeKeepClient(&arv)
+       arv.ApiToken = "abc123"
+       kc.SetServiceRoots(map[string]string{"x": ks.url}, nil, nil)
+
+       r, err := kc.GetIndex("x", hash[0:3])
+       c.Check(err, Equals, nil)
+
+       content, err2 := ioutil.ReadAll(r)
+       c.Check(err2, Equals, nil)
+       c.Check(content, DeepEquals, st.body[0:len(st.body)-1])
+}
+
+func (s *StandaloneSuite) TestGetIndexIncomplete(c *C) {
+       hash := fmt.Sprintf("%x", md5.Sum([]byte("foo")))
+
+       st := StubGetIndexHandler{
+               c,
+               "/index/" + hash[0:3],
+               "abc123",
+               http.StatusOK,
+               []byte(hash)}
+
+       ks := RunFakeKeepServer(st)
+       defer ks.listener.Close()
+
+       arv, err := arvadosclient.MakeArvadosClient()
+       kc, _ := MakeKeepClient(&arv)
+       arv.ApiToken = "abc123"
+       kc.SetServiceRoots(map[string]string{"x": ks.url}, nil, nil)
+
+       _, err = kc.GetIndex("x", hash[0:3])
+       c.Check(err, Equals, ErrIncompleteIndex)
+}
+
+func (s *StandaloneSuite) TestGetIndexWithNoSuchServer(c *C) {
+       hash := fmt.Sprintf("%x", md5.Sum([]byte("foo")))
+
+       st := StubGetIndexHandler{
+               c,
+               "/index/" + hash[0:3],
+               "abc123",
+               http.StatusOK,
+               []byte(hash)}
+
+       ks := RunFakeKeepServer(st)
+       defer ks.listener.Close()
+
+       arv, err := arvadosclient.MakeArvadosClient()
+       kc, _ := MakeKeepClient(&arv)
+       arv.ApiToken = "abc123"
+       kc.SetServiceRoots(map[string]string{"x": ks.url}, nil, nil)
+
+       _, err = kc.GetIndex("y", hash[0:3])
+       c.Check(err, Equals, ErrNoSuchKeepServer)
+}
+
+func (s *StandaloneSuite) TestGetIndexWithNoSuchPrefix(c *C) {
+       st := StubGetIndexHandler{
+               c,
+               "/index/abcd",
+               "abc123",
+               http.StatusOK,
+               []byte("\n")}
+
+       ks := RunFakeKeepServer(st)
+       defer ks.listener.Close()
+
+       arv, err := arvadosclient.MakeArvadosClient()
+       kc, _ := MakeKeepClient(&arv)
+       arv.ApiToken = "abc123"
+       kc.SetServiceRoots(map[string]string{"x": ks.url}, nil, nil)
+
+       r, err := kc.GetIndex("x", "abcd")
+       c.Check(err, Equals, nil)
+
+       content, err2 := ioutil.ReadAll(r)
+       c.Check(err2, Equals, nil)
+       c.Check(content, DeepEquals, st.body[0:len(st.body)-1])
+}
+
+type FailThenSucceedPutHandler struct {
+       handled        chan string
+       count          int
+       successhandler StubPutHandler
+}
+
+func (h *FailThenSucceedPutHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
+       if h.count == 0 {
+               resp.WriteHeader(500)
+               h.count += 1
+               h.handled <- fmt.Sprintf("http://%s", req.Host)
+       } else {
+               h.successhandler.ServeHTTP(resp, req)
+       }
+}
+
+func (s *StandaloneSuite) TestPutBRetry(c *C) {
+       st := &FailThenSucceedPutHandler{make(chan string, 1), 0,
+               StubPutHandler{
+                       c,
+                       Md5String("foo"),
+                       "abc123",
+                       "foo",
+                       make(chan string, 5)}}
+
+       arv, _ := arvadosclient.MakeArvadosClient()
+       kc, _ := MakeKeepClient(&arv)
+
+       kc.Want_replicas = 2
+       arv.ApiToken = "abc123"
+       localRoots := make(map[string]string)
+       writableLocalRoots := make(map[string]string)
+
+       ks := RunSomeFakeKeepServers(st, 2)
+
+       for i, k := range ks {
+               localRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
+               writableLocalRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
+               defer k.listener.Close()
+       }
+
+       kc.SetServiceRoots(localRoots, writableLocalRoots, nil)
+
+       hash, replicas, err := kc.PutB([]byte("foo"))
+
+       c.Check(err, Equals, nil)
+       c.Check(hash, Equals, "")
+       c.Check(replicas, Equals, 2)
+}
+
+func (s *ServerRequiredSuite) TestMakeKeepClientWithNonDiskTypeService(c *C) {
+       arv, err := arvadosclient.MakeArvadosClient()
+       c.Assert(err, Equals, nil)
+
+       // Add an additional "testblobstore" keepservice
+       blobKeepService := make(arvadosclient.Dict)
+       err = arv.Create("keep_services",
+               arvadosclient.Dict{"keep_service": arvadosclient.Dict{
+                       "service_host": "localhost",
+                       "service_port": "21321",
+                       "service_type": "testblobstore"}},
+               &blobKeepService)
+       c.Assert(err, Equals, nil)
+       defer func() { arv.Delete("keep_services", blobKeepService["uuid"].(string), nil, nil) }()
+
+       // Make a keepclient and ensure that the testblobstore is included
+       kc, err := MakeKeepClient(&arv)
+       c.Assert(err, Equals, nil)
+
+       // verify kc.LocalRoots
+       c.Check(len(kc.LocalRoots()), Equals, 3)
+       for _, root := range kc.LocalRoots() {
+               c.Check(root, Matches, "http://localhost:\\d+")
+       }
+       c.Assert(kc.LocalRoots()[blobKeepService["uuid"].(string)], Not(Equals), "")
+
+       // verify kc.GatewayRoots
+       c.Check(len(kc.GatewayRoots()), Equals, 3)
+       for _, root := range kc.GatewayRoots() {
+               c.Check(root, Matches, "http://localhost:\\d+")
+       }
+       c.Assert(kc.GatewayRoots()[blobKeepService["uuid"].(string)], Not(Equals), "")
+
+       // verify kc.WritableLocalRoots
+       c.Check(len(kc.WritableLocalRoots()), Equals, 3)
+       for _, root := range kc.WritableLocalRoots() {
+               c.Check(root, Matches, "http://localhost:\\d+")
+       }
+       c.Assert(kc.WritableLocalRoots()[blobKeepService["uuid"].(string)], Not(Equals), "")
+
+       c.Assert(kc.replicasPerService, Equals, 0)
+       c.Assert(kc.foundNonDiskSvc, Equals, true)
+       c.Assert(kc.Client.Timeout, Equals, 300*time.Second)
+}
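
The GetIndex tests above pin down the shape of the new index API: the first argument names a keep service by the key under which it appears in the client's service roots, the prefix selects /index versus /index/<prefix>, and a response missing the trailing blank line is reported as ErrIncompleteIndex. A minimal sketch of a caller, assuming the keepclient import path matches the sibling SDK packages and using a placeholder service UUID:

package main

import (
	"fmt"
	"io/ioutil"
	"log"

	"git.curoverse.com/arvados.git/sdk/go/arvadosclient"
	"git.curoverse.com/arvados.git/sdk/go/keepclient"
)

func main() {
	arv, err := arvadosclient.MakeArvadosClient()
	if err != nil {
		log.Fatal(err)
	}
	kc, err := keepclient.MakeKeepClient(&arv)
	if err != nil {
		log.Fatal(err)
	}
	// "zzzzz-bi6l4-xxxxxxxxxxxxxxx" is a placeholder: GetIndex wants
	// a key from the client's service roots.
	rdr, err := kc.GetIndex("zzzzz-bi6l4-xxxxxxxxxxxxxxx", "acb")
	if err != nil {
		log.Fatal(err) // e.g. ErrNoSuchKeepServer or ErrIncompleteIndex
	}
	index, err := ioutil.ReadAll(rdr)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("%s", index)
}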
diff --git a/sdk/go/keepclient/perms.go b/sdk/go/keepclient/perms.go
new file mode 100644 (file)
index 0000000..12105c6
--- /dev/null
@@ -0,0 +1,99 @@
+// Generate and verify permission signatures for Keep locators.
+//
+// See https://dev.arvados.org/projects/arvados/wiki/Keep_locator_format
+
+package keepclient
+
+import (
+       "crypto/hmac"
+       "crypto/sha1"
+       "errors"
+       "fmt"
+       "regexp"
+       "strconv"
+       "strings"
+       "time"
+)
+
+var (
+       // ErrSignatureExpired - a signature was rejected because the
+       // expiry time has passed.
+       ErrSignatureExpired = errors.New("Signature expired")
+       // ErrSignatureInvalid - a signature was rejected because it
+       // was badly formatted or did not match the given secret key.
+       ErrSignatureInvalid = errors.New("Invalid signature")
+       // ErrSignatureMissing - the given locator does not have a
+       // signature hint.
+       ErrSignatureMissing = errors.New("Missing signature")
+)
+
+// makePermSignature generates a SHA-1 HMAC digest for the given blob,
+// token, expiry, and site secret.
+func makePermSignature(blobHash, apiToken, expiry string, permissionSecret []byte) string {
+       hmac := hmac.New(sha1.New, permissionSecret)
+       hmac.Write([]byte(blobHash))
+       hmac.Write([]byte("@"))
+       hmac.Write([]byte(apiToken))
+       hmac.Write([]byte("@"))
+       hmac.Write([]byte(expiry))
+       digest := hmac.Sum(nil)
+       return fmt.Sprintf("%x", digest)
+}
+
+// SignLocator returns blobLocator with a permission signature
+// added. If either permissionSecret or apiToken is empty, blobLocator
+// is returned untouched.
+//
+// This function is intended to be used by system components and admin
+// utilities: userland programs do not know the permissionSecret.
+func SignLocator(blobLocator, apiToken string, expiry time.Time, permissionSecret []byte) string {
+       if len(permissionSecret) == 0 || apiToken == "" {
+               return blobLocator
+       }
+       // Strip off all hints: only the hash is used to sign.
+       blobHash := strings.Split(blobLocator, "+")[0]
+       timestampHex := fmt.Sprintf("%08x", expiry.Unix())
+       return blobLocator +
+               "+A" + makePermSignature(blobHash, apiToken, timestampHex, permissionSecret) +
+               "@" + timestampHex
+}
+
+var signedLocatorRe = regexp.MustCompile(`^([[:xdigit:]]{32}).*\+A([[:xdigit:]]{40})@([[:xdigit:]]{8})`)
+
+// VerifySignature returns nil if the signature on the signedLocator
+// can be verified using the given apiToken. Otherwise it returns
+// ErrSignatureExpired (if the signature's expiry time has passed,
+// which is something the client could have figured out
+// independently), ErrSignatureMissing (if there is no signature hint
+// at all), or ErrSignatureInvalid (if the signature is present but
+// badly formatted or incorrect).
+//
+// This function is intended to be used by system components and admin
+// utilities: userland programs do not know the permissionSecret.
+func VerifySignature(signedLocator, apiToken string, permissionSecret []byte) error {
+       matches := signedLocatorRe.FindStringSubmatch(signedLocator)
+       if matches == nil {
+               return ErrSignatureMissing
+       }
+       blobHash := matches[1]
+       signatureHex := matches[2]
+       expiryHex := matches[3]
+       if expiryTime, err := parseHexTimestamp(expiryHex); err != nil {
+               return ErrSignatureInvalid
+       } else if expiryTime.Before(time.Now()) {
+               return ErrSignatureExpired
+       }
+       if signatureHex != makePermSignature(blobHash, apiToken, expiryHex, permissionSecret) {
+               return ErrSignatureInvalid
+       }
+       return nil
+}
+
+func parseHexTimestamp(timestampHex string) (ts time.Time, err error) {
+       if tsInt, e := strconv.ParseInt(timestampHex, 16, 0); e == nil {
+               ts = time.Unix(tsInt, 0)
+       } else {
+               err = e
+       }
+       return ts, err
+}
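
A quick sketch of the new signing helpers in use; the secret, token, and expiry below are placeholders, and the import path is assumed to match the other SDK packages:

package main

import (
	"fmt"
	"time"

	"git.curoverse.com/arvados.git/sdk/go/keepclient"
)

func main() {
	secret := []byte("example-permission-secret") // placeholder
	token := "example-api-token"                  // placeholder
	locator := "acbd18db4cc2f85cedef654fccc4a4d8+3"

	// Signing appends "+A<40-digit hex HMAC>@<8-digit hex expiry>".
	signed := keepclient.SignLocator(locator, token, time.Now().Add(24*time.Hour), secret)
	fmt.Println(signed)

	// Verification distinguishes missing, invalid, and expired signatures.
	if err := keepclient.VerifySignature(signed, token, secret); err != nil {
		fmt.Println("rejected:", err)
	}
}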
diff --git a/sdk/go/keepclient/perms_test.go b/sdk/go/keepclient/perms_test.go
new file mode 100644 (file)
index 0000000..1380795
--- /dev/null
@@ -0,0 +1,98 @@
+package keepclient
+
+import (
+       "testing"
+       "time"
+)
+
+const (
+       knownHash    = "acbd18db4cc2f85cedef654fccc4a4d8"
+       knownLocator = knownHash + "+3"
+       knownToken   = "hocfupkn2pjhrpgp2vxv8rsku7tvtx49arbc9s4bvu7p7wxqvk"
+       knownKey     = "13u9fkuccnboeewr0ne3mvapk28epf68a3bhj9q8sb4l6e4e5mkk" +
+               "p6nhj2mmpscgu1zze5h5enydxfe3j215024u16ij4hjaiqs5u4pzsl3nczmaoxnc" +
+               "ljkm4875xqn4xv058koz3vkptmzhyheiy6wzevzjmdvxhvcqsvr5abhl15c2d4o4" +
+               "jhl0s91lojy1mtrzqqvprqcverls0xvy9vai9t1l1lvvazpuadafm71jl4mrwq2y" +
+               "gokee3eamvjy8qq1fvy238838enjmy5wzy2md7yvsitp5vztft6j4q866efym7e6" +
+               "vu5wm9fpnwjyxfldw3vbo01mgjs75rgo7qioh8z8ij7jpyp8508okhgbbex3ceei" +
+               "786u5rw2a9gx743dj3fgq2irk"
+       knownSignature     = "257f3f5f5f0a4e4626a18fc74bd42ec34dcb228a"
+       knownTimestamp     = "7fffffff"
+       knownSigHint       = "+A" + knownSignature + "@" + knownTimestamp
+       knownSignedLocator = knownLocator + knownSigHint
+)
+
+func TestSignLocator(t *testing.T) {
+       if ts, err := parseHexTimestamp(knownTimestamp); err != nil {
+               t.Errorf("bad knownTimestamp %s", knownTimestamp)
+       } else {
+               if knownSignedLocator != SignLocator(knownLocator, knownToken, ts, []byte(knownKey)) {
+                       t.Fail()
+               }
+       }
+}
+
+func TestVerifySignature(t *testing.T) {
+       if VerifySignature(knownSignedLocator, knownToken, []byte(knownKey)) != nil {
+               t.Fail()
+       }
+}
+
+func TestVerifySignatureExtraHints(t *testing.T) {
+       if VerifySignature(knownLocator+"+K@xyzzy"+knownSigHint, knownToken, []byte(knownKey)) != nil {
+               t.Fatal("Verify cannot handle hint before permission signature")
+       }
+
+       if VerifySignature(knownLocator+knownSigHint+"+Zfoo", knownToken, []byte(knownKey)) != nil {
+               t.Fatal("Verify cannot handle hint after permission signature")
+       }
+
+       if VerifySignature(knownLocator+"+K@xyzzy"+knownSigHint+"+Zfoo", knownToken, []byte(knownKey)) != nil {
+               t.Fatal("Verify cannot handle hints around permission signature")
+       }
+}
+
+// The size hint on the locator string should not affect signature validation.
+func TestVerifySignatureWrongSize(t *testing.T) {
+       if VerifySignature(knownHash+"+999999"+knownSigHint, knownToken, []byte(knownKey)) != nil {
+               t.Fatal("Verify cannot handle incorrect size hint")
+       }
+
+       if VerifySignature(knownHash+knownSigHint, knownToken, []byte(knownKey)) != nil {
+               t.Fatal("Verify cannot handle missing size hint")
+       }
+}
+
+func TestVerifySignatureBadSig(t *testing.T) {
+       badLocator := knownLocator + "+Aaaaaaaaaaaaaaaa@" + knownTimestamp
+       if VerifySignature(badLocator, knownToken, []byte(knownKey)) != ErrSignatureMissing {
+               t.Fail()
+       }
+}
+
+func TestVerifySignatureBadTimestamp(t *testing.T) {
+       badLocator := knownLocator + "+A" + knownSignature + "@OOOOOOOl"
+       if VerifySignature(badLocator, knownToken, []byte(knownKey)) != ErrSignatureMissing {
+               t.Fail()
+       }
+}
+
+func TestVerifySignatureBadSecret(t *testing.T) {
+       if VerifySignature(knownSignedLocator, knownToken, []byte("00000000000000000000")) != ErrSignatureInvalid {
+               t.Fail()
+       }
+}
+
+func TestVerifySignatureBadToken(t *testing.T) {
+       if VerifySignature(knownSignedLocator, "00000000", []byte(knownKey)) != ErrSignatureInvalid {
+               t.Fail()
+       }
+}
+
+func TestVerifySignatureExpired(t *testing.T) {
+       yesterday := time.Now().AddDate(0, 0, -1)
+       expiredLocator := SignLocator(knownHash, knownToken, yesterday, []byte(knownKey))
+       if VerifySignature(expiredLocator, knownToken, []byte(knownKey)) != ErrSignatureExpired {
+               t.Fail()
+       }
+}
index 63800b1e08577edec185cab13bc6c97780b349b1..b12f512507a71d8b07c08da17d7db29f93228fd2 100644 (file)
@@ -7,13 +7,18 @@ import (
        "git.curoverse.com/arvados.git/sdk/go/streamer"
        "io"
        "io/ioutil"
-       "log"
+       "math/rand"
        "net"
        "net/http"
        "strings"
        "time"
 )
 
+// DebugPrintf is the function used to emit debug messages. The
+// easiest way to enable keepclient debug messages in your application
+// is to assign log.Printf to DebugPrintf.
+var DebugPrintf = func(string, ...interface{}) {}
+
 type keepService struct {
        Uuid     string `json:"uuid"`
        Hostname string `json:"service_host"`
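
Since DebugPrintf defaults to a no-op, an application opts into keepclient debug output with a single assignment; a minimal sketch:

package main

import (
	"log"

	"git.curoverse.com/arvados.git/sdk/go/keepclient"
)

func main() {
	// Route keepclient debug messages to the standard logger; the
	// default DebugPrintf discards them.
	keepclient.DebugPrintf = log.Printf
	// ... construct a KeepClient and proceed as usual ...
}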
@@ -28,9 +33,9 @@ func Md5String(s string) string {
        return fmt.Sprintf("%x", md5.Sum([]byte(s)))
 }
 
-// Set timeouts apply when connecting to keepproxy services (assumed to be over
-// the Internet).
-func (this *KeepClient) setClientSettingsProxy() {
+// Set timeouts applicable when connecting to non-disk services
+// (assumed to be over the Internet).
+func (this *KeepClient) setClientSettingsNonDisk() {
        if this.Client.Timeout == 0 {
                // Maximum time to wait for a complete response
                this.Client.Timeout = 300 * time.Second
@@ -52,8 +57,8 @@ func (this *KeepClient) setClientSettingsProxy() {
        }
 }
 
-// Set timeouts apply when connecting to keepstore services directly (assumed
-// to be on the local network).
+// Set timeouts applicable when connecting to keepstore services directly
+// (assumed to be on the local network).
 func (this *KeepClient) setClientSettingsDisk() {
        if this.Client.Timeout == 0 {
                // Maximum time to wait for a complete response
@@ -76,69 +81,8 @@ func (this *KeepClient) setClientSettingsDisk() {
        }
 }
 
-// DiscoverKeepServers gets list of available keep services from api server
-func (this *KeepClient) DiscoverKeepServers() error {
-       type svcList struct {
-               Items []keepService `json:"items"`
-       }
-       var m svcList
-
-       // Get keep services from api server
-       err := this.Arvados.Call("GET", "keep_services", "", "accessible", nil, &m)
-       if err != nil {
-               return err
-       }
-
-       listed := make(map[string]bool)
-       localRoots := make(map[string]string)
-       gatewayRoots := make(map[string]string)
-       writableLocalRoots := make(map[string]string)
-
-       // replicasPerService is 1 for disks; unknown or unlimited otherwise
-       this.replicasPerService = 1
-       this.Using_proxy = false
-
-       for _, service := range m.Items {
-               scheme := "http"
-               if service.SSL {
-                       scheme = "https"
-               }
-               url := fmt.Sprintf("%s://%s:%d", scheme, service.Hostname, service.Port)
-
-               // Skip duplicates
-               if listed[url] {
-                       continue
-               }
-               listed[url] = true
-
-               localRoots[service.Uuid] = url
-               if service.SvcType == "proxy" {
-                       this.Using_proxy = true
-               }
-
-               if service.ReadOnly == false {
-                       writableLocalRoots[service.Uuid] = url
-                       if service.SvcType != "disk" {
-                               this.replicasPerService = 0
-                       }
-               }
-
-               // Gateway services are only used when specified by
-               // UUID, so there's nothing to gain by filtering them
-               // by service type. Including all accessible services
-               // (gateway and otherwise) merely accommodates more
-               // service configurations.
-               gatewayRoots[service.Uuid] = url
-       }
-
-       if this.Using_proxy {
-               this.setClientSettingsProxy()
-       } else {
-               this.setClientSettingsDisk()
-       }
-
-       this.SetServiceRoots(localRoots, writableLocalRoots, gatewayRoots)
-       return nil
+type svcList struct {
+       Items []keepService `json:"items"`
 }
 
 type uploadStatus struct {
@@ -149,14 +93,14 @@ type uploadStatus struct {
        response        string
 }
 
-func (this KeepClient) uploadToKeepServer(host string, hash string, body io.ReadCloser,
-       upload_status chan<- uploadStatus, expectedLength int64, requestId string) {
+func (this *KeepClient) uploadToKeepServer(host string, hash string, body io.ReadCloser,
+       upload_status chan<- uploadStatus, expectedLength int64, requestID int32) {
 
        var req *http.Request
        var err error
        var url = fmt.Sprintf("%s/%s", host, hash)
        if req, err = http.NewRequest("PUT", url, nil); err != nil {
-               log.Printf("[%v] Error creating request PUT %v error: %v", requestId, url, err.Error())
+               DebugPrintf("DEBUG: [%08x] Error creating request PUT %v error: %v", requestID, url, err.Error())
                upload_status <- uploadStatus{err, url, 0, 0, ""}
                body.Close()
                return
@@ -181,7 +125,7 @@ func (this KeepClient) uploadToKeepServer(host string, hash string, body io.Read
 
        var resp *http.Response
        if resp, err = this.Client.Do(req); err != nil {
-               log.Printf("[%v] Upload failed %v error: %v", requestId, url, err.Error())
+               DebugPrintf("DEBUG: [%08x] Upload failed %v error: %v", requestID, url, err.Error())
                upload_status <- uploadStatus{err, url, 0, 0, ""}
                return
        }
@@ -194,28 +138,28 @@ func (this KeepClient) uploadToKeepServer(host string, hash string, body io.Read
        defer resp.Body.Close()
        defer io.Copy(ioutil.Discard, resp.Body)
 
-       respbody, err2 := ioutil.ReadAll(&io.LimitedReader{resp.Body, 4096})
+       respbody, err2 := ioutil.ReadAll(&io.LimitedReader{R: resp.Body, N: 4096})
        response := strings.TrimSpace(string(respbody))
        if err2 != nil && err2 != io.EOF {
-               log.Printf("[%v] Upload %v error: %v response: %v", requestId, url, err2.Error(), response)
+               DebugPrintf("DEBUG: [%08x] Upload %v error: %v response: %v", requestID, url, err2.Error(), response)
                upload_status <- uploadStatus{err2, url, resp.StatusCode, rep, response}
        } else if resp.StatusCode == http.StatusOK {
-               log.Printf("[%v] Upload %v success", requestId, url)
+               DebugPrintf("DEBUG: [%08x] Upload %v success", requestID, url)
                upload_status <- uploadStatus{nil, url, resp.StatusCode, rep, response}
        } else {
-               log.Printf("[%v] Upload %v error: %v response: %v", requestId, url, resp.StatusCode, response)
+               DebugPrintf("DEBUG: [%08x] Upload %v error: %v response: %v", requestID, url, resp.StatusCode, response)
                upload_status <- uploadStatus{errors.New(resp.Status), url, resp.StatusCode, rep, response}
        }
 }
 
-func (this KeepClient) putReplicas(
+func (this *KeepClient) putReplicas(
        hash string,
        tr *streamer.AsyncStream,
        expectedLength int64) (locator string, replicas int, err error) {
 
-       // Take the hash of locator and timestamp in order to identify this
-       // specific transaction in log statements.
-       requestId := fmt.Sprintf("%x", md5.Sum([]byte(locator+time.Now().String())))[0:8]
+       // Generate an arbitrary ID to identify this specific
+       // transaction in debug logs.
+       requestID := rand.Int31()
 
        // Calculate the ordering for uploading to servers
        sv := NewRootSorter(this.WritableLocalRoots(), hash).GetSortedRoots()
@@ -228,7 +172,17 @@ func (this KeepClient) putReplicas(
 
        // Used to communicate status from the upload goroutines
        upload_status := make(chan uploadStatus)
-       defer close(upload_status)
+       defer func() {
+               // Wait for any abandoned uploads (e.g., we started
+               // two uploads and the first replied with replicas=2)
+               // to finish before closing the status channel.
+               go func() {
+                       for active > 0 {
+                               <-upload_status
+                       }
+                       close(upload_status)
+               }()
+       }()
 
        // Desired number of replicas
        remaining_replicas := this.Want_replicas
@@ -239,34 +193,53 @@ func (this KeepClient) putReplicas(
                replicasPerThread = remaining_replicas
        }
 
-       for remaining_replicas > 0 {
-               for active*replicasPerThread < remaining_replicas {
-                       // Start some upload requests
-                       if next_server < len(sv) {
-                               log.Printf("[%v] Begin upload %s to %s", requestId, hash, sv[next_server])
-                               go this.uploadToKeepServer(sv[next_server], hash, tr.MakeStreamReader(), upload_status, expectedLength, requestId)
-                               next_server += 1
-                               active += 1
-                       } else {
-                               if active == 0 {
-                                       return locator, (this.Want_replicas - remaining_replicas), InsufficientReplicasError
+       retriesRemaining := 1 + this.Retries
+       var retryServers []string
+
+       for retriesRemaining > 0 {
+               retriesRemaining -= 1
+               next_server = 0
+               retryServers = []string{}
+               for remaining_replicas > 0 {
+                       for active*replicasPerThread < remaining_replicas {
+                               // Start some upload requests
+                               if next_server < len(sv) {
+                                       DebugPrintf("DEBUG: [%08x] Begin upload %s to %s", requestID, hash, sv[next_server])
+                                       go this.uploadToKeepServer(sv[next_server], hash, tr.MakeStreamReader(), upload_status, expectedLength, requestID)
+                                       next_server += 1
+                                       active += 1
                                } else {
-                                       break
+                                       if active == 0 && retriesRemaining == 0 {
+                                               return locator, (this.Want_replicas - remaining_replicas), InsufficientReplicasError
+                                       } else {
+                                               break
+                                       }
                                }
                        }
+                       DebugPrintf("DEBUG: [%08x] Replicas remaining to write: %v active uploads: %v",
+                               requestID, remaining_replicas, active)
+
+                       // Now wait for something to happen.
+                       if active > 0 {
+                               status := <-upload_status
+                               active -= 1
+
+                               if status.statusCode == 200 {
+                                       // good news!
+                                       remaining_replicas -= status.replicas_stored
+                                       locator = status.response
+                               } else if status.statusCode == 0 || status.statusCode == 408 || status.statusCode == 429 ||
+                                       (status.statusCode >= 500 && status.statusCode != 503) {
+                                       // Timeout, too many requests, or other server-side failure.
+                                       // Do not retry on 503, which means the keep server is full.
+                                       retryServers = append(retryServers, status.url[0:strings.LastIndex(status.url, "/")])
+                               }
+                       } else {
+                               break
+                       }
                }
-               log.Printf("[%v] Replicas remaining to write: %v active uploads: %v",
-                       requestId, remaining_replicas, active)
-
-               // Now wait for something to happen.
-               status := <-upload_status
-               active -= 1
 
-               if status.statusCode == 200 {
-                       // good news!
-                       remaining_replicas -= status.replicas_stored
-                       locator = status.response
-               }
+               sv = retryServers
        }
 
        return locator, this.Want_replicas, nil
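
With this rewrite, write retries are governed by the client's Retries field; the earlier test change sets kc.Retries = 0 precisely to preserve single-attempt behavior. A sketch of a caller opting in, with placeholder data and the same assumed import paths as above:

package main

import (
	"fmt"
	"log"

	"git.curoverse.com/arvados.git/sdk/go/arvadosclient"
	"git.curoverse.com/arvados.git/sdk/go/keepclient"
)

func main() {
	arv, err := arvadosclient.MakeArvadosClient()
	if err != nil {
		log.Fatal(err)
	}
	kc, err := keepclient.MakeKeepClient(&arv)
	if err != nil {
		log.Fatal(err)
	}
	kc.Want_replicas = 2
	// Make up to 3 extra passes over servers that failed with a
	// connection error, 408, 429, or a 5xx other than 503.
	kc.Retries = 3
	locator, replicas, err := kc.PutB([]byte("example data"))
	if err != nil {
		log.Fatalf("PutB: %v", err)
	}
	fmt.Printf("stored %s on %d replicas\n", locator, replicas)
}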
index a989afcf26cb7a6d7b7bad6e4b1589d30e06520d..3b2db3a321271495a3965759363e2d723ea9e082 100644 (file)
@@ -23,6 +23,7 @@
 package logger
 
 import (
+       "fmt"
        "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
        "log"
        "time"
@@ -73,16 +74,18 @@ type Logger struct {
 }
 
 // Create a new logger based on the specified parameters.
-func NewLogger(params LoggerParams) *Logger {
+func NewLogger(params LoggerParams) (l *Logger, err error) {
        // sanity check parameters
        if &params.Client == nil {
-               log.Fatal("Nil arvados client in LoggerParams passed in to NewLogger()")
+               err = fmt.Errorf("Nil arvados client in LoggerParams passed in to NewLogger()")
+               return
        }
        if params.EventTypePrefix == "" {
-               log.Fatal("Empty event type prefix in LoggerParams passed in to NewLogger()")
+               err = fmt.Errorf("Empty event type prefix in LoggerParams passed in to NewLogger()")
+               return
        }
 
-       l := &Logger{
+       l = &Logger{
                data:        make(map[string]interface{}),
                entry:       make(map[string]interface{}),
                properties:  make(map[string]interface{}),
@@ -97,7 +100,7 @@ func NewLogger(params LoggerParams) *Logger {
        // Start the worker goroutine.
        go l.work()
 
-       return l
+       return l, nil
 }
 
 // Exported functions will be called from other goroutines, therefore
@@ -196,7 +199,6 @@ func (l *Logger) write(isFinal bool) {
        // client.
        err := l.params.Client.Create("logs", l.data, nil)
        if err != nil {
-               log.Printf("Attempted to log: %v", l.data)
-               log.Fatalf("Received error writing log: %v", err)
+               log.Printf("Received error writing %v: %v", l.data, err)
        }
 }
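
Callers of NewLogger must now handle an error return instead of relying on a log.Fatal inside the constructor. A minimal sketch of an updated call site, showing only the two fields the constructor validates (the event type prefix is a placeholder):

package main

import (
	"log"

	"git.curoverse.com/arvados.git/sdk/go/arvadosclient"
	"git.curoverse.com/arvados.git/sdk/go/logger"
)

func main() {
	arv, err := arvadosclient.MakeArvadosClient()
	if err != nil {
		log.Fatalf("MakeArvadosClient: %v", err)
	}
	arvLogger, err := logger.NewLogger(logger.LoggerParams{
		Client:          arv,
		EventTypePrefix: "example-prefix", // placeholder
	})
	if err != nil {
		log.Fatalf("NewLogger: %v", err) // previously a log.Fatal inside NewLogger
	}
	_ = arvLogger // use as before
}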
index 4e816cd73b30abbb7afce9f4b51d3767a897cfa8..d4b9830065e4345b1e717f2141e14bdf9d526e5d 100644 (file)
 package manifest
 
 import (
+       "errors"
+       "fmt"
        "git.curoverse.com/arvados.git/sdk/go/blockdigest"
-       "log"
+       "regexp"
+       "strconv"
        "strings"
 )
 
+var ErrInvalidToken = errors.New("Invalid token")
+
+var LocatorPattern = regexp.MustCompile(
+       "^[0-9a-fA-F]{32}\\+[0-9]+(\\+[A-Z][A-Za-z0-9@_-]+)*$")
+
 type Manifest struct {
        Text string
+       Err  error
+}
+
+type BlockLocator struct {
+       Digest blockdigest.BlockDigest
+       Size   int
+       Hints  []string
+}
+
+type DataSegment struct {
+       BlockLocator
+       Locator      string
+       StreamOffset uint64
+}
+
+// FileSegment is a portion of a file that is contained within a
+// single block.
+type FileSegment struct {
+       Locator string
+       // Offset (within this block) of this data segment
+       Offset int
+       Len    int
 }
 
 // Represents a single line from a manifest.
 type ManifestStream struct {
        StreamName string
        Blocks     []string
-       Files      []string
+       FileTokens []string
+       Err        error
+}
+
+var escapeSeq = regexp.MustCompile(`\\([0-9]{3}|\\)`)
+
+func unescapeSeq(seq string) string {
+       if seq == `\\` {
+               return `\`
+       }
+       i, err := strconv.ParseUint(seq[1:], 8, 8)
+       if err != nil {
+               // Invalid escape sequence: can't unescape.
+               return seq
+       }
+       return string([]byte{byte(i)})
+}
+
+func UnescapeName(s string) string {
+       return escapeSeq.ReplaceAllStringFunc(s, unescapeSeq)
+}
+
+func ParseBlockLocator(s string) (b BlockLocator, err error) {
+       if !LocatorPattern.MatchString(s) {
+               err = fmt.Errorf("String \"%s\" does not match BlockLocator pattern "+
+                       "\"%s\".",
+                       s,
+                       LocatorPattern.String())
+       } else {
+               tokens := strings.Split(s, "+")
+               var blockSize int64
+               var blockDigest blockdigest.BlockDigest
+               // We expect both of the following to succeed since LocatorPattern
+               // restricts the strings appropriately.
+               blockDigest, err = blockdigest.FromString(tokens[0])
+               if err != nil {
+                       return
+               }
+               blockSize, err = strconv.ParseInt(tokens[1], 10, 0)
+               if err != nil {
+                       return
+               }
+               b.Digest = blockDigest
+               b.Size = int(blockSize)
+               b.Hints = tokens[2:]
+       }
+       return
+}
+
+func parseFileToken(tok string) (segPos, segLen uint64, name string, err error) {
+       parts := strings.SplitN(tok, ":", 3)
+       if len(parts) != 3 {
+               err = ErrInvalidToken
+               return
+       }
+       segPos, err = strconv.ParseUint(parts[0], 10, 64)
+       if err != nil {
+               return
+       }
+       segLen, err = strconv.ParseUint(parts[1], 10, 64)
+       if err != nil {
+               return
+       }
+       name = UnescapeName(parts[2])
+       return
+}
+
+func (s *ManifestStream) FileSegmentIterByName(filepath string) <-chan *FileSegment {
+       ch := make(chan *FileSegment)
+       go func() {
+               s.sendFileSegmentIterByName(filepath, ch)
+               close(ch)
+       }()
+       return ch
+}
+
+func (s *ManifestStream) sendFileSegmentIterByName(filepath string, ch chan<- *FileSegment) {
+       blockLens := make([]int, 0, len(s.Blocks))
+       // This is what streamName+"/"+fileName will look like:
+       target := "./" + filepath
+       for _, fTok := range s.FileTokens {
+               wantPos, wantLen, name, err := parseFileToken(fTok)
+               if err != nil {
+                       // Skip (!) invalid file tokens.
+                       continue
+               }
+               if s.StreamName+"/"+name != target {
+                       continue
+               }
+               if wantLen == 0 {
+                       ch <- &FileSegment{Locator: "d41d8cd98f00b204e9800998ecf8427e+0", Offset: 0, Len: 0}
+                       continue
+               }
+               // Linear search for blocks containing data for this
+               // file
+               var blockPos uint64 // position of block in stream
+               for i, loc := range s.Blocks {
+                       if blockPos >= wantPos+wantLen {
+                               break
+                       }
+                       if len(blockLens) <= i {
+                               blockLens = blockLens[:i+1]
+                               b, err := ParseBlockLocator(loc)
+                               if err != nil {
+                                       // Unparseable locator -> unusable
+                                       // stream.
+                                       ch <- nil
+                                       return
+                               }
+                               blockLens[i] = b.Size
+                       }
+                       blockLen := uint64(blockLens[i])
+                       if blockPos+blockLen <= wantPos {
+                               blockPos += blockLen
+                               continue
+                       }
+                       fseg := FileSegment{
+                               Locator: loc,
+                               Offset:  0,
+                               Len:     blockLens[i],
+                       }
+                       if blockPos < wantPos {
+                               fseg.Offset = int(wantPos - blockPos)
+                               fseg.Len -= fseg.Offset
+                       }
+                       if blockPos+blockLen > wantPos+wantLen {
+                               fseg.Len = int(wantPos+wantLen-blockPos) - fseg.Offset
+                       }
+                       ch <- &fseg
+                       blockPos += blockLen
+               }
+       }
 }
 
 func parseManifestStream(s string) (m ManifestStream) {
        tokens := strings.Split(s, " ")
-       m.StreamName = tokens[0]
+
+       m.StreamName = UnescapeName(tokens[0])
+       if m.StreamName != "." && !strings.HasPrefix(m.StreamName, "./") {
+               m.Err = fmt.Errorf("Invalid stream name: %s", m.StreamName)
+               return
+       }
+
        tokens = tokens[1:]
        var i int
-       for i = range tokens {
+       for i = 0; i < len(tokens); i++ {
                if !blockdigest.IsBlockLocator(tokens[i]) {
                        break
                }
        }
        m.Blocks = tokens[:i]
-       m.Files = tokens[i:]
+       m.FileTokens = tokens[i:]
+
+       if len(m.Blocks) == 0 {
+               m.Err = fmt.Errorf("No block locators found")
+               return
+       }
+
+       if len(m.FileTokens) == 0 {
+               m.Err = fmt.Errorf("No file tokens found")
+               return
+       }
+
+       for _, ft := range m.FileTokens {
+               _, _, _, err := parseFileToken(ft)
+               if err != nil {
+                       m.Err = fmt.Errorf("Invalid file token: %s", ft)
+                       break
+               }
+       }
+
        return
 }
 
@@ -58,18 +244,38 @@ func (m *Manifest) StreamIter() <-chan ManifestStream {
        return ch
 }
 
+func (m *Manifest) FileSegmentIterByName(filepath string) <-chan *FileSegment {
+       ch := make(chan *FileSegment)
+       go func() {
+               for stream := range m.StreamIter() {
+                       if !strings.HasPrefix("./"+filepath, stream.StreamName+"/") {
+                               continue
+                       }
+                       stream.sendFileSegmentIterByName(filepath, ch)
+               }
+               close(ch)
+       }()
+       return ch
+}
+
 // Blocks may appear multiple times within the same manifest if they
 // are used by multiple files. In that case this Iterator will output
 // the same block multiple times.
+//
+// To detect parse errors, the caller must check m.Err after the returned channel closes.
 func (m *Manifest) BlockIterWithDuplicates() <-chan blockdigest.BlockLocator {
        blockChannel := make(chan blockdigest.BlockLocator)
        go func(streamChannel <-chan ManifestStream) {
-               for m := range streamChannel {
-                       for _, block := range m.Blocks {
+               for ms := range streamChannel {
+                       if ms.Err != nil {
+                               m.Err = ms.Err
+                               continue
+                       }
+                       for _, block := range ms.Blocks {
                                if b, err := blockdigest.ParseBlockLocator(block); err == nil {
                                        blockChannel <- b
                                } else {
-                                       log.Printf("ERROR: Failed to parse block: %v", err)
+                                       m.Err = err
                                }
                        }
                }
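
Taken together, the reworked manifest API reports parse errors through Manifest.Err once iteration finishes, and adds per-file segment iteration. A sketch over a one-stream manifest (the block hash is the well-known MD5 of "foo"; the import path is assumed to match the package's location in sdk/go/manifest):

package main

import (
	"fmt"
	"log"

	"git.curoverse.com/arvados.git/sdk/go/manifest"
)

func main() {
	m := manifest.Manifest{Text: ". acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:foo.txt\n"}

	// Enumerate block locators; errors surface on m.Err after the
	// channel closes instead of being logged and dropped.
	for b := range m.BlockIterWithDuplicates() {
		fmt.Println(b.Digest, b.Size, b.Hints)
	}
	if m.Err != nil {
		log.Fatalf("bad manifest: %v", m.Err)
	}

	// Enumerate the segments that make up one named file; a nil
	// segment signals an unusable stream.
	for seg := range m.FileSegmentIterByName("foo.txt") {
		if seg == nil {
			log.Fatal("unparseable block locator in stream")
		}
		fmt.Printf("%s offset=%d len=%d\n", seg.Locator, seg.Offset, seg.Len)
	}
}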
index 8cfe3d907721e4f7d33048dfa16ef91e38dec12d..43c3bd34862343a9eec30870346896d5fd84c15b 100644 (file)
@@ -1,16 +1,20 @@
 package manifest
 
 import (
-       "git.curoverse.com/arvados.git/sdk/go/blockdigest"
        "io/ioutil"
+       "reflect"
+       "regexp"
        "runtime"
        "testing"
+
+       "git.curoverse.com/arvados.git/sdk/go/arvadostest"
+       "git.curoverse.com/arvados.git/sdk/go/blockdigest"
 )
 
 func getStackTrace() string {
        buf := make([]byte, 1000)
-       bytes_written := runtime.Stack(buf, false)
-       return "Stack Trace:\n" + string(buf[:bytes_written])
+       bytesWritten := runtime.Stack(buf, false)
+       return "Stack Trace:\n" + string(buf[:bytesWritten])
 }
 
 func expectFromChannel(t *testing.T, c <-chan string, expected string) {
@@ -60,7 +64,7 @@ func expectStringSlicesEqual(t *testing.T, actual []string, expected []string) {
 func expectManifestStream(t *testing.T, actual ManifestStream, expected ManifestStream) {
        expectEqual(t, actual.StreamName, expected.StreamName)
        expectStringSlicesEqual(t, actual.Blocks, expected.Blocks)
-       expectStringSlicesEqual(t, actual.Files, expected.Files)
+       expectStringSlicesEqual(t, actual.FileTokens, expected.FileTokens)
 }
 
 func expectBlockLocator(t *testing.T, actual blockdigest.BlockLocator, expected blockdigest.BlockLocator) {
@@ -72,8 +76,24 @@ func expectBlockLocator(t *testing.T, actual blockdigest.BlockLocator, expected
 func TestParseManifestStreamSimple(t *testing.T) {
        m := parseManifestStream(". 365f83f5f808896ec834c8b595288735+2310+K@qr1hi+Af0c9a66381f3b028677411926f0be1c6282fe67c@542b5ddf 0:2310:qr1hi-8i9sb-ienvmpve1a0vpoi.log.txt")
        expectManifestStream(t, m, ManifestStream{StreamName: ".",
-               Blocks: []string{"365f83f5f808896ec834c8b595288735+2310+K@qr1hi+Af0c9a66381f3b028677411926f0be1c6282fe67c@542b5ddf"},
-               Files:  []string{"0:2310:qr1hi-8i9sb-ienvmpve1a0vpoi.log.txt"}})
+               Blocks:     []string{"365f83f5f808896ec834c8b595288735+2310+K@qr1hi+Af0c9a66381f3b028677411926f0be1c6282fe67c@542b5ddf"},
+               FileTokens: []string{"0:2310:qr1hi-8i9sb-ienvmpve1a0vpoi.log.txt"}})
+}
+
+func TestParseBlockLocatorSimple(t *testing.T) {
+       b, err := ParseBlockLocator("365f83f5f808896ec834c8b595288735+2310+K@qr1hi+Af0c9a66381f3b028677411926f0be1c6282fe67c@542b5ddf")
+       if err != nil {
+               t.Fatalf("Unexpected error parsing block locator: %v", err)
+       }
+       d, err := blockdigest.FromString("365f83f5f808896ec834c8b595288735")
+       if err != nil {
+               t.Fatalf("Unexpected error during FromString for block locator: %v", err)
+       }
+       expectBlockLocator(t, blockdigest.BlockLocator{b.Digest, b.Size, b.Hints},
+               blockdigest.BlockLocator{Digest: d,
+                       Size: 2310,
+                       Hints: []string{"K@qr1hi",
+                               "Af0c9a66381f3b028677411926f0be1c6282fe67c@542b5ddf"}})
 }
 
 func TestStreamIterShortManifestWithBlankStreams(t *testing.T) {
@@ -81,15 +101,15 @@ func TestStreamIterShortManifestWithBlankStreams(t *testing.T) {
        if err != nil {
                t.Fatalf("Unexpected error reading manifest from file: %v", err)
        }
-       manifest := Manifest{string(content)}
+       manifest := Manifest{Text: string(content)}
        streamIter := manifest.StreamIter()
 
        firstStream := <-streamIter
        expectManifestStream(t,
                firstStream,
                ManifestStream{StreamName: ".",
-                       Blocks: []string{"b746e3d2104645f2f64cd3cc69dd895d+15693477+E2866e643690156651c03d876e638e674dcd79475@5441920c"},
-                       Files:  []string{"0:15893477:chr10_band0_s0_e3000000.fj"}})
+                       Blocks:     []string{"b746e3d2104645f2f64cd3cc69dd895d+15693477+E2866e643690156651c03d876e638e674dcd79475@5441920c"},
+                       FileTokens: []string{"0:15893477:chr10_band0_s0_e3000000.fj"}})
 
        received, ok := <-streamIter
        if ok {
@@ -103,26 +123,125 @@ func TestBlockIterLongManifest(t *testing.T) {
        if err != nil {
                t.Fatalf("Unexpected error reading manifest from file: %v", err)
        }
-       manifest := Manifest{string(content)}
+       manifest := Manifest{Text: string(content)}
        blockChannel := manifest.BlockIterWithDuplicates()
 
        firstBlock := <-blockChannel
+       d, err := blockdigest.FromString("b746e3d2104645f2f64cd3cc69dd895d")
+       if err != nil {
+               t.Fatalf("Unexpected error during FromString for block: %v", err)
+       }
        expectBlockLocator(t,
                firstBlock,
-               blockdigest.BlockLocator{Digest: blockdigest.AssertFromString("b746e3d2104645f2f64cd3cc69dd895d"),
+               blockdigest.BlockLocator{Digest: d,
                        Size:  15693477,
                        Hints: []string{"E2866e643690156651c03d876e638e674dcd79475@5441920c"}})
        blocksRead := 1
        var lastBlock blockdigest.BlockLocator
        for lastBlock = range blockChannel {
-               //log.Printf("Blocks Read: %d", blocksRead)
                blocksRead++
        }
        expectEqual(t, blocksRead, 853)
 
+       d, err = blockdigest.FromString("f9ce82f59e5908d2d70e18df9679b469")
+       if err != nil {
+               t.Fatalf("Unexpected error during FromString for block: %v", err)
+       }
        expectBlockLocator(t,
                lastBlock,
-               blockdigest.BlockLocator{Digest: blockdigest.AssertFromString("f9ce82f59e5908d2d70e18df9679b469"),
+               blockdigest.BlockLocator{Digest: d,
                        Size:  31367794,
                        Hints: []string{"E53f903684239bcc114f7bf8ff9bd6089f33058db@5441920c"}})
 }
+
+func TestUnescape(t *testing.T) {
+       for _, testCase := range [][]string{
+               {`\040`, ` `},
+               {`\009`, `\009`},
+               {`\\\040\\`, `\ \`},
+               {`\\040\`, `\040\`},
+       } {
+               in := testCase[0]
+               expect := testCase[1]
+               got := UnescapeName(in)
+               if expect != got {
+                       t.Errorf("For '%s' got '%s' instead of '%s'", in, got, expect)
+               }
+       }
+}
+
+type fsegtest struct {
+       mt   string        // manifest text
+       f    string        // filename
+       want []FileSegment // segments should be received on channel
+}
+
+func TestFileSegmentIterByName(t *testing.T) {
+       mt := arvadostest.PathologicalManifest
+       for _, testCase := range []fsegtest{
+               {mt: mt, f: "zzzz", want: nil},
+               // This case is too sensitive: it would be acceptable
+               // (even preferable) to return only one empty segment.
+               {mt: mt, f: "foo/zero", want: []FileSegment{{"d41d8cd98f00b204e9800998ecf8427e+0", 0, 0}, {"d41d8cd98f00b204e9800998ecf8427e+0", 0, 0}}},
+               {mt: mt, f: "zero@0", want: []FileSegment{{"d41d8cd98f00b204e9800998ecf8427e+0", 0, 0}}},
+               {mt: mt, f: "zero@1", want: []FileSegment{{"d41d8cd98f00b204e9800998ecf8427e+0", 0, 0}}},
+               {mt: mt, f: "zero@4", want: []FileSegment{{"d41d8cd98f00b204e9800998ecf8427e+0", 0, 0}}},
+               {mt: mt, f: "zero@9", want: []FileSegment{{"d41d8cd98f00b204e9800998ecf8427e+0", 0, 0}}},
+               {mt: mt, f: "f", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 0, 1}}},
+               {mt: mt, f: "ooba", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 1, 2}, {"37b51d194a7513e45b56f6524f2d51f2+3", 0, 2}}},
+               {mt: mt, f: "overlapReverse/o", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 2, 1}}},
+               {mt: mt, f: "overlapReverse/oo", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 1, 2}}},
+               {mt: mt, f: "overlapReverse/ofoo", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 2, 1}, {"acbd18db4cc2f85cedef654fccc4a4d8+3", 0, 3}}},
+               {mt: mt, f: "foo bar/baz", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 0, 3}}},
+               // This case is too sensitive: it would be better to
+               // omit the empty segment.
+               {mt: mt, f: "segmented/frob", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 0, 1}, {"37b51d194a7513e45b56f6524f2d51f2+3", 2, 1}, {"acbd18db4cc2f85cedef654fccc4a4d8+3", 1, 1}, {"d41d8cd98f00b204e9800998ecf8427e+0", 0, 0}, {"37b51d194a7513e45b56f6524f2d51f2+3", 0, 1}}},
+               {mt: mt, f: "segmented/oof", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 1, 2}, {"acbd18db4cc2f85cedef654fccc4a4d8+3", 0, 1}}},
+       } {
+               m := Manifest{Text: testCase.mt}
+               var got []FileSegment
+               for fs := range m.FileSegmentIterByName(testCase.f) {
+                       got = append(got, *fs)
+               }
+               if !reflect.DeepEqual(got, testCase.want) {
+                       t.Errorf("For %#v:\n got  %#v\n want %#v", testCase.f, got, testCase.want)
+               }
+       }
+}
+
+func TestBlockIterWithBadManifest(t *testing.T) {
+       testCases := [][]string{
+               {"badstream acbd18db4cc2f85cedef654fccc4a4d8+3 0:1:file1.txt", "Invalid stream name: badstream"},
+               {"/badstream acbd18db4cc2f85cedef654fccc4a4d8+3 0:1:file1.txt", "Invalid stream name: /badstream"},
+               {". acbd18db4cc2f85cedef654fccc4a4d8+3 file1.txt", "Invalid file token: file1.txt"},
+               {". acbd18db4cc2f85cedef654fccc4a4+3 0:1:file1.txt", "No block locators found"},
+               {". acbd18db4cc2f85cedef654fccc4a4d8 0:1:file1.txt", "No block locators found"},
+               {". acbd18db4cc2f85cedef654fccc4a4d8+3 0:1:file1.txt file2.txt 1:2:file3.txt", "Invalid file token: file2.txt"},
+               {". acbd18db4cc2f85cedef654fccc4a4d8+3 0:1:file1.txt. bcde18db4cc2f85cedef654fccc4a4d8+3 1:2:file3.txt", "Invalid file token: bcde18db4cc2f85cedef654fccc4a4d8.*"},
+               {". acbd18db4cc2f85cedef654fccc4a4d8+3 0:1:file1.txt\n. acbd18db4cc2f85cedef654fccc4a4d8+3 ::file2.txt\n", "Invalid file token: ::file2.txt"},
+               {". acbd18db4cc2f85cedef654fccc4a4d8+3 bcde18db4cc2f85cedef654fccc4a4d8+3\n", "No file tokens found"},
+               {". acbd18db4cc2f85cedef654fccc4a4d8+3 ", "Invalid file token"},
+               {". acbd18db4cc2f85cedef654fccc4a4d8+3", "No file tokens found"},
+               {". 0:1:file1.txt\n", "No block locators found"},
+               {".\n", "No block locators found"},
+       }
+
+       for _, testCase := range testCases {
+               manifest := Manifest{Text: string(testCase[0])}
+               blockChannel := manifest.BlockIterWithDuplicates()
+
+               for range blockChannel {
+                       // Drain the channel; only manifest.Err matters here.
+               }
+
+               // completed reading from blockChannel; now check for errors
+               if manifest.Err == nil {
+                       t.Fatalf("Expected error")
+               }
+
+               matched, _ := regexp.MatchString(testCase[1], manifest.Err.Error())
+               if !matched {
+                       t.Fatalf("Expected error not found. Expected: %v; Found: %v", testCase[1], manifest.Err.Error())
+               }
+       }
+}
index 853d7d30354daddb86e602c4b580141fc18e1bdf..80aeb268975d8acbc7f7d1c2e771c17e7adfa719 100644 (file)
@@ -251,6 +251,7 @@ func (s *StandaloneSuite) TestTransferShortBuffer(c *C) {
 
        n, err := sr.Read(out)
        c.Check(n, Equals, 100)
+       c.Check(err, IsNil)
 
        n, err = sr.Read(out)
        c.Check(n, Equals, 0)
index a4a194f69bcc8fbdbf43650caa881325bde5dc24..3f5f9344c521f3021e2f6fd35f025e4b9c7fb95e 100644 (file)
@@ -249,9 +249,7 @@ func (this *AsyncStream) transfer(source_reader io.Reader) {
                                        }
                                }
                        } else {
-                               if reader_status == io.EOF {
-                                       // no more reads expected, so this is ok
-                               } else {
+                               if reader_status == nil {
                                        // slices channel closed without signaling EOF
                                        reader_status = io.ErrUnexpectedEOF
                                }
diff --git a/sdk/pam/LICENSE-2.0.txt b/sdk/pam/LICENSE-2.0.txt
new file mode 100644 (file)
index 0000000..d645695
--- /dev/null
@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
index c17568cce9d1b6e85d86ec76a7f476862414fdef..28885e04ba9ff94802662fe0d07a80c6d1d565d7 100644 (file)
@@ -1,3 +1,4 @@
+include LICENSE-2.0.txt
 include README.rst
 include examples/shellinabox
 include lib/libpam_arvados.py
index d261cebf410086694a0d2a0d1931726c6eca1631..c194013d49074e3652216b2070e1eaa59bd0e055 100755 (executable)
@@ -1,5 +1,6 @@
 #!/usr/bin/env python
 
+import glob
 import os
 import sys
 import setuptools.command.egg_info as egg_info_cmd
@@ -33,8 +34,9 @@ setup(name='arvados-pam',
       ],
       data_files=[
           ('lib/security', ['lib/libpam_arvados.py']),
-          ('share/arvados-pam', ['examples/shellinabox']),
           ('share/pam-configs', ['pam-configs/arvados']),
+          ('share/doc/arvados-pam', ['LICENSE-2.0.txt', 'README.rst']),
+          ('share/doc/arvados-pam/examples', glob.glob('examples/*')),
       ],
       install_requires=[
           'arvados-python-client>=0.1.20150801000000',
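The setup.py hunk above ships the license and docs via data_files, using glob to pick up everything under examples/. A minimal standalone sketch of the same packaging pattern (the package name and file list here are illustrative only, and the listed files must exist next to setup.py):

    # setup.py -- sketch of installing docs/examples via data_files;
    # target paths are interpreted relative to the install prefix.
    import glob
    import setuptools

    setuptools.setup(
        name='example-pkg',
        version='0.1',
        data_files=[
            ('share/doc/example-pkg', ['LICENSE-2.0.txt', 'README.rst']),
            ('share/doc/example-pkg/examples', glob.glob('examples/*')),
        ],
    )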
diff --git a/sdk/python/LICENSE-2.0.txt b/sdk/python/LICENSE-2.0.txt
new file mode 100644 (file)
index 0000000..d645695
--- /dev/null
@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
index 9561fb1061f6de114633c70871232a6896dcbe8a..71dc5367fef6efa133d6c5bff187b80da8dd94fd 100644 (file)
@@ -1 +1,2 @@
+include LICENSE-2.0.txt
 include README.rst
index 1df64703a9577d7338f60f5dd944b1c72d854064..b74f828f4bd04a2a6321aa50e5f823cb3a2496ab 100644 (file)
@@ -23,6 +23,7 @@ from collection import CollectionReader, CollectionWriter, ResumableCollectionWr
 from keep import *
 from stream import *
 from arvfile import StreamFileReader
+from retry import RetryLoop
 import errors
 import util
 
@@ -37,36 +38,60 @@ logger.addHandler(log_handler)
 logger.setLevel(logging.DEBUG if config.get('ARVADOS_DEBUG')
                 else logging.WARNING)
 
-def task_set_output(self,s):
-    api('v1').job_tasks().update(uuid=self['uuid'],
-                                 body={
-            'output':s,
-            'success':True,
-            'progress':1.0
-            }).execute()
+def task_set_output(self, s, num_retries=5):
+    for tries_left in RetryLoop(num_retries=num_retries, backoff_start=0):
+        try:
+            return api('v1').job_tasks().update(
+                uuid=self['uuid'],
+                body={
+                    'output':s,
+                    'success':True,
+                    'progress':1.0
+                }).execute()
+        except errors.ApiError as error:
+            if retry.check_http_response_success(error.resp.status) is None and tries_left > 0:
+                logger.debug("task_set_output: job_tasks().update() raised {}, retrying with {} tries left".format(repr(error),tries_left))
+            else:
+                raise
 
 _current_task = None
-def current_task():
+def current_task(num_retries=5):
     global _current_task
     if _current_task:
         return _current_task
-    t = api('v1').job_tasks().get(uuid=os.environ['TASK_UUID']).execute()
-    t = UserDict.UserDict(t)
-    t.set_output = types.MethodType(task_set_output, t)
-    t.tmpdir = os.environ['TASK_WORK']
-    _current_task = t
-    return t
+
+    for tries_left in RetryLoop(num_retries=num_retries, backoff_start=2):
+        try:
+            task = api('v1').job_tasks().get(uuid=os.environ['TASK_UUID']).execute()
+            task = UserDict.UserDict(task)
+            task.set_output = types.MethodType(task_set_output, task)
+            task.tmpdir = os.environ['TASK_WORK']
+            _current_task = task
+            return task
+        except errors.ApiError as error:
+            if retry.check_http_response_success(error.resp.status) is None and tries_left > 0:
+                logger.debug("current_task: job_tasks().get() raised {}, retrying with {} tries left".format(repr(error),tries_left))
+            else:
+                raise
 
 _current_job = None
-def current_job():
+def current_job(num_retries=5):
     global _current_job
     if _current_job:
         return _current_job
-    t = api('v1').jobs().get(uuid=os.environ['JOB_UUID']).execute()
-    t = UserDict.UserDict(t)
-    t.tmpdir = os.environ['JOB_WORK']
-    _current_job = t
-    return t
+
+    for tries_left in RetryLoop(num_retries=num_retries, backoff_start=2):
+        try:
+            job = api('v1').jobs().get(uuid=os.environ['JOB_UUID']).execute()
+            job = UserDict.UserDict(job)
+            job.tmpdir = os.environ['JOB_WORK']
+            _current_job = job
+            return job
+        except errors.ApiError as error:
+            if retry.check_http_response_success(error.resp.status) is None and tries_left > 0:
+                logger.debug("current_job: jobs().get() raised {}, retrying with {} tries left".format(repr(error),tries_left))
+            else:
+                raise
 
 def getjobparam(*args):
     return current_job()['script_parameters'].get(*args)
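All three helpers above share one retry shape: attempt the API call, log and back off on a retryable failure, re-raise once the tries run out. A minimal standalone sketch of that control flow (call_with_retries and its parameters are illustrative; the SDK's actual loop is arvados.retry.RetryLoop):

    import logging
    import time

    logger = logging.getLogger(__name__)

    def call_with_retries(fn, num_retries=5, backoff_start=2,
                          retryable=(IOError,)):
        # Attempt fn() up to num_retries+1 times, sleeping backoff_start,
        # backoff_start*2, ... seconds between attempts.
        delay = backoff_start
        for tries_left in range(num_retries, -1, -1):
            try:
                return fn()
            except retryable as error:
                if tries_left == 0:
                    raise
                logger.debug("%r raised, retrying with %d tries left",
                             error, tries_left)
            time.sleep(delay)
            delay *= 2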
index 83437b2adb9f7817ac0b5ee210cfdb6d50915b90..83874164eebd8bd88b03e6558f14f62c0b991033 100644 (file)
@@ -6,6 +6,8 @@ _logger = logging.getLogger('arvados.ranges')
 RANGES_SPAM = 9
 
 class Range(object):
+    __slots__ = ("locator", "range_start", "range_size", "segment_offset")
+
     def __init__(self, locator, range_start, range_size, segment_offset=0):
         self.locator = locator
         self.range_start = range_start
@@ -53,6 +55,8 @@ def first_block(data_locators, range_start):
     return i
 
 class LocatorAndRange(object):
+    __slots__ = ("locator", "block_size", "segment_offset", "segment_size")
+
     def __init__(self, locator, block_size, segment_offset, segment_size):
         self.locator = locator
         self.block_size = block_size
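Declaring __slots__ on Range and LocatorAndRange drops the per-instance __dict__, which matters because reading a large collection can create millions of these small objects. A quick way to see the effect (exact sizes vary by interpreter):

    import sys

    class Plain(object):
        def __init__(self, locator, range_start):
            self.locator = locator
            self.range_start = range_start

    class Slotted(object):
        __slots__ = ('locator', 'range_start')
        def __init__(self, locator, range_start):
            self.locator = locator
            self.range_start = range_start

    p, s = Plain('loc', 0), Slotted('loc', 0)
    # The slotted instance keeps attributes in fixed slots, not a dict.
    print(sys.getsizeof(p) + sys.getsizeof(p.__dict__))
    print(sys.getsizeof(s))

Note that __slots__ also makes assigning any attribute outside the tuple an AttributeError, so it only suits classes with a fixed field set like these.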
index 086487aa09714e2873e2b49ac7cafff222ea0d1b..e2e8ba19b64823af233c25eba1f90880dc9f4067 100644 (file)
@@ -1,9 +1,12 @@
 import collections
+import httplib
 import httplib2
 import json
 import logging
 import os
 import re
+import socket
+import time
 import types
 
 import apiclient
@@ -15,6 +18,11 @@ import util
 
 _logger = logging.getLogger('arvados.api')
 
+MAX_IDLE_CONNECTION_DURATION = 30
+RETRY_DELAY_INITIAL = 2
+RETRY_DELAY_BACKOFF = 2
+RETRY_COUNT = 2
+
 class OrderedJsonModel(apiclient.model.JsonModel):
     """Model class for JSON that preserves the contents' order.
 
@@ -36,8 +44,6 @@ class OrderedJsonModel(apiclient.model.JsonModel):
 
 
 def _intercept_http_request(self, uri, **kwargs):
-    from httplib import BadStatusLine
-
     if (self.max_request_size and
         kwargs.get('body') and
         self.max_request_size < len(kwargs['body'])):
@@ -50,22 +56,54 @@ def _intercept_http_request(self, uri, **kwargs):
         kwargs['headers']['X-External-Client'] = '1'
 
     kwargs['headers']['Authorization'] = 'OAuth2 %s' % self.arvados_api_token
-    try:
-        return self.orig_http_request(uri, **kwargs)
-    except BadStatusLine:
-        # This is how httplib tells us that it tried to reuse an
-        # existing connection but it was already closed by the
-        # server. In that case, yes, we would like to retry.
-        # Unfortunately, we are not absolutely certain that the
-        # previous call did not succeed, so this is slightly
-        # risky.
-        return self.orig_http_request(uri, **kwargs)
+
+    retryable = kwargs.get('method', 'GET') in [
+        'DELETE', 'GET', 'HEAD', 'OPTIONS', 'PUT']
+    retry_count = self._retry_count if retryable else 0
+
+    if (not retryable and
+        time.time() - self._last_request_time > self._max_keepalive_idle):
+        # High probability of failure due to connection atrophy. Make
+        # sure this request [re]opens a new connection by closing and
+        # forgetting all cached connections first.
+        for conn in self.connections.itervalues():
+            conn.close()
+        self.connections.clear()
+
+    delay = self._retry_delay_initial
+    for _ in range(retry_count):
+        self._last_request_time = time.time()
+        try:
+            return self.orig_http_request(uri, **kwargs)
+        except httplib.HTTPException:
+            _logger.debug("Retrying API request in %d s after HTTP error",
+                          delay, exc_info=True)
+        except socket.error:
+            # This is the one case where httplib2 doesn't close the
+            # underlying connection first.  Close all open connections,
+            # on the assumption that this object has only the one
+            # connection to the API server.  This is safe because
+            # httplib2 reopens connections when needed.
+            _logger.debug("Retrying API request in %d s after socket error",
+                          delay, exc_info=True)
+            for conn in self.connections.itervalues():
+                conn.close()
+        time.sleep(delay)
+        delay = delay * self._retry_delay_backoff
+
+    self._last_request_time = time.time()
+    return self.orig_http_request(uri, **kwargs)
 
 def _patch_http_request(http, api_token):
     http.arvados_api_token = api_token
     http.max_request_size = 0
     http.orig_http_request = http.request
     http.request = types.MethodType(_intercept_http_request, http)
+    http._last_request_time = 0
+    http._max_keepalive_idle = MAX_IDLE_CONNECTION_DURATION
+    http._retry_delay_initial = RETRY_DELAY_INITIAL
+    http._retry_delay_backoff = RETRY_DELAY_BACKOFF
+    http._retry_count = RETRY_COUNT
     return http
 
 # Monkey patch discovery._cast() so objects and arrays get serialized
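_patch_http_request attaches the retrying wrapper to a single httplib2.Http instance by saving the original bound method and rebinding request with types.MethodType. A toy sketch of that instance-level monkey patch (the Http class and URL here are stand-ins, not the real httplib2 API):

    import types

    class Http(object):
        def request(self, uri, **kwargs):
            return 'response from %s' % uri

    def _intercepted(self, uri, **kwargs):
        # Add behavior (auth headers, retries, ...) and then delegate to
        # the original bound method saved on the instance.
        self.calls.append(uri)
        return self.orig_request(uri, **kwargs)

    http = Http()
    http.calls = []
    http.orig_request = http.request
    http.request = types.MethodType(_intercepted, http)
    print(http.request('https://example.invalid/'))

Patching the instance rather than the class keeps per-connection state (token, retry budget, last-request time) on the object itself.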
index 0326608a0bb6e3270341f1ad1b01a3e7926958ac..71af6445a5ec508f1953777871a345c2d3e03ca1 100644 (file)
@@ -543,9 +543,6 @@ class _BlockManager(object):
     def __exit__(self, exc_type, exc_value, traceback):
         self.stop_threads()
 
-    def __del__(self):
-        self.stop_threads()
-
     def commit_bufferblock(self, block, sync):
         """Initiate a background upload of a bufferblock.
 
index 7ca6e7ca234f9b7ef2f1bc4dfc6ef84910c9c1e0..6fa26c672d26962a3e9cd01e140907eecec783e4 100644 (file)
@@ -282,7 +282,9 @@ class ArvPutCollectionWriter(arvados.ResumableCollectionWriter):
                                     replication=replication)
         except (TypeError, ValueError,
                 arvados.errors.StaleWriterStateError) as error:
-            return cls(cache, reporter, bytes_expected, num_retries=num_retries)
+            return cls(cache, reporter, bytes_expected,
+                       num_retries=num_retries,
+                       replication=replication)
         else:
             return writer
 
index be94e7304a34f3a2cdc829cd9a9f4b8339aaaf33..5c8bced513c160dd64e2cdbf3f4433d72ce89fe6 100644 (file)
@@ -52,7 +52,7 @@ def is_in_collection(root, branch):
         else:
             sp = os.path.split(root)
             return is_in_collection(sp[0], os.path.join(sp[1], branch))
-    except IOError, OSError:
+    except (IOError, OSError):
         return (None, None)
 
 # Determine the project to place the output of this command by searching upward
@@ -73,7 +73,7 @@ def determine_project(root, current_user):
         else:
             sp = os.path.split(root)
             return determine_project(sp[0], current_user)
-    except IOError, OSError:
+    except (IOError, OSError):
         return current_user
 
 # Determine if string corresponds to a file, and if that file is part of a
@@ -101,7 +101,7 @@ def statfile(prefix, fn):
 
     return prefix+fn
 
-def uploadfiles(files, api, dry_run=False, num_retries=0, project=None):
+def uploadfiles(files, api, dry_run=False, num_retries=0, project=None, fnPattern="$(file %s/%s)"):
     # Find the smallest path prefix that includes all the files that need to be uploaded.
     # This starts at the root and iteratively removes common parent directory prefixes
     # until all file paths no longer have a common parent.
@@ -153,7 +153,7 @@ def uploadfiles(files, api, dry_run=False, num_retries=0, project=None):
         logger.info("Uploaded to %s", item["uuid"])
 
     for c in files:
-        c.fn = "$(file %s/%s)" % (pdh, c.fn)
+        c.fn = fnPattern % (pdh, c.fn)
 
     os.chdir(orgdir)
 
@@ -238,7 +238,7 @@ def main(arguments=None):
 
     files = [c for command in slots[1:] for c in command if isinstance(c, UploadFile)]
     if files:
-        uploadfiles(files, api, dry_run=args.dry_run, num_retries=args.num_retries, project=project)
+        uploadfiles(files, api, dry_run=args.dry_run, num_retries=args.retries, project=project)
 
     for i in xrange(1, len(slots)):
         slots[i] = [("%s%s" % (c.prefix, c.fn)) if isinstance(c, ArvFile) else c for c in slots[i]]
diff --git a/sdk/python/arvados/crunch.py b/sdk/python/arvados/crunch.py
new file mode 100644 (file)
index 0000000..c184e6a
--- /dev/null
@@ -0,0 +1,27 @@
+import json
+import os
+
+class TaskOutputDir(object):
+    """Keep-backed directory for staging outputs of Crunch tasks.
+
+    Example, in a crunch task whose output is a file called "out.txt"
+    containing "42":
+
+        import arvados
+        import arvados.crunch
+        import os
+
+        out = arvados.crunch.TaskOutputDir()
+        with open(os.path.join(out.path, 'out.txt'), 'w') as f:
+            f.write('42')
+        arvados.current_task().set_output(out.manifest_text())
+    """
+    def __init__(self):
+        self.path = os.environ['TASK_KEEPMOUNT_TMP']
+
+    def __str__(self):
+        return self.path
+
+    def manifest_text(self):
+        snapshot = os.path.join(self.path, '.arvados#collection')
+        return json.load(open(snapshot))['manifest_text']
index df5b3e7dee514b8aea45edaa73bcd47042d8fab3..3132da3a0a2e271350c1a5418efa577703555231 100644 (file)
@@ -28,33 +28,24 @@ class EventClient(WebSocketClient):
         super(EventClient, self).__init__(url, ssl_options=ssl_options)
         self.filters = filters
         self.on_event = on_event
-        self.stop = threading.Event()
         self.last_log_id = last_log_id
+        self._closed_lock = threading.RLock()
+        self._closed = False
 
     def opened(self):
         self.subscribe(self.filters, self.last_log_id)
 
     def received_message(self, m):
-        self.on_event(json.loads(str(m)))
-
-    def closed(self, code, reason=None):
-        self.stop.set()
+        with self._closed_lock:
+            if not self._closed:
+                self.on_event(json.loads(str(m)))
 
     def close(self, code=1000, reason=''):
         """Close event client and wait for it to finish."""
-
-        # parent close() method sends a asynchronous "closed" event to the server
         super(EventClient, self).close(code, reason)
-
-        # if server doesn't respond by finishing the close handshake, we'll be
-        # stuck in limbo forever.  We don't need to wait for the server to
-        # respond to go ahead and actually close the socket.
-        self.close_connection()
-
-        # wait for websocket thread to finish up (closed() is called by
-        # websocket thread in as part of terminate())
-        while not self.stop.is_set():
-            self.stop.wait(1)
+        with self._closed_lock:
+            # make sure we don't process any more messages.
+            self._closed = True
 
     def subscribe(self, filters, last_log_id=None):
         m = {"method": "subscribe", "filters": filters}
index 0754744241a97a0011fb71ace5792a7be0c27148..c584f04ca06226b842e690751fb323fe60ac256a 100644 (file)
@@ -1,28 +1,16 @@
-import bz2
+import cStringIO
 import datetime
-import fcntl
-import functools
-import gflags
 import hashlib
-import json
 import logging
+import math
 import os
-import pprint
 import pycurl
 import Queue
 import re
 import socket
 import ssl
-import string
-import cStringIO
-import subprocess
-import sys
 import threading
-import time
 import timer
-import types
-import UserDict
-import zlib
 
 import arvados
 import arvados.config as config
@@ -160,6 +148,8 @@ class KeepBlockCache(object):
         self._cache_lock = threading.Lock()
 
     class CacheSlot(object):
+        __slots__ = ("locator", "ready", "content")
+
         def __init__(self, locator):
             self.locator = locator
             self.ready = threading.Event()
@@ -222,72 +212,103 @@ class KeepBlockCache(object):
                 self._cache.insert(0, n)
                 return n, True
 
+
+class Counter(object):
+    def __init__(self, v=0):
+        self._lk = threading.Lock()
+        self._val = v
+
+    def add(self, v):
+        with self._lk:
+            self._val += v
+
+    def get(self):
+        with self._lk:
+            return self._val
+
+
 class KeepClient(object):
 
     # Default Keep server connection timeout:  2 seconds
-    # Default Keep server read timeout:      300 seconds
+    # Default Keep server read timeout:       64 seconds
+    # Default Keep server bandwidth minimum:  32768 bytes per second
     # Default Keep proxy connection timeout:  20 seconds
-    # Default Keep proxy read timeout:       300 seconds
-    DEFAULT_TIMEOUT = (2, 300)
-    DEFAULT_PROXY_TIMEOUT = (20, 300)
+    # Default Keep proxy read timeout:        64 seconds
+    # Default Keep proxy bandwidth minimum:   32768 bytes per second
+    DEFAULT_TIMEOUT = (2, 64, 32768)
+    DEFAULT_PROXY_TIMEOUT = (20, 64, 32768)
 
     class ThreadLimiter(object):
-        """
-        Limit the number of threads running at a given time to
-        {desired successes} minus {successes reported}. When successes
-        reported == desired, wake up the remaining threads and tell
-        them to quit.
+        """Limit the number of threads writing to Keep at once.
+
+        This ensures that no more writer threads run at once than are
+        needed to potentially achieve the desired replication level.
+        Once the desired replication level is achieved, queued threads
+        are instructed not to run.
 
         Should be used in a "with" block.
         """
-        def __init__(self, todo):
-            self._todo = todo
+        def __init__(self, want_copies, max_service_replicas):
+            self._started = 0
+            self._want_copies = want_copies
             self._done = 0
             self._response = None
-            self._todo_lock = threading.Semaphore(todo)
+            self._start_lock = threading.Condition()
+            if (not max_service_replicas) or (max_service_replicas >= want_copies):
+                max_threads = 1
+            else:
+                max_threads = math.ceil(float(want_copies) / max_service_replicas)
+            _logger.debug("Limiter max threads is %d", max_threads)
+            self._todo_lock = threading.Semaphore(max_threads)
             self._done_lock = threading.Lock()
+            self._local = threading.local()
 
         def __enter__(self):
+            self._start_lock.acquire()
+            if getattr(self._local, 'sequence', None) is not None:
+                # If the calling thread has used set_sequence(N), then
+                # we wait here until N other threads have started.
+                while self._started < self._local.sequence:
+                    self._start_lock.wait()
             self._todo_lock.acquire()
+            self._started += 1
+            self._start_lock.notifyAll()
+            self._start_lock.release()
             return self
 
         def __exit__(self, type, value, traceback):
             self._todo_lock.release()
 
+        def set_sequence(self, sequence):
+            self._local.sequence = sequence
+
         def shall_i_proceed(self):
             """
-            Return true if the current thread should do stuff. Return
-            false if the current thread should just stop.
+            Return true if the current thread should write to Keep.
+            Return false otherwise.
             """
             with self._done_lock:
-                return (self._done < self._todo)
+                return (self._done < self._want_copies)
 
         def save_response(self, response_body, replicas_stored):
             """
             Records a response body (a locator, possibly signed) returned by
-            the Keep server.  It is not necessary to save more than
-            one response, since we presume that any locator returned
-            in response to a successful request is valid.
+            the Keep server, and the number of replicas it stored.
             """
             with self._done_lock:
                 self._done += replicas_stored
                 self._response = response_body
 
         def response(self):
-            """
-            Returns the body from the response to a PUT request.
-            """
+            """Return the body from the response to a PUT request."""
             with self._done_lock:
                 return self._response
 
         def done(self):
-            """
-            Return how many successes were reported.
-            """
+            """Return the total number of replicas successfully stored."""
             with self._done_lock:
                 return self._done
 
-
     class KeepService(object):
         """Make requests to a single Keep service, and track results.
 
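The new ThreadLimiter sizes its semaphore from the replication arithmetic: if every writable service can store max_service_replicas copies, at most ceil(want_copies / max_service_replicas) writers need to run concurrently; when the per-service figure is unknown (e.g. a proxy), a single writer may already achieve everything. That sizing in isolation:

    import math
    import threading

    def writer_thread_limit(want_copies, max_service_replicas):
        # Mirrors the limiter above: unknown or ample per-service
        # replication means one writer may be enough.
        if (not max_service_replicas) or (max_service_replicas >= want_copies):
            return 1
        return int(math.ceil(float(want_copies) / max_service_replicas))

    print(writer_thread_limit(3, None))  # proxy: 1 writer at a time
    print(writer_thread_limit(3, 1))     # disk services: 3 writers
    limiter = threading.Semaphore(writer_thread_limit(3, 1))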
@@ -306,7 +327,9 @@ class KeepClient(object):
             arvados.errors.HttpError,
         )
 
-        def __init__(self, root, user_agent_pool=Queue.LifoQueue(), **headers):
+        def __init__(self, root, user_agent_pool=Queue.LifoQueue(),
+                     upload_counter=None,
+                     download_counter=None, **headers):
             self.root = root
             self._user_agent_pool = user_agent_pool
             self._result = {'error': None}
@@ -315,6 +338,8 @@ class KeepClient(object):
             self.get_headers = {'Accept': 'application/octet-stream'}
             self.get_headers.update(headers)
             self.put_headers = headers
+            self.upload_counter = upload_counter
+            self.download_counter = download_counter
 
         def usable(self):
             """Is it worth attempting a request?"""
@@ -400,11 +425,13 @@ class KeepClient(object):
                 _logger.debug("Request fail: GET %s => %s: %s",
                               url, type(self._result['error']), str(self._result['error']))
                 return None
-            _logger.info("%s response: %s bytes in %s msec (%.3f MiB/sec)",
+            _logger.info("GET %s: %s bytes in %s msec (%.3f MiB/sec)",
                          self._result['status_code'],
                          len(self._result['body']),
                          t.msecs,
                          (len(self._result['body'])/(1024.0*1024))/t.secs if t.secs > 0 else 0)
+            if self.download_counter:
+                self.download_counter.add(len(self._result['body']))
             resp_md5 = hashlib.md5(self._result['body']).hexdigest()
             if resp_md5 != locator.md5sum:
                 _logger.warning("Checksum fail: md5(%s) = %s",
@@ -419,36 +446,37 @@ class KeepClient(object):
             _logger.debug("Request: PUT %s", url)
             curl = self._get_user_agent()
             try:
-                self._headers = {}
-                body_reader = cStringIO.StringIO(body)
-                response_body = cStringIO.StringIO()
-                curl.setopt(pycurl.NOSIGNAL, 1)
-                curl.setopt(pycurl.OPENSOCKETFUNCTION, self._socket_open)
-                curl.setopt(pycurl.URL, url.encode('utf-8'))
-                # Using UPLOAD tells cURL to wait for a "go ahead" from the
-                # Keep server (in the form of a HTTP/1.1 "100 Continue"
-                # response) instead of sending the request body immediately.
-                # This allows the server to reject the request if the request
-                # is invalid or the server is read-only, without waiting for
-                # the client to send the entire block.
-                curl.setopt(pycurl.UPLOAD, True)
-                curl.setopt(pycurl.INFILESIZE, len(body))
-                curl.setopt(pycurl.READFUNCTION, body_reader.read)
-                curl.setopt(pycurl.HTTPHEADER, [
-                    '{}: {}'.format(k,v) for k,v in self.put_headers.iteritems()])
-                curl.setopt(pycurl.WRITEFUNCTION, response_body.write)
-                curl.setopt(pycurl.HEADERFUNCTION, self._headerfunction)
-                self._setcurltimeouts(curl, timeout)
-                try:
-                    curl.perform()
-                except Exception as e:
-                    raise arvados.errors.HttpError(0, str(e))
-                self._result = {
-                    'status_code': curl.getinfo(pycurl.RESPONSE_CODE),
-                    'body': response_body.getvalue(),
-                    'headers': self._headers,
-                    'error': False,
-                }
+                with timer.Timer() as t:
+                    self._headers = {}
+                    body_reader = cStringIO.StringIO(body)
+                    response_body = cStringIO.StringIO()
+                    curl.setopt(pycurl.NOSIGNAL, 1)
+                    curl.setopt(pycurl.OPENSOCKETFUNCTION, self._socket_open)
+                    curl.setopt(pycurl.URL, url.encode('utf-8'))
+                    # Using UPLOAD tells cURL to wait for a "go ahead" from the
+                    # Keep server (in the form of a HTTP/1.1 "100 Continue"
+                    # response) instead of sending the request body immediately.
+                    # This allows the server to reject the request if the request
+                    # is invalid or the server is read-only, without waiting for
+                    # the client to send the entire block.
+                    curl.setopt(pycurl.UPLOAD, True)
+                    curl.setopt(pycurl.INFILESIZE, len(body))
+                    curl.setopt(pycurl.READFUNCTION, body_reader.read)
+                    curl.setopt(pycurl.HTTPHEADER, [
+                        '{}: {}'.format(k,v) for k,v in self.put_headers.iteritems()])
+                    curl.setopt(pycurl.WRITEFUNCTION, response_body.write)
+                    curl.setopt(pycurl.HEADERFUNCTION, self._headerfunction)
+                    self._setcurltimeouts(curl, timeout)
+                    try:
+                        curl.perform()
+                    except Exception as e:
+                        raise arvados.errors.HttpError(0, str(e))
+                    self._result = {
+                        'status_code': curl.getinfo(pycurl.RESPONSE_CODE),
+                        'body': response_body.getvalue(),
+                        'headers': self._headers,
+                        'error': False,
+                    }
                 ok = retry.check_http_response_success(self._result['status_code'])
                 if not ok:
                     self._result['error'] = arvados.errors.HttpError(
@@ -469,17 +497,30 @@ class KeepClient(object):
                 _logger.debug("Request fail: PUT %s => %s: %s",
                               url, type(self._result['error']), str(self._result['error']))
                 return False
+            _logger.info("PUT %s: %s bytes in %s msec (%.3f MiB/sec)",
+                         self._result['status_code'],
+                         len(body),
+                         t.msecs,
+                         (len(body)/(1024.0*1024))/t.secs if t.secs > 0 else 0)
+            if self.upload_counter:
+                self.upload_counter.add(len(body))
             return True
 
         def _setcurltimeouts(self, curl, timeouts):
             if not timeouts:
                 return
             elif isinstance(timeouts, tuple):
-                conn_t, xfer_t = timeouts
+                if len(timeouts) == 2:
+                    conn_t, xfer_t = timeouts
+                    bandwidth_bps = KeepClient.DEFAULT_TIMEOUT[2]
+                else:
+                    conn_t, xfer_t, bandwidth_bps = timeouts
             else:
                 conn_t, xfer_t = (timeouts, timeouts)
+                bandwidth_bps = KeepClient.DEFAULT_TIMEOUT[2]
             curl.setopt(pycurl.CONNECTTIMEOUT_MS, int(conn_t*1000))
-            curl.setopt(pycurl.TIMEOUT_MS, int(xfer_t*1000))
+            curl.setopt(pycurl.LOW_SPEED_TIME, int(math.ceil(xfer_t)))
+            curl.setopt(pycurl.LOW_SPEED_LIMIT, int(math.ceil(bandwidth_bps)))
 
         def _headerfunction(self, header_line):
             header_line = header_line.decode('iso-8859-1')
@@ -517,7 +558,11 @@ class KeepClient(object):
             return self._success
 
         def run(self):
-            with self.args['thread_limiter'] as limiter:
+            limiter = self.args['thread_limiter']
+            sequence = self.args['thread_sequence']
+            if sequence is not None:
+                limiter.set_sequence(sequence)
+            with limiter:
                 if not limiter.shall_i_proceed():
                     # My turn arrived, but the job has been done without
                     # me.
@@ -579,20 +624,22 @@ class KeepClient(object):
 
         :timeout:
           The initial timeout (in seconds) for HTTP requests to Keep
-          non-proxy servers.  A tuple of two floats is interpreted as
-          (connection_timeout, read_timeout): see
-          http://docs.python-requests.org/en/latest/user/advanced/#timeouts.
-          Because timeouts are often a result of transient server load, the
-          actual connection timeout will be increased by a factor of two on
-          each retry.
-          Default: (2, 300).
+          non-proxy servers.  A tuple of three floats is interpreted as
+          (connection_timeout, read_timeout, minimum_bandwidth). A connection
+          will be aborted if the average traffic rate falls below
+          minimum_bandwidth bytes per second over an interval of read_timeout
+          seconds. Because timeouts are often a result of transient server
+          load, the actual connection timeout will be increased by a factor
+          of two on each retry.
+          Default: (2, 64, 32768).
 
         :proxy_timeout:
           The initial timeout (in seconds) for HTTP requests to
-          Keep proxies. A tuple of two floats is interpreted as
-          (connection_timeout, read_timeout). The behavior described
-          above for adjusting connection timeouts on retry also applies.
-          Default: (20, 300).
+          Keep proxies. A tuple of three floats is interpreted as
+          (connection_timeout, read_timeout, minimum_bandwidth). The behavior
+          described above for adjusting connection timeouts on retry also
+          applies.
+          Default: (20, 64, 32768).
 
         :api_token:
           If you're not using an API client, but only talking
@@ -632,6 +679,12 @@ class KeepClient(object):
         self.timeout = timeout
         self.proxy_timeout = proxy_timeout
         self._user_agent_pool = Queue.LifoQueue()
+        self.upload_counter = Counter()
+        self.download_counter = Counter()
+        self.put_counter = Counter()
+        self.get_counter = Counter()
+        self.hits_counter = Counter()
+        self.misses_counter = Counter()
 
         if local_store:
             self.local_store = local_store
@@ -639,6 +692,7 @@ class KeepClient(object):
             self.put = self.local_store_put
         else:
             self.num_retries = num_retries
+            self.max_replicas_per_service = None
             if proxy:
                 if not proxy.endswith('/'):
                     proxy += '/'
@@ -646,12 +700,12 @@ class KeepClient(object):
                 self._gateway_services = {}
                 self._keep_services = [{
                     'uuid': 'proxy',
+                    'service_type': 'proxy',
                     '_service_root': proxy,
                     }]
                 self._writable_services = self._keep_services
                 self.using_proxy = True
                 self._static_services_list = True
-                self.max_replicas_per_service = 1
             else:
                 # It's important to avoid instantiating an API client
                 # unless we actually need one, for testing's sake.
@@ -664,7 +718,6 @@ class KeepClient(object):
                 self._writable_services = None
                 self.using_proxy = None
                 self._static_services_list = False
-                self.max_replicas_per_service = 1
 
     def current_timeout(self, attempt_number):
         """Return the appropriate timeout to use for this client.
@@ -679,7 +732,13 @@ class KeepClient(object):
         # TODO(twp): the timeout should be a property of a
         # KeepService, not a KeepClient. See #4488.
         t = self.proxy_timeout if self.using_proxy else self.timeout
-        return (t[0] * (1 << attempt_number), t[1])
+        if len(t) == 2:
+            return (t[0] * (1 << attempt_number), t[1])
+        else:
+            return (t[0] * (1 << attempt_number), t[1], t[2])
+    def _any_nondisk_services(self, service_list):
+        return any(ks.get('service_type', 'disk') != 'disk'
+                   for ks in service_list)
 
     def build_services_list(self, force_rebuild=False):
         if (self._static_services_list or
@@ -691,12 +750,16 @@ class KeepClient(object):
             except Exception:  # API server predates Keep services.
                 keep_services = self.api_client.keep_disks().list()
 
-            accessible = keep_services.execute().get('items')
-            if not accessible:
+            # Gateway services are only used when specified by UUID,
+            # so there's nothing to gain by filtering them by
+            # service_type.
+            self._gateway_services = {ks['uuid']: ks for ks in
+                                      keep_services.execute()['items']}
+            if not self._gateway_services:
                 raise arvados.errors.NoKeepServersError()
 
             # Precompute the base URI for each service.
-            for r in accessible:
+            for r in self._gateway_services.itervalues():
                 host = r['service_host']
                 if not host.startswith('[') and host.find(':') >= 0:
                     # IPv6 URIs must be formatted like http://[::1]:80/...
@@ -706,27 +769,19 @@ class KeepClient(object):
                     host,
                     r['service_port'])
 
-            # Gateway services are only used when specified by UUID,
-            # so there's nothing to gain by filtering them by
-            # service_type.
-            self._gateway_services = {ks.get('uuid'): ks for ks in accessible}
             _logger.debug(str(self._gateway_services))
-
             self._keep_services = [
-                ks for ks in accessible
-                if ks.get('service_type') in ['disk', 'proxy']]
-            self._writable_services = [
-                ks for ks in accessible
-                if (ks.get('service_type') in ['disk', 'proxy']) and (True != ks.get('read_only'))]
-            _logger.debug(str(self._keep_services))
-
-            self.using_proxy = any(ks.get('service_type') == 'proxy'
-                                   for ks in self._keep_services)
+                ks for ks in self._gateway_services.itervalues()
+                if not ks.get('service_type', '').startswith('gateway:')]
+            self._writable_services = [ks for ks in self._keep_services
+                                       if not ks.get('read_only')]
+
             # For disk type services, max_replicas_per_service is 1
-            # It is unknown or unlimited for non-disk typed services.
-            for ks in accessible:
-                if ('disk' != ks.get('service_type')) and (not ks.get('read_only')):
-                    self.max_replicas_per_service = None
+            # It is unknown (unlimited) for other service types.
+            if self._any_nondisk_services(self._writable_services):
+                self.max_replicas_per_service = None
+            else:
+                self.max_replicas_per_service = 1
 
     def _service_weight(self, data_hash, service_uuid):
         """Compute the weight of a Keep service endpoint for a data
@@ -763,7 +818,8 @@ class KeepClient(object):
         # in that order.
         use_services = self._keep_services
         if need_writable:
-          use_services = self._writable_services
+            use_services = self._writable_services
+        self.using_proxy = self._any_nondisk_services(use_services)
         sorted_roots.extend([
             svc['_service_root'] for svc in sorted(
                 use_services,
@@ -782,7 +838,10 @@ class KeepClient(object):
         for root in local_roots:
             if root not in roots_map:
                 roots_map[root] = self.KeepService(
-                    root, self._user_agent_pool, **headers)
+                    root, self._user_agent_pool,
+                    upload_counter=self.upload_counter,
+                    download_counter=self.download_counter,
+                    **headers)
         return local_roots
 
     @staticmethod
@@ -834,12 +893,18 @@ class KeepClient(object):
         """
         if ',' in loc_s:
             return ''.join(self.get(x) for x in loc_s.split(','))
+
+        self.get_counter.add(1)
+
         locator = KeepLocator(loc_s)
         slot, first = self.block_cache.reserve_cache(locator.md5sum)
         if not first:
+            self.hits_counter.add(1)
             v = slot.get()
             return v
 
+        self.misses_counter.add(1)
+
         # If the locator has hints specifying a prefix (indicating a
         # remote keepproxy) or the UUID of a local gateway service,
         # read data from the indicated service(s) instead of the usual
@@ -854,7 +919,9 @@ class KeepClient(object):
                                    )])
         # Map root URLs to their KeepService objects.
         roots_map = {
-            root: self.KeepService(root, self._user_agent_pool)
+            root: self.KeepService(root, self._user_agent_pool,
+                                   upload_counter=self.upload_counter,
+                                   download_counter=self.download_counter)
             for root in hint_roots
         }
 
@@ -937,6 +1004,8 @@ class KeepClient(object):
         elif not isinstance(data, str):
             raise arvados.errors.ArgumentError("Argument 'data' to KeepClient.put is not type 'str'")
 
+        self.put_counter.add(1)
+
         data_hash = hashlib.md5(data).hexdigest()
         loc_s = data_hash + '+' + str(len(data))
         if copies < 1:
@@ -947,20 +1016,22 @@ class KeepClient(object):
         # Tell the proxy how many copies we want it to store
         headers['X-Keep-Desired-Replication'] = str(copies)
         roots_map = {}
-        thread_limiter = KeepClient.ThreadLimiter(1 if self.max_replicas_per_service is None else copies)
         loop = retry.RetryLoop(num_retries, self._check_loop_result,
                                backoff_start=2)
         for tries_left in loop:
             try:
-                local_roots = self.map_new_services(
+                sorted_roots = self.map_new_services(
                     roots_map, locator,
                     force_rebuild=(tries_left < num_retries), need_writable=True, **headers)
             except Exception as error:
                 loop.save_result(error)
                 continue
 
+            thread_limiter = KeepClient.ThreadLimiter(
+                copies, self.max_replicas_per_service)
             threads = []
-            for service_root, ks in roots_map.iteritems():
+            for service_root, ks in [(root, roots_map[root])
+                                     for root in sorted_roots]:
                 if ks.finished():
                     continue
                 t = KeepClient.KeepWriterThread(
@@ -969,7 +1040,8 @@ class KeepClient(object):
                     data_hash=data_hash,
                     service_root=service_root,
                     thread_limiter=thread_limiter,
-                    timeout=self.current_timeout(num_retries-tries_left))
+                    timeout=self.current_timeout(num_retries-tries_left),
+                    thread_sequence=len(threads))
                 t.start()
                 threads.append(t)
             for t in threads:
@@ -984,7 +1056,7 @@ class KeepClient(object):
                     data_hash, loop.last_result()))
         else:
             service_errors = ((key, roots_map[key].last_result()['error'])
-                              for key in local_roots
+                              for key in sorted_roots
                               if roots_map[key].last_result()['error'])
             raise arvados.errors.KeepWriteError(
                 "failed to write {} (wanted {} copies but wrote {})".format(
index 23a3793e426bb37d87998d2b1ed651e2331d38ed..b589b4858309a4b33d15df8cb7be7b71d6293e20 100644 (file)
@@ -35,6 +35,9 @@ setup(name='arvados-python-client',
           'bin/arv-run',
           'bin/arv-ws'
       ],
+      data_files=[
+          ('share/doc/arvados-python-client', ['LICENSE-2.0.txt', 'README.rst']),
+      ],
       install_requires=[
           'ciso8601',
           'google-api-python-client',
index 6e2a07888662172fd6f26b335a0206db4ef15e98..b2cf43652bce288858dca4a1db1a121b040af9ec 100644 (file)
@@ -43,6 +43,12 @@ def mock_responses(body, *codes, **headers):
     return mock.patch('httplib2.Http.request', side_effect=queue_with((
         (fake_httplib2_response(code, **headers), body) for code in codes)))
 
+def mock_api_responses(api_client, body, codes, headers={}):
+    return mock.patch.object(api_client._http, 'request', side_effect=queue_with((
+        (fake_httplib2_response(code, **headers), body) for code in codes)))
+
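+# Return the Keep locator for string s: "<md5 hexdigest>+<length>".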
+def str_keep_locator(s):
+    return '{}+{}'.format(hashlib.md5(s).hexdigest(), len(s))
 
 class FakeCurl:
     @classmethod
@@ -122,8 +128,7 @@ class MockStreamReader(object):
     def __init__(self, name='.', *data):
         self._name = name
         self._data = ''.join(data)
-        self._data_locators = ['{}+{}'.format(hashlib.md5(d).hexdigest(),
-                                              len(d)) for d in data]
+        self._data_locators = [str_keep_locator(d) for d in data]
         self.num_retries = 0
 
     def name(self):
index ef724ed5c52130c76ca878a60d94a150ed044c12..f074f8d6cf67ee768a4f09e2515a850cac5e4c5c 100644 (file)
@@ -22,7 +22,12 @@ class Server(SocketServer.ThreadingMixIn, BaseHTTPServer.HTTPServer, object):
             'response_body': 0,
             # before returning from handler (thus setting response EOF)
             'response_close': 0,
+            # after writing over 1s worth of data at self.bandwidth
+            'mid_write': 0,
+            # after reading over 1s worth of data at self.bandwidth
+            'mid_read': 0,
         }
+        self.bandwidth = None
         super(Server, self).__init__(*args, **kwargs)
 
     def setdelays(self, **kwargs):
@@ -31,6 +36,12 @@ class Server(SocketServer.ThreadingMixIn, BaseHTTPServer.HTTPServer, object):
             self.delays.get(k) # NameError if unknown key
             self.delays[k] = v
 
+    def setbandwidth(self, bandwidth):
+        """Set the maximum bandwidth (in bytes per second) for future
+        requests. If setbandwidth is never called, operate at the
+        maximum bandwidth possible."""
+        self.bandwidth = float(bandwidth)
+
     def _sleep_at_least(self, seconds):
         """Sleep for given time, even if signals are received."""
         wake = time.time() + seconds
@@ -44,6 +55,53 @@ class Server(SocketServer.ThreadingMixIn, BaseHTTPServer.HTTPServer, object):
 
 
 class Handler(BaseHTTPServer.BaseHTTPRequestHandler, object):
+    def wfile_bandwidth_write(self, data_to_write):
+        if self.server.bandwidth is None and self.server.delays['mid_write'] == 0:
+            self.wfile.write(data_to_write)
+        else:
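+            # Pace the response: write about 1/4 second's worth of
+            # data per iteration, then sleep until target_time so the
+            # average rate matches self.server.bandwidth (32 KiB
+            # chunks when no bandwidth limit is set). The optional
+            # 'mid_write' delay is injected once, after ~1s of data.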
+            BYTES_PER_WRITE = int((self.server.bandwidth or 0)/4.0) or 32768
+            outage_happened = False
+            num_bytes = len(data_to_write)
+            num_sent_bytes = 0
+            target_time = time.time()
+            while num_sent_bytes < num_bytes:
+                if num_sent_bytes > self.server.bandwidth and not outage_happened:
+                    self.server._do_delay('mid_write')
+                    target_time += self.server.delays['mid_write']
+                    outage_happened = True
+                num_write_bytes = min(BYTES_PER_WRITE,
+                    num_bytes - num_sent_bytes)
+                self.wfile.write(data_to_write[
+                    num_sent_bytes:num_sent_bytes+num_write_bytes])
+                num_sent_bytes += num_write_bytes
+                if self.server.bandwidth is not None:
+                    target_time += num_write_bytes / self.server.bandwidth
+                    self.server._sleep_at_least(target_time - time.time())
+        return None
+
+    def rfile_bandwidth_read(self, bytes_to_read):
+        if self.server.bandwidth is None and self.server.delays['mid_read'] == 0:
+            return self.rfile.read(bytes_to_read)
+        else:
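+            # Same pacing strategy as wfile_bandwidth_write, applied
+            # to reading the request body, with the optional
+            # 'mid_read' delay injected once after ~1s of data.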
+            BYTES_PER_READ = int((self.server.bandwidth or 0)/4.0) or 32768
+            data = ''
+            outage_happened = False
+            bytes_read = 0
+            target_time = time.time()
+            while bytes_to_read > bytes_read:
+                if bytes_read > self.server.bandwidth and not outage_happened:
+                    self.server._do_delay('mid_read')
+                    target_time += self.server.delays['mid_read']
+                    outage_happened = True
+                next_bytes_to_read = min(BYTES_PER_READ,
+                    bytes_to_read - bytes_read)
+                data += self.rfile.read(next_bytes_to_read)
+                bytes_read += next_bytes_to_read
+                if self.server.bandwidth is not None:
+                    target_time += next_bytes_to_read / self.server.bandwidth
+                    self.server._sleep_at_least(target_time - time.time())
+        return data
+
     def handle(self, *args, **kwargs):
         self.server._do_delay('request')
         return super(Handler, self).handle(*args, **kwargs)
@@ -60,21 +118,18 @@ class Handler(BaseHTTPServer.BaseHTTPRequestHandler, object):
         self.send_header('Content-type', 'application/octet-stream')
         self.end_headers()
         self.server._do_delay('response_body')
-        self.wfile.write(self.server.store[datahash])
+        self.wfile_bandwidth_write(self.server.store[datahash])
         self.server._do_delay('response_close')
 
     def do_PUT(self):
         self.server._do_delay('request_body')
-
         # The comments at https://bugs.python.org/issue1491 imply that Python
         # 2.7 BaseHTTPRequestHandler was patched to support 100 Continue, but
         # reading the actual code that ships in Debian shows it clearly is not, so we
         # need to send the response on the socket directly.
-
-        self.wfile.write("%s %d %s\r\n\r\n" %
+        self.wfile_bandwidth_write("%s %d %s\r\n\r\n" %
                          (self.protocol_version, 100, "Continue"))
-
-        data = self.rfile.read(int(self.headers.getheader('content-length')))
+        data = self.rfile_bandwidth_read(int(self.headers.getheader('content-length')))
         datahash = hashlib.md5(data).hexdigest()
         self.server.store[datahash] = data
         self.server._do_delay('response')
@@ -82,7 +137,7 @@ class Handler(BaseHTTPServer.BaseHTTPRequestHandler, object):
         self.send_header('Content-type', 'text/plain')
         self.end_headers()
         self.server._do_delay('response_body')
-        self.wfile.write(datahash + '+' + str(len(data)))
+        self.wfile_bandwidth_write(datahash + '+' + str(len(data)))
         self.server._do_delay('response_close')
 
     def log_request(self, *args, **kwargs):
index 2d8e4759d695a4538def2aad1b6c3794daa3f073..8f0abd245ba752cd778473f0951f1b89a38cc868 100644 (file)
@@ -1,6 +1,5 @@
 import arvados
 import arvados_testutil as tutil
-import hashlib
 
 class ManifestExamples(object):
     def make_manifest(self,
@@ -9,8 +8,7 @@ class ManifestExamples(object):
                       files_per_stream=1,
                       streams=1):
         datablip = 'x' * bytes_per_block
-        data_loc = '{}+{}'.format(hashlib.md5(datablip).hexdigest(),
-                                  bytes_per_block)
+        data_loc = tutil.str_keep_locator(datablip)
         with tutil.mock_keep_responses(data_loc, 200):
             coll = arvados.CollectionWriter()
             for si in range(0, streams):
index 61966054a08f1cb15c4e000ca47e302b5dd7a248..2b8b6ca1c4ad531c29bfd1c9a149da7c9bdf3599 100644 (file)
@@ -3,7 +3,7 @@ error_log stderr info;          # Yes, must be specified here _and_ cmdline
 events {
 }
 http {
-  access_log /dev/stderr combined;
+  access_log {{ACCESSLOG}} combined;
   upstream arv-git-http {
     server localhost:{{GITPORT}};
   }
@@ -28,4 +28,30 @@ http {
       proxy_pass http://keepproxy;
     }
   }
+  upstream keep-web {
+    server localhost:{{KEEPWEBPORT}};
+  }
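+  # keep-web vhost for inline content: pass the client's original
+  # Host header through, so keep-web sees the hostname the client used.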
+  server {
+    listen *:{{KEEPWEBSSLPORT}} ssl default_server;
+    server_name ~^(?<request_host>.*)$;
+    ssl_certificate {{SSLCERT}};
+    ssl_certificate_key {{SSLKEY}};
+    location  / {
+      proxy_pass http://keep-web;
+      proxy_set_header Host $request_host:{{KEEPWEBPORT}};
+      proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+    }
+  }
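+  # keep-web download vhost: force Host to "download", the hostname
+  # keep-web serves in attachment-only mode.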
+  server {
+    listen *:{{KEEPWEBDLSSLPORT}} ssl default_server;
+    server_name ~.*;
+    ssl_certificate {{SSLCERT}};
+    ssl_certificate_key {{SSLKEY}};
+    location  / {
+      proxy_pass http://keep-web;
+      proxy_set_header Host download:{{KEEPWEBPORT}};
+      proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+      proxy_redirect //download:{{KEEPWEBPORT}}/ https://$host:{{KEEPWEBDLSSLPORT}}/;
+    }
+  }
 }
index 5d0c42ad2109e2d605f5ab45fea5bd64fc26b1e8..d6febdda6cda29d9dced095a7e6235789b9e891a 100644 (file)
@@ -3,6 +3,7 @@
 from __future__ import print_function
 import argparse
 import atexit
+import errno
 import httplib2
 import os
 import pipes
@@ -31,7 +32,6 @@ import arvados.config
 
 ARVADOS_DIR = os.path.realpath(os.path.join(MY_DIRNAME, '../../..'))
 SERVICES_SRC_DIR = os.path.join(ARVADOS_DIR, 'services')
-SERVER_PID_PATH = 'tmp/pids/test-server.pid'
 if 'GOPATH' in os.environ:
     gopaths = os.environ['GOPATH'].split(':')
     gobins = [os.path.join(path, 'bin') for path in gopaths]
@@ -54,9 +54,7 @@ def find_server_pid(PID_PATH, wait=10):
             with open(PID_PATH, 'r') as f:
                 server_pid = int(f.read())
             good_pid = (os.kill(server_pid, 0) is None)
-        except IOError:
-            good_pid = False
-        except OSError:
+        except EnvironmentError:
             good_pid = False
         now = time.time()
 
@@ -71,30 +69,65 @@ def kill_server_pid(pidfile, wait=10, passenger_root=False):
     import signal
     import subprocess
     import time
-    try:
-        if passenger_root:
-            # First try to shut down nicely
-            restore_cwd = os.getcwd()
-            os.chdir(passenger_root)
-            subprocess.call([
-                'bundle', 'exec', 'passenger', 'stop', '--pid-file', pidfile])
-            os.chdir(restore_cwd)
-        now = time.time()
-        timeout = now + wait
-        with open(pidfile, 'r') as f:
-            server_pid = int(f.read())
-        while now <= timeout:
-            if not passenger_root or timeout - now < wait / 2:
-                # Half timeout has elapsed. Start sending SIGTERM
-                os.kill(server_pid, signal.SIGTERM)
-            # Raise OSError if process has disappeared
-            os.getpgid(server_pid)
+
+    now = time.time()
+    startTERM = now
+    deadline = now + wait
+
+    if passenger_root:
+        # First try to shut down nicely
+        restore_cwd = os.getcwd()
+        os.chdir(passenger_root)
+        subprocess.call([
+            'bundle', 'exec', 'passenger', 'stop', '--pid-file', pidfile])
+        os.chdir(restore_cwd)
+        # Use up to half of the +wait+ period waiting for "passenger
+        # stop" to work. If the process hasn't exited by then, start
+        # sending TERM signals.
+        startTERM += wait/2
+
+    server_pid = None
+    while now <= deadline and server_pid is None:
+        try:
+            with open(pidfile, 'r') as f:
+                server_pid = int(f.read())
+        except IOError:
+            # No pidfile = nothing to kill.
+            return
+        except ValueError as error:
+            # Pidfile exists, but we can't parse it. Perhaps the
+            # server has created the file but hasn't written its PID
+            # yet?
+            print("Parse error reading pidfile {}: {}".format(pidfile, error),
+                  file=sys.stderr)
             time.sleep(0.1)
             now = time.time()
-    except IOError:
-        pass
-    except OSError:
-        pass
+
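+    # Wait for the process to exit, reaping it if it is our child;
+    # once startTERM has passed, also poll it with SIGTERM.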
+    while now <= deadline:
+        try:
+            exited, _ = os.waitpid(server_pid, os.WNOHANG)
+            if exited > 0:
+                return
+        except OSError:
+            # already exited, or isn't our child process
+            pass
+        try:
+            if now >= startTERM:
+                os.kill(server_pid, signal.SIGTERM)
+                print("Sent SIGTERM to {} ({})".format(server_pid, pidfile),
+                      file=sys.stderr)
+        except OSError as error:
+            if error.errno == errno.ESRCH:
+                # Thrown by os.kill() if the process does not exist,
+                # i.e., our work here is done.
+                return
+            raise
+        time.sleep(0.1)
+        now = time.time()
+
+    print("Server PID {} ({}) did not exit, giving up after {}s".
+          format(server_pid, pidfile, wait),
+          file=sys.stderr)
 
 def find_available_port():
     """Return an IPv4 port number that is not in use right now.
@@ -125,7 +158,8 @@ def _wait_until_port_listens(port, timeout=10):
         subprocess.check_output(['which', 'lsof'])
     except subprocess.CalledProcessError:
         print("WARNING: No `lsof` -- cannot wait for port to listen. "+
-              "Sleeping 0.5 and hoping for the best.")
+              "Sleeping 0.5 and hoping for the best.",
+              file=sys.stderr)
         time.sleep(0.5)
         return
     deadline = time.time() + timeout
@@ -142,6 +176,26 @@ def _wait_until_port_listens(port, timeout=10):
         format(port, timeout),
         file=sys.stderr)
 
+def _fifo2stderr(label):
+    """Create a FIFO, and copy everything written to it to stderr,
+    prepending label to each line.
+
+    Return value is the path to the new FIFO.
+
+    +label+ should contain only alphanumerics: it is also used as part
+    of the FIFO filename.
+    """
+    fifo = os.path.join(TEST_TMPDIR, label+'.fifo')
+    try:
+        os.remove(fifo)
+    except OSError as error:
+        if error.errno != errno.ENOENT:
+            raise
+    os.mkfifo(fifo, 0700)
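+    # Relay the FIFO's contents to our stderr, prefixing each line
+    # with "[label] ".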
+    subprocess.Popen(
+        ['sed', '-e', 's/^/['+label+'] /', fifo],
+        stdout=sys.stderr)
+    return fifo
+
 def run(leave_running_atexit=False):
     """Ensure an API server is running, and ARVADOS_API_* env vars have
     admin credentials for it.
@@ -162,7 +216,7 @@ def run(leave_running_atexit=False):
     # Delete cached discovery document.
     shutil.rmtree(arvados.http_cache('discovery'))
 
-    pid_file = os.path.join(SERVICES_SRC_DIR, 'api', SERVER_PID_PATH)
+    pid_file = _pidfile('api')
     pid_file_ok = find_server_pid(pid_file, 0)
 
     existing_api_host = os.environ.get('ARVADOS_TEST_API_HOST', my_api_host)
@@ -234,7 +288,7 @@ def run(leave_running_atexit=False):
     start_msg = subprocess.check_output(
         ['bundle', 'exec',
          'passenger', 'start', '-d', '-p{}'.format(port),
-         '--pid-file', os.path.join(os.getcwd(), pid_file),
+         '--pid-file', pid_file,
          '--log-file', os.path.join(os.getcwd(), 'log/test.log'),
          '--ssl',
          '--ssl-certificate', 'tmp/self-signed.pem',
@@ -296,7 +350,7 @@ def stop(force=False):
     """
     global my_api_host
     if force or my_api_host is not None:
-        kill_server_pid(os.path.join(SERVICES_SRC_DIR, 'api', SERVER_PID_PATH))
+        kill_server_pid(_pidfile('api'))
         my_api_host = None
 
 def _start_keep(n, keep_args):
@@ -310,9 +364,10 @@ def _start_keep(n, keep_args):
     for arg, val in keep_args.iteritems():
         keep_cmd.append("{}={}".format(arg, val))
 
-    logf = open(os.path.join(TEST_TMPDIR, 'keep{}.log'.format(n)), 'a+')
+    logf = open(_fifo2stderr('keep{}'.format(n)), 'w')
     kp0 = subprocess.Popen(
         keep_cmd, stdin=open('/dev/null'), stdout=logf, stderr=logf, close_fds=True)
+
     with open(_pidfile('keep{}'.format(n)), 'w') as f:
         f.write(str(kp0.pid))
 
@@ -323,8 +378,8 @@ def _start_keep(n, keep_args):
 
     return port
 
-def run_keep(blob_signing_key=None, enforce_permissions=False):
-    stop_keep()
+def run_keep(blob_signing_key=None, enforce_permissions=False, num_servers=2):
+    stop_keep(num_servers)
 
     keep_args = {}
     if not blob_signing_key:
@@ -336,7 +391,7 @@ def run_keep(blob_signing_key=None, enforce_permissions=False):
         keep_args['-enforce-permissions'] = 'true'
     with open(os.path.join(TEST_TMPDIR, "keep.data-manager-token-file"), "w") as f:
         keep_args['-data-manager-token-file'] = f.name
-        f.write(os.environ['ARVADOS_API_TOKEN'])
+        f.write(auth_token('data_manager'))
     keep_args['-never-delete'] = 'false'
 
     api = arvados.api(
@@ -344,12 +399,13 @@ def run_keep(blob_signing_key=None, enforce_permissions=False):
         host=os.environ['ARVADOS_API_HOST'],
         token=os.environ['ARVADOS_API_TOKEN'],
         insecure=True)
-    for d in api.keep_services().list().execute()['items']:
+
+    for d in api.keep_services().list(filters=[['service_type','=','disk']]).execute()['items']:
         api.keep_services().delete(uuid=d['uuid']).execute()
     for d in api.keep_disks().list().execute()['items']:
         api.keep_disks().delete(uuid=d['uuid']).execute()
 
-    for d in range(0, 2):
+    for d in range(0, num_servers):
         port = _start_keep(d, keep_args)
         svc = api.keep_services().create(body={'keep_service': {
             'uuid': 'zzzzz-bi6l4-keepdisk{:07d}'.format(d),
@@ -362,8 +418,14 @@ def run_keep(blob_signing_key=None, enforce_permissions=False):
             'keep_disk': {'keep_service_uuid': svc['uuid'] }
         }).execute()
 
+    # If keepproxy is running, send SIGHUP to make it discover the new
+    # keepstore services.
+    proxypidfile = _pidfile('keepproxy')
+    if os.path.exists(proxypidfile):
+        os.kill(int(open(proxypidfile).read()), signal.SIGHUP)
+
 def _stop_keep(n):
-    kill_server_pid(_pidfile('keep{}'.format(n)), 0)
+    kill_server_pid(_pidfile('keep{}'.format(n)))
     if os.path.exists("{}/keep{}.volume".format(TEST_TMPDIR, n)):
         with open("{}/keep{}.volume".format(TEST_TMPDIR, n), 'r') as r:
             shutil.rmtree(r.read(), True)
@@ -371,29 +433,29 @@ def _stop_keep(n):
     if os.path.exists(os.path.join(TEST_TMPDIR, "keep.blob_signing_key")):
         os.remove(os.path.join(TEST_TMPDIR, "keep.blob_signing_key"))
 
-def stop_keep():
-    _stop_keep(0)
-    _stop_keep(1)
+def stop_keep(num_servers=2):
+    for n in range(0, num_servers):
+        _stop_keep(n)
 
 def run_keep_proxy():
     if 'ARVADOS_TEST_PROXY_SERVICES' in os.environ:
         return
     stop_keep_proxy()
 
-    admin_token = auth_token('admin')
     port = find_available_port()
     env = os.environ.copy()
-    env['ARVADOS_API_TOKEN'] = admin_token
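+    # keepproxy itself runs with the anonymous user's token; the
+    # admin token is only needed below to register the proxy with
+    # the API server.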
+    env['ARVADOS_API_TOKEN'] = auth_token('anonymous')
+    logf = open(_fifo2stderr('keepproxy'), 'w')
     kp = subprocess.Popen(
         ['keepproxy',
          '-pid='+_pidfile('keepproxy'),
          '-listen=:{}'.format(port)],
-        env=env, stdin=open('/dev/null'), stdout=sys.stderr)
+        env=env, stdin=open('/dev/null'), stdout=logf, stderr=logf, close_fds=True)
 
     api = arvados.api(
         version='v1',
         host=os.environ['ARVADOS_API_HOST'],
-        token=admin_token,
+        token=auth_token('admin'),
         insecure=True)
     for d in api.keep_services().list(
             filters=[['service_type','=','proxy']]).execute()['items']:
@@ -411,7 +473,7 @@ def run_keep_proxy():
 def stop_keep_proxy():
     if 'ARVADOS_TEST_PROXY_SERVICES' in os.environ:
         return
-    kill_server_pid(_pidfile('keepproxy'), wait=0)
+    kill_server_pid(_pidfile('keepproxy'))
 
 def run_arv_git_httpd():
     if 'ARVADOS_TEST_PROXY_SERVICES' in os.environ:
@@ -422,11 +484,12 @@ def run_arv_git_httpd():
     gitport = find_available_port()
     env = os.environ.copy()
     env.pop('ARVADOS_API_TOKEN', None)
+    logf = open(_fifo2stderr('arv-git-httpd'), 'w')
     agh = subprocess.Popen(
         ['arv-git-httpd',
          '-repo-root='+gitdir+'/test',
          '-address=:'+str(gitport)],
-        env=env, stdin=open('/dev/null'), stdout=sys.stderr)
+        env=env, stdin=open('/dev/null'), stdout=logf, stderr=logf)
     with open(_pidfile('arv-git-httpd'), 'w') as f:
         f.write(str(agh.pid))
     _setport('arv-git-httpd', gitport)
@@ -435,18 +498,47 @@ def run_arv_git_httpd():
 def stop_arv_git_httpd():
     if 'ARVADOS_TEST_PROXY_SERVICES' in os.environ:
         return
-    kill_server_pid(_pidfile('arv-git-httpd'), wait=0)
+    kill_server_pid(_pidfile('arv-git-httpd'))
+
+def run_keep_web():
+    if 'ARVADOS_TEST_PROXY_SERVICES' in os.environ:
+        return
+    stop_keep_web()
+
+    keepwebport = find_available_port()
+    env = os.environ.copy()
+    env['ARVADOS_API_TOKEN'] = auth_token('anonymous')
+    logf = open(_fifo2stderr('keep-web'), 'w')
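+    # Requests arriving with Host "download:<port>" are served in
+    # attachment-only (download) mode.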
+    keepweb = subprocess.Popen(
+        ['keep-web',
+         '-allow-anonymous',
+         '-attachment-only-host=download:'+str(keepwebport),
+         '-listen=:'+str(keepwebport)],
+        env=env, stdin=open('/dev/null'), stdout=logf, stderr=logf)
+    with open(_pidfile('keep-web'), 'w') as f:
+        f.write(str(keepweb.pid))
+    _setport('keep-web', keepwebport)
+    _wait_until_port_listens(keepwebport)
+
+def stop_keep_web():
+    if 'ARVADOS_TEST_PROXY_SERVICES' in os.environ:
+        return
+    kill_server_pid(_pidfile('keep-web'))
 
 def run_nginx():
     if 'ARVADOS_TEST_PROXY_SERVICES' in os.environ:
         return
     nginxconf = {}
+    nginxconf['KEEPWEBPORT'] = _getport('keep-web')
+    nginxconf['KEEPWEBDLSSLPORT'] = find_available_port()
+    nginxconf['KEEPWEBSSLPORT'] = find_available_port()
     nginxconf['KEEPPROXYPORT'] = _getport('keepproxy')
     nginxconf['KEEPPROXYSSLPORT'] = find_available_port()
     nginxconf['GITPORT'] = _getport('arv-git-httpd')
     nginxconf['GITSSLPORT'] = find_available_port()
     nginxconf['SSLCERT'] = os.path.join(SERVICES_SRC_DIR, 'api', 'tmp', 'self-signed.pem')
     nginxconf['SSLKEY'] = os.path.join(SERVICES_SRC_DIR, 'api', 'tmp', 'self-signed.key')
+    nginxconf['ACCESSLOG'] = _fifo2stderr('nginx_access_log')
 
     conftemplatefile = os.path.join(MY_DIRNAME, 'nginx.conf')
     conffile = os.path.join(TEST_TMPDIR, 'nginx.conf')
@@ -458,19 +550,22 @@ def run_nginx():
 
     env = os.environ.copy()
     env['PATH'] = env['PATH']+':/sbin:/usr/sbin:/usr/local/sbin'
+
     nginx = subprocess.Popen(
         ['nginx',
          '-g', 'error_log stderr info;',
          '-g', 'pid '+_pidfile('nginx')+';',
          '-c', conffile],
         env=env, stdin=open('/dev/null'), stdout=sys.stderr)
+    _setport('keep-web-dl-ssl', nginxconf['KEEPWEBDLSSLPORT'])
+    _setport('keep-web-ssl', nginxconf['KEEPWEBSSLPORT'])
     _setport('keepproxy-ssl', nginxconf['KEEPPROXYSSLPORT'])
     _setport('arv-git-httpd-ssl', nginxconf['GITSSLPORT'])
 
 def stop_nginx():
     if 'ARVADOS_TEST_PROXY_SERVICES' in os.environ:
         return
-    kill_server_pid(_pidfile('nginx'), wait=0)
+    kill_server_pid(_pidfile('nginx'))
 
 def _pidfile(program):
     return os.path.join(TEST_TMPDIR, program + '.pid')
@@ -545,6 +640,7 @@ class TestCaseWithServers(unittest.TestCase):
     MAIN_SERVER = None
     KEEP_SERVER = None
     KEEP_PROXY_SERVER = None
+    KEEP_WEB_SERVER = None
 
     @staticmethod
     def _restore_dict(src, dest):
@@ -563,7 +659,8 @@ class TestCaseWithServers(unittest.TestCase):
         for server_kwargs, start_func, stop_func in (
                 (cls.MAIN_SERVER, run, reset),
                 (cls.KEEP_SERVER, run_keep, stop_keep),
-                (cls.KEEP_PROXY_SERVER, run_keep_proxy, stop_keep_proxy)):
+                (cls.KEEP_PROXY_SERVER, run_keep_proxy, stop_keep_proxy),
+                (cls.KEEP_WEB_SERVER, run_keep_web, stop_keep_web)):
             if server_kwargs is not None:
                 start_func(**server_kwargs)
                 cls._cleanup_funcs.append(stop_func)
@@ -589,16 +686,22 @@ if __name__ == "__main__":
         'start', 'stop',
         'start_keep', 'stop_keep',
         'start_keep_proxy', 'stop_keep_proxy',
+        'start_keep-web', 'stop_keep-web',
         'start_arv-git-httpd', 'stop_arv-git-httpd',
         'start_nginx', 'stop_nginx',
     ]
     parser = argparse.ArgumentParser()
     parser.add_argument('action', type=str, help="one of {}".format(actions))
     parser.add_argument('--auth', type=str, metavar='FIXTURE_NAME', help='Print authorization info for given api_client_authorizations fixture')
+    parser.add_argument('--num-keep-servers', metavar='int', type=int, default=2, help="Number of keep servers desired")
+    parser.add_argument('--keep-enforce-permissions', action="store_true", help="Enforce keep permissions")
+
     args = parser.parse_args()
 
     if args.action not in actions:
-        print("Unrecognized action '{}'. Actions are: {}.".format(args.action, actions), file=sys.stderr)
+        print("Unrecognized action '{}'. Actions are: {}.".
+              format(args.action, actions),
+              file=sys.stderr)
         sys.exit(1)
     if args.action == 'start':
         stop(force=('ARVADOS_TEST_API_HOST' not in os.environ))
@@ -614,9 +717,9 @@ if __name__ == "__main__":
     elif args.action == 'stop':
         stop(force=('ARVADOS_TEST_API_HOST' not in os.environ))
     elif args.action == 'start_keep':
-        run_keep()
+        run_keep(enforce_permissions=args.keep_enforce_permissions, num_servers=args.num_keep_servers)
     elif args.action == 'stop_keep':
-        stop_keep()
+        stop_keep(num_servers=args.num_keep_servers)
     elif args.action == 'start_keep_proxy':
         run_keep_proxy()
     elif args.action == 'stop_keep_proxy':
@@ -625,6 +728,10 @@ if __name__ == "__main__":
         run_arv_git_httpd()
     elif args.action == 'stop_arv-git-httpd':
         stop_arv_git_httpd()
+    elif args.action == 'start_keep-web':
+        run_keep_web()
+    elif args.action == 'stop_keep-web':
+        stop_keep_web()
     elif args.action == 'start_nginx':
         run_nginx()
     elif args.action == 'stop_nginx':
index 9d438e2e038ecca70225009d261e5229d61a81c3..795a9aa22a2b0ad161a6bf1ee26228ca1ba771f5 100644 (file)
@@ -3,63 +3,36 @@
 import arvados
 import collections
 import httplib2
+import itertools
 import json
 import mimetypes
 import os
-import run_test_server
+import socket
 import string
 import unittest
+
+import mock
+import run_test_server
+
 from apiclient import errors as apiclient_errors
 from apiclient import http as apiclient_http
-from arvados.api import OrderedJsonModel
-
-from arvados_testutil import fake_httplib2_response
+from arvados.api import OrderedJsonModel, RETRY_DELAY_INITIAL, RETRY_DELAY_BACKOFF, RETRY_COUNT
+from arvados_testutil import fake_httplib2_response, queue_with
 
 if not mimetypes.inited:
     mimetypes.init()
 
-class ArvadosApiClientTest(unittest.TestCase):
+class ArvadosApiTest(run_test_server.TestCaseWithServers):
+    MAIN_SERVER = {}
     ERROR_HEADERS = {'Content-Type': mimetypes.types_map['.json']}
 
-    @classmethod
-    def api_error_response(cls, code, *errors):
-        return (fake_httplib2_response(code, **cls.ERROR_HEADERS),
+    def api_error_response(self, code, *errors):
+        return (fake_httplib2_response(code, **self.ERROR_HEADERS),
                 json.dumps({'errors': errors,
                             'error_token': '1234567890+12345678'}))
 
-    @classmethod
-    def setUpClass(cls):
-        # The apiclient library has support for mocking requests for
-        # testing, but it doesn't extend to the discovery document
-        # itself. For now, bring up an API server that will serve
-        # a discovery document.
-        # FIXME: Figure out a better way to stub this out.
-        run_test_server.run()
-        mock_responses = {
-            'arvados.humans.delete': (
-                fake_httplib2_response(500, **cls.ERROR_HEADERS),
-                ""),
-            'arvados.humans.get': cls.api_error_response(
-                422, "Bad UUID format", "Bad output format"),
-            'arvados.humans.list': (None, json.dumps(
-                    {'items_available': 0, 'items': []})),
-            }
-        req_builder = apiclient_http.RequestMockBuilder(mock_responses)
-        cls.api = arvados.api('v1',
-                              host=os.environ['ARVADOS_API_HOST'],
-                              token='discovery-doc-only-no-token-needed',
-                              insecure=True,
-                              requestBuilder=req_builder)
-
-    def tearDown(cls):
-        run_test_server.reset()
-
     def test_new_api_objects_with_cache(self):
-        clients = [arvados.api('v1', cache=True,
-                               host=os.environ['ARVADOS_API_HOST'],
-                               token='discovery-doc-only-no-token-needed',
-                               insecure=True)
-                   for index in [0, 1]]
+        clients = [arvados.api('v1', cache=True) for index in [0, 1]]
         self.assertIsNot(*clients)
 
     def test_empty_list(self):
@@ -92,15 +65,28 @@ class ArvadosApiClientTest(unittest.TestCase):
                     new_item['created_at']))
 
     def test_exceptions_include_errors(self):
+        mock_responses = {
+            'arvados.humans.get': self.api_error_response(
+                422, "Bad UUID format", "Bad output format"),
+            }
+        req_builder = apiclient_http.RequestMockBuilder(mock_responses)
+        api = arvados.api('v1', requestBuilder=req_builder)
         with self.assertRaises(apiclient_errors.HttpError) as err_ctx:
-            self.api.humans().get(uuid='xyz-xyz-abcdef').execute()
+            api.humans().get(uuid='xyz-xyz-abcdef').execute()
         err_s = str(err_ctx.exception)
         for msg in ["Bad UUID format", "Bad output format"]:
             self.assertIn(msg, err_s)
 
     def test_exceptions_without_errors_have_basic_info(self):
+        mock_responses = {
+            'arvados.humans.delete': (
+                fake_httplib2_response(500, **self.ERROR_HEADERS),
+                "")
+            }
+        req_builder = apiclient_http.RequestMockBuilder(mock_responses)
+        api = arvados.api('v1', requestBuilder=req_builder)
         with self.assertRaises(apiclient_errors.HttpError) as err_ctx:
-            self.api.humans().delete(uuid='xyz-xyz-abcdef').execute()
+            api.humans().delete(uuid='xyz-xyz-abcdef').execute()
         self.assertIn("500", str(err_ctx.exception))
 
     def test_request_too_large(self):
@@ -117,14 +103,84 @@ class ArvadosApiClientTest(unittest.TestCase):
             }
         req_builder = apiclient_http.RequestMockBuilder(mock_responses)
         api = arvados.api('v1',
-                          host=os.environ['ARVADOS_API_HOST'],
-                          token='discovery-doc-only-no-token-needed',
-                          insecure=True,
-                          requestBuilder=req_builder,
-                          model=OrderedJsonModel())
+                          requestBuilder=req_builder, model=OrderedJsonModel())
         result = api.humans().get(uuid='test').execute()
         self.assertEqual(string.hexdigits, ''.join(result.keys()))
 
 
+class RetryREST(unittest.TestCase):
+    def setUp(self):
+        self.api = arvados.api('v1')
+        self.assertTrue(hasattr(self.api._http, 'orig_http_request'),
+                        "test doesn't know how to intercept HTTP requests")
+        self.mock_response = {'user': 'person'}
+        self.request_success = (fake_httplib2_response(200),
+                                json.dumps(self.mock_response))
+        self.api._http.orig_http_request = mock.MagicMock()
+        # All requests succeed by default. Tests override as needed.
+        self.api._http.orig_http_request.return_value = self.request_success
+
+    @mock.patch('time.sleep')
+    def test_socket_error_retry_get(self, sleep):
+        self.api._http.orig_http_request.side_effect = (
+            socket.error('mock error'),
+            self.request_success,
+        )
+        self.assertEqual(self.api.users().current().execute(),
+                         self.mock_response)
+        self.assertGreater(self.api._http.orig_http_request.call_count, 1,
+                           "client got the right response without retrying")
+        self.assertEqual(sleep.call_args_list,
+                         [mock.call(RETRY_DELAY_INITIAL)])
+
+    @mock.patch('time.sleep')
+    def test_socket_error_retry_delay(self, sleep):
+        self.api._http.orig_http_request.side_effect = socket.error('mock')
+        self.api._http._retry_count = 3
+        with self.assertRaises(socket.error):
+            self.api.users().current().execute()
+        self.assertEqual(self.api._http.orig_http_request.call_count, 4)
+        self.assertEqual(sleep.call_args_list, [
+            mock.call(RETRY_DELAY_INITIAL),
+            mock.call(RETRY_DELAY_INITIAL * RETRY_DELAY_BACKOFF),
+            mock.call(RETRY_DELAY_INITIAL * RETRY_DELAY_BACKOFF**2),
+        ])
+
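+    # time.time() jumps 2**20 seconds (~12 days) per call, so cached
+    # connections always look older than any keepalive idle limit.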
+    @mock.patch('time.time', side_effect=[i*2**20 for i in range(99)])
+    def test_close_old_connections_non_retryable(self, mock_time):
+        self._test_connection_close(expect=1)
+
+    @mock.patch('time.time', side_effect=itertools.count())
+    def test_no_close_fresh_connections_non_retryable(self, mock_time):
+        self._test_connection_close(expect=0)
+
+    @mock.patch('time.time', side_effect=itertools.count())
+    def test_override_max_idle_time(self, mock_time):
+        self.api._http._max_keepalive_idle = 0
+        self._test_connection_close(expect=1)
+
+    def _test_connection_close(self, expect=0):
+        # Do two POST requests. The second one must close all
+        # connections +expect+ times.
+        self.api.users().create(body={}).execute()
+        mock_conns = {str(i): mock.MagicMock() for i in range(2)}
+        self.api._http.connections = mock_conns.copy()
+        self.api.users().create(body={}).execute()
+        for c in mock_conns.itervalues():
+            self.assertEqual(c.close.call_count, expect)
+
+    @mock.patch('time.sleep')
+    def test_socket_error_no_retry_post(self, sleep):
+        self.api._http.orig_http_request.side_effect = (
+            socket.error('mock error'),
+            self.request_success,
+        )
+        with self.assertRaises(socket.error):
+            self.api.users().create(body={}).execute()
+        self.assertEqual(self.api._http.orig_http_request.call_count, 1,
+                         "client should try non-retryable method exactly once")
+        self.assertEqual(sleep.call_args_list, [])
+
+
 if __name__ == '__main__':
     unittest.main()
index 90bbacfe5af7146ffa4ec3924c00e1b44e0d6715..664b57fc00a57cef068e352232d55d0dfa548a58 100644 (file)
@@ -1,7 +1,6 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
-import hashlib
 import io
 import random
 
@@ -11,6 +10,8 @@ import arvados.errors as arv_error
 import arvados.commands.ls as arv_ls
 import run_test_server
 
+from arvados_testutil import str_keep_locator
+
 class ArvLsTestCase(run_test_server.TestCaseWithServers):
     FAKE_UUID = 'zzzzz-4zz18-12345abcde12345'
 
@@ -24,8 +25,7 @@ class ArvLsTestCase(run_test_server.TestCaseWithServers):
 
     def mock_api_for_manifest(self, manifest_lines, uuid=FAKE_UUID):
         manifest_text = self.newline_join(manifest_lines)
-        pdh = '{}+{}'.format(hashlib.md5(manifest_text).hexdigest(),
-                             len(manifest_text))
+        pdh = str_keep_locator(manifest_text)
         coll_info = {'uuid': uuid,
                      'portable_data_hash': pdh,
                      'manifest_text': manifest_text}
index f4bd8b692bc2520cdda8a679abbb3c2d18ff85dd..896b880778a1b0965429420af8a6f349048ef5c9 100644 (file)
@@ -2,6 +2,7 @@
 # -*- coding: utf-8 -*-
 
 import apiclient
+import mock
 import os
 import pwd
 import re
@@ -335,10 +336,10 @@ class ArvadosPutTest(run_test_server.TestCaseWithServers, ArvadosBaseTestCase):
         self.main_stderr = StringIO()
         return arv_put.main(args, self.main_stdout, self.main_stderr)
 
-    def call_main_on_test_file(self):
+    def call_main_on_test_file(self, args=[]):
         with self.make_test_file() as testfile:
             path = testfile.name
-            self.call_main_with_args(['--stream', '--no-progress', path])
+            self.call_main_with_args(['--stream', '--no-progress'] + args + [path])
         self.assertTrue(
             os.path.exists(os.path.join(os.environ['KEEP_LOCAL_STORE'],
                                         '098f6bcd4621d373cade4e832627b4f6')),
@@ -381,6 +382,18 @@ class ArvadosPutTest(run_test_server.TestCaseWithServers, ArvadosBaseTestCase):
             arv_put.ResumeCache.CACHE_DIR = orig_cachedir
             os.chmod(cachedir, 0o700)
 
+    def test_put_block_replication(self):
+        with mock.patch('arvados.collection.KeepClient.local_store_put') as put_mock, \
+             mock.patch('arvados.commands.put.ResumeCache.load') as cache_mock:
+            cache_mock.side_effect = ValueError
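+            # Simulate an unusable resume cache so each call uploads
+            # from scratch.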
+            put_mock.return_value = 'acbd18db4cc2f85cedef654fccc4a4d8+3'
+            self.call_main_on_test_file(['--replication', '1'])
+            self.call_main_on_test_file(['--replication', '4'])
+            self.call_main_on_test_file(['--replication', '5'])
+            self.assertEqual(
+                [x[-1].get('copies') for x in put_mock.call_args_list],
+                [1, 4, 5])
+
     def test_normalize(self):
         testfile1 = self.make_test_file()
         testfile2 = self.make_test_file()
index 330dd448278259aef3a686d652c81f72cdf6405e..ea8661437c04f3c0d2dec8d4a01c0c8176a2e87c 100644 (file)
@@ -6,7 +6,6 @@ import io
 import mock
 import os
 import unittest
-import hashlib
 import time
 
 import arvados
@@ -30,7 +29,7 @@ class ArvadosFileWriterTestCase(unittest.TestCase):
             self.requests.append(locator)
             return self.blocks.get(locator)
         def put(self, data, num_retries=None):
-            pdh = "%s+%i" % (hashlib.md5(data).hexdigest(), len(data))
+            pdh = tutil.str_keep_locator(data)
             self.blocks[pdh] = str(data)
             return pdh
 
@@ -453,7 +452,7 @@ class ArvadosFileReaderTestCase(StreamFileReaderTestCase):
         n = 0
         blocks = {}
         for d in ['01234', '34567', '67890']:
-            loc = '{}+{}'.format(hashlib.md5(d).hexdigest(), len(d))
+            loc = tutil.str_keep_locator(d)
             blocks[loc] = d
             stream.append(Range(loc, n, len(d)))
             n += len(d)
index ac7dd1b9f678ab6391ad71b13201374d127aaba3..bc3c0bf1afc7e292f07f34e3e8450f32b69b8b8f 100644 (file)
@@ -4,7 +4,6 @@
 
 import arvados
 import copy
-import hashlib
 import mock
 import os
 import pprint
@@ -395,7 +394,7 @@ class ArvadosCollectionsTest(run_test_server.TestCaseWithServers,
     def test_write_directory_tree_with_zero_recursion(self):
         cwriter = arvados.CollectionWriter(self.api_client)
         content = 'd1/d2/f3d1/f2f1'
-        blockhash = hashlib.md5(content).hexdigest() + '+' + str(len(content))
+        blockhash = tutil.str_keep_locator(content)
         cwriter.write_directory_tree(
             self.build_directory_tree(['f1', 'd1/f2', 'd1/d2/f3']),
             max_manifest_depth=0)
@@ -739,7 +738,7 @@ class CollectionWriterTestCase(unittest.TestCase, CollectionTestMixin):
             self.assertEqual('.', writer.current_stream_name())
             self.assertEqual('out', writer.current_file_name())
             out_file.write('test data')
-            data_loc = hashlib.md5('test data').hexdigest() + '+9'
+            data_loc = tutil.str_keep_locator('test data')
         self.assertTrue(out_file.closed, "writer file not closed after context")
         self.assertRaises(ValueError, out_file.write, 'extra text')
         with self.mock_keep(data_loc, 200) as keep_mock:
@@ -751,15 +750,15 @@ class CollectionWriterTestCase(unittest.TestCase, CollectionTestMixin):
         writer = arvados.CollectionWriter(client)
         with writer.open('six') as out_file:
             out_file.writelines(['12', '34', '56'])
-            data_loc = hashlib.md5('123456').hexdigest() + '+6'
+            data_loc = tutil.str_keep_locator('123456')
         with self.mock_keep(data_loc, 200) as keep_mock:
             self.assertEqual(". {} 0:6:six\n".format(data_loc),
                              writer.manifest_text())
 
     def test_open_flush(self):
         client = self.api_client_mock()
-        data_loc1 = hashlib.md5('flush1').hexdigest() + '+6'
-        data_loc2 = hashlib.md5('flush2').hexdigest() + '+6'
+        data_loc1 = tutil.str_keep_locator('flush1')
+        data_loc2 = tutil.str_keep_locator('flush2')
         with self.mock_keep((data_loc1, 200), (data_loc2, 200)) as keep_mock:
             writer = arvados.CollectionWriter(client)
             with writer.open('flush_test') as out_file:
@@ -777,15 +776,15 @@ class CollectionWriterTestCase(unittest.TestCase, CollectionTestMixin):
             out_file.write('1st')
         with writer.open('.', '2') as out_file:
             out_file.write('2nd')
-        data_loc = hashlib.md5('1st2nd').hexdigest() + '+6'
+        data_loc = tutil.str_keep_locator('1st2nd')
         with self.mock_keep(data_loc, 200) as keep_mock:
             self.assertEqual(". {} 0:3:1 3:3:2\n".format(data_loc),
                              writer.manifest_text())
 
     def test_two_opens_two_streams(self):
         client = self.api_client_mock()
-        data_loc1 = hashlib.md5('file').hexdigest() + '+4'
-        data_loc2 = hashlib.md5('indir').hexdigest() + '+5'
+        data_loc1 = tutil.str_keep_locator('file')
+        data_loc2 = tutil.str_keep_locator('indir')
         with self.mock_keep((data_loc1, 200), (data_loc2, 200)) as keep_mock:
             writer = arvados.CollectionWriter(client)
             with writer.open('file') as out_file:
diff --git a/sdk/python/tests/test_crunch.py b/sdk/python/tests/test_crunch.py
new file mode 100644 (file)
index 0000000..431390b
--- /dev/null
@@ -0,0 +1,27 @@
+import arvados.crunch
+import os
+import shutil
+import tempfile
+import unittest
+
+class TaskOutputDirTest(unittest.TestCase):
+    def setUp(self):
+        self.tmp = tempfile.mkdtemp()
+        os.environ['TASK_KEEPMOUNT_TMP'] = self.tmp
+
+    def tearDown(self):
+        os.environ.pop('TASK_KEEPMOUNT_TMP')
+        shutil.rmtree(self.tmp)
+
+    def test_env_var(self):
+        out = arvados.crunch.TaskOutputDir()
+        self.assertEqual(out.path, self.tmp)
+
+        with open(os.path.join(self.tmp, '.arvados#collection'), 'w') as f:
+            f.write('{\n  "manifest_text":"",\n  "uuid":null\n}\n')
+        self.assertEqual(out.manifest_text(), '')
+
+        # Special file must be re-read on each call to manifest_text().
+        with open(os.path.join(self.tmp, '.arvados#collection'), 'w') as f:
+            f.write(r'{"manifest_text":". unparsed 0:3:foo\n","uuid":null}')
+        self.assertEqual(out.manifest_text(), ". unparsed 0:3:foo\n")
index c44379bac79465417e9a7d128d1aa47f13d6a6fa..2b380f066641ec25ac980b6f9183df94b728bb5d 100644 (file)
@@ -29,14 +29,21 @@ class KeepTestCase(run_test_server.TestCaseWithServers):
                                              proxy='', local_store='')
 
     def test_KeepBasicRWTest(self):
+        self.assertEqual(0, self.keep_client.upload_counter.get())
         foo_locator = self.keep_client.put('foo')
         self.assertRegexpMatches(
             foo_locator,
             '^acbd18db4cc2f85cedef654fccc4a4d8\+3',
             'wrong md5 hash from Keep.put("foo"): ' + foo_locator)
+
+        # 6 bytes uploaded: 2 copies of the 3-byte block 'foo'.
+        self.assertEqual(6, self.keep_client.upload_counter.get())
+
+        self.assertEqual(0, self.keep_client.download_counter.get())
         self.assertEqual(self.keep_client.get(foo_locator),
                          'foo',
                          'wrong content from Keep.get(md5("foo"))')
+        self.assertEqual(3, self.keep_client.download_counter.get())
 
     def test_KeepBinaryRWTest(self):
         blob_str = '\xff\xfe\xf7\x00\x01\x02'
@@ -287,8 +294,11 @@ class KeepClientServiceTestCase(unittest.TestCase, tutil.ApiClientMock):
                 mock.responses[0].getopt(pycurl.CONNECTTIMEOUT_MS),
                 int(arvados.KeepClient.DEFAULT_TIMEOUT[0]*1000))
             self.assertEqual(
-                mock.responses[0].getopt(pycurl.TIMEOUT_MS),
-                int(arvados.KeepClient.DEFAULT_TIMEOUT[1]*1000))
+                mock.responses[0].getopt(pycurl.LOW_SPEED_TIME),
+                int(arvados.KeepClient.DEFAULT_TIMEOUT[1]))
+            self.assertEqual(
+                mock.responses[0].getopt(pycurl.LOW_SPEED_LIMIT),
+                int(arvados.KeepClient.DEFAULT_TIMEOUT[2]))
 
     def test_put_timeout(self):
         api_client = self.mock_keep_services(count=1)
@@ -301,8 +311,11 @@ class KeepClientServiceTestCase(unittest.TestCase, tutil.ApiClientMock):
                 mock.responses[0].getopt(pycurl.CONNECTTIMEOUT_MS),
                 int(arvados.KeepClient.DEFAULT_TIMEOUT[0]*1000))
             self.assertEqual(
-                mock.responses[0].getopt(pycurl.TIMEOUT_MS),
-                int(arvados.KeepClient.DEFAULT_TIMEOUT[1]*1000))
+                mock.responses[0].getopt(pycurl.LOW_SPEED_TIME),
+                int(arvados.KeepClient.DEFAULT_TIMEOUT[1]))
+            self.assertEqual(
+                mock.responses[0].getopt(pycurl.LOW_SPEED_LIMIT),
+                int(arvados.KeepClient.DEFAULT_TIMEOUT[2]))
 
     def test_proxy_get_timeout(self):
         api_client = self.mock_keep_services(service_type='proxy', count=1)
@@ -315,8 +328,11 @@ class KeepClientServiceTestCase(unittest.TestCase, tutil.ApiClientMock):
                 mock.responses[0].getopt(pycurl.CONNECTTIMEOUT_MS),
                 int(arvados.KeepClient.DEFAULT_PROXY_TIMEOUT[0]*1000))
             self.assertEqual(
-                mock.responses[0].getopt(pycurl.TIMEOUT_MS),
-                int(arvados.KeepClient.DEFAULT_PROXY_TIMEOUT[1]*1000))
+                mock.responses[0].getopt(pycurl.LOW_SPEED_TIME),
+                int(arvados.KeepClient.DEFAULT_PROXY_TIMEOUT[1]))
+            self.assertEqual(
+                mock.responses[0].getopt(pycurl.LOW_SPEED_LIMIT),
+                int(arvados.KeepClient.DEFAULT_PROXY_TIMEOUT[2]))
 
     def test_proxy_put_timeout(self):
         api_client = self.mock_keep_services(service_type='proxy', count=1)
@@ -329,42 +345,194 @@ class KeepClientServiceTestCase(unittest.TestCase, tutil.ApiClientMock):
                 mock.responses[0].getopt(pycurl.CONNECTTIMEOUT_MS),
                 int(arvados.KeepClient.DEFAULT_PROXY_TIMEOUT[0]*1000))
             self.assertEqual(
-                mock.responses[0].getopt(pycurl.TIMEOUT_MS),
-                int(arvados.KeepClient.DEFAULT_PROXY_TIMEOUT[1]*1000))
+                mock.responses[0].getopt(pycurl.LOW_SPEED_TIME),
+                int(arvados.KeepClient.DEFAULT_PROXY_TIMEOUT[1]))
+            self.assertEqual(
+                mock.responses[0].getopt(pycurl.LOW_SPEED_LIMIT),
+                int(arvados.KeepClient.DEFAULT_PROXY_TIMEOUT[2]))
+
+    def check_no_services_error(self, verb, exc_class):
+        api_client = mock.MagicMock(name='api_client')
+        api_client.keep_services().accessible().execute.side_effect = (
+            arvados.errors.ApiError)
+        keep_client = arvados.KeepClient(api_client=api_client)
+        with self.assertRaises(exc_class) as err_check:
+            getattr(keep_client, verb)('d41d8cd98f00b204e9800998ecf8427e+0')
+        self.assertEqual(0, len(err_check.exception.request_errors()))
+
+    def test_get_error_with_no_services(self):
+        self.check_no_services_error('get', arvados.errors.KeepReadError)
 
-    def test_probe_order_reference_set(self):
+    def test_put_error_with_no_services(self):
+        self.check_no_services_error('put', arvados.errors.KeepWriteError)
+
+    def check_errors_from_last_retry(self, verb, exc_class):
+        api_client = self.mock_keep_services(count=2)
+        req_mock = tutil.mock_keep_responses(
+            "retry error reporting test", 500, 500, 403, 403)
+        with req_mock, tutil.skip_sleep, \
+                self.assertRaises(exc_class) as err_check:
+            keep_client = arvados.KeepClient(api_client=api_client)
+            getattr(keep_client, verb)('d41d8cd98f00b204e9800998ecf8427e+0',
+                                       num_retries=3)
+        self.assertEqual([403, 403], [
+                getattr(error, 'status_code', None)
+                for error in err_check.exception.request_errors().itervalues()])
+
+    def test_get_error_reflects_last_retry(self):
+        self.check_errors_from_last_retry('get', arvados.errors.KeepReadError)
+
+    def test_put_error_reflects_last_retry(self):
+        self.check_errors_from_last_retry('put', arvados.errors.KeepWriteError)
+
+    def test_put_error_does_not_include_successful_puts(self):
+        data = 'partial failure test'
+        data_loc = tutil.str_keep_locator(data)
+        api_client = self.mock_keep_services(count=3)
+        with tutil.mock_keep_responses(data_loc, 200, 500, 500) as req_mock, \
+                self.assertRaises(arvados.errors.KeepWriteError) as exc_check:
+            keep_client = arvados.KeepClient(api_client=api_client)
+            keep_client.put(data)
+        self.assertEqual(2, len(exc_check.exception.request_errors()))
+
+    def test_proxy_put_with_no_writable_services(self):
+        data = 'test with no writable services'
+        data_loc = tutil.str_keep_locator(data)
+        api_client = self.mock_keep_services(service_type='proxy', read_only=True, count=1)
+        with tutil.mock_keep_responses(data_loc, 200, 500, 500) as req_mock, \
+                self.assertRaises(arvados.errors.KeepWriteError) as exc_check:
+            keep_client = arvados.KeepClient(api_client=api_client)
+            keep_client.put(data)
+        self.assertIn("no Keep services available", str(exc_check.exception))
+        self.assertEqual(0, len(exc_check.exception.request_errors()))
+
+    def test_oddball_service_get(self):
+        body = 'oddball service get'
+        api_client = self.mock_keep_services(service_type='fancynewblobstore')
+        with tutil.mock_keep_responses(body, 200):
+            keep_client = arvados.KeepClient(api_client=api_client)
+            actual = keep_client.get(tutil.str_keep_locator(body))
+        self.assertEqual(body, actual)
+
+    def test_oddball_service_put(self):
+        body = 'oddball service put'
+        pdh = tutil.str_keep_locator(body)
+        api_client = self.mock_keep_services(service_type='fancynewblobstore')
+        with tutil.mock_keep_responses(pdh, 200):
+            keep_client = arvados.KeepClient(api_client=api_client)
+            actual = keep_client.put(body, copies=1)
+        self.assertEqual(pdh, actual)
+
+    def test_oddball_service_writer_count(self):
+        body = 'oddball service writer count'
+        pdh = tutil.str_keep_locator(body)
+        api_client = self.mock_keep_services(service_type='fancynewblobstore',
+                                             count=4)
+        headers = {'x-keep-replicas-stored': 3}
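+        # The first (200) response reports 3 replicas stored, which
+        # already satisfies copies=2, so only one request should be made.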
+        with tutil.mock_keep_responses(pdh, 200, 418, 418, 418,
+                                       **headers) as req_mock:
+            keep_client = arvados.KeepClient(api_client=api_client)
+            actual = keep_client.put(body, copies=2)
+        self.assertEqual(pdh, actual)
+        self.assertEqual(1, req_mock.call_count)
+
+
+@tutil.skip_sleep
+class KeepClientRendezvousTestCase(unittest.TestCase, tutil.ApiClientMock):
+
+    def setUp(self):
         # expected_order[i] is the probe order for
         # hash=md5(sprintf("%064x",i)) where there are 16 services
         # with uuid sprintf("anything-%015x",j) with j in 0..15. E.g.,
         # the first probe for the block consisting of 64 "0"
         # characters is the service whose uuid is
         # "zzzzz-bi6l4-000000000000003", so expected_order[0][0]=='3'.
-        expected_order = [
+        self.services = 16
+        self.expected_order = [
             list('3eab2d5fc9681074'),
             list('097dba52e648f1c3'),
             list('c5b4e023f8a7d691'),
             list('9d81c02e76a3bf54'),
             ]
-        hashes = [
-            hashlib.md5("{:064x}".format(x)).hexdigest()
-            for x in range(len(expected_order))]
-        api_client = self.mock_keep_services(count=16)
-        keep_client = arvados.KeepClient(api_client=api_client)
-        for i, hash in enumerate(hashes):
-            roots = keep_client.weighted_service_roots(arvados.KeepLocator(hash))
+        self.blocks = [
+            "{:064x}".format(x)
+            for x in range(len(self.expected_order))]
+        self.hashes = [
+            hashlib.md5(self.blocks[x]).hexdigest()
+            for x in range(len(self.expected_order))]
+        self.api_client = self.mock_keep_services(count=self.services)
+        self.keep_client = arvados.KeepClient(api_client=self.api_client)
+
+    def test_weighted_service_roots_against_reference_set(self):
+        # Confirm weighted_service_roots() returns the correct order
+        for i, hash in enumerate(self.hashes):
+            roots = self.keep_client.weighted_service_roots(arvados.KeepLocator(hash))
             got_order = [
                 re.search(r'//\[?keep0x([0-9a-f]+)', root).group(1)
                 for root in roots]
-            self.assertEqual(expected_order[i], got_order)
+            self.assertEqual(self.expected_order[i], got_order)
+
+    def test_get_probe_order_against_reference_set(self):
+        self._test_probe_order_against_reference_set(
+            lambda i: self.keep_client.get(self.hashes[i], num_retries=1))
+
+    def test_put_probe_order_against_reference_set(self):
+        # copies=1 prevents the test from being sensitive to races
+        # between writer threads.
+        self._test_probe_order_against_reference_set(
+            lambda i: self.keep_client.put(self.blocks[i], num_retries=1, copies=1))
+
+    def _test_probe_order_against_reference_set(self, op):
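+        # num_retries=1 gives two full passes over the probe order,
+        # hence the expected_order[i]*2 comparison below.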
+        for i in range(len(self.blocks)):
+            with tutil.mock_keep_responses('', *[500 for _ in range(self.services*2)]) as mock, \
+                 self.assertRaises(arvados.errors.KeepRequestError):
+                op(i)
+            got_order = [
+                re.search(r'//\[?keep0x([0-9a-f]+)', resp.getopt(pycurl.URL)).group(1)
+                for resp in mock.responses]
+            self.assertEqual(self.expected_order[i]*2, got_order)
+
+    def test_put_probe_order_multiple_copies(self):
+        for copies in range(2, 4):
+            for i in range(len(self.blocks)):
+                with tutil.mock_keep_responses('', *[500 for _ in range(self.services*3)]) as mock, \
+                     self.assertRaises(arvados.errors.KeepWriteError):
+                    self.keep_client.put(self.blocks[i], num_retries=2, copies=copies)
+                got_order = [
+                    re.search(r'//\[?keep0x([0-9a-f]+)', resp.getopt(pycurl.URL)).group(1)
+                    for resp in mock.responses]
+                # With T threads racing to make requests, the position
+                # of a given server in the sequence of HTTP requests
+                # (got_order) cannot be more than T-1 positions
+                # earlier than that server's position in the reference
+                # probe sequence (expected_order).
+                #
+                # Loop invariant: we have accounted for +pos+ expected
+                # probes, either by seeing them in +got_order+ or by
+                # putting them in +pending+ in the hope of seeing them
+                # later. As long as +len(pending)<T+, we haven't
+                # started a request too early.
+                pending = []
+                for pos, expected in enumerate(self.expected_order[i]*3):
+                    got = got_order[pos-len(pending)]
+                    while got in pending:
+                        del pending[pending.index(got)]
+                        got = got_order[pos-len(pending)]
+                    if got != expected:
+                        pending.append(expected)
+                        self.assertLess(
+                            len(pending), copies,
+                            "pending={}, with copies={}, got {}, expected {}".format(
+                                pending, copies, repr(got_order), repr(self.expected_order[i]*3)))
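+
+    # Illustrative sketch, not part of this change: the reference
+    # orders above come from rendezvous hashing. The scoring shown
+    # here (md5 of block hash + service uuid, sorted descending) is
+    # an assumption for illustration only; weighted_service_roots()
+    # in the SDK is the authoritative implementation.
+    def _example_rendezvous_order(self, block_md5, service_uuids):
+        return sorted(
+            service_uuids,
+            key=lambda uuid: hashlib.md5(block_md5 + uuid).hexdigest(),
+            reverse=True)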
 
     def test_probe_waste_adding_one_server(self):
         hashes = [
             hashlib.md5("{:064x}".format(x)).hexdigest() for x in range(100)]
         initial_services = 12
-        api_client = self.mock_keep_services(count=initial_services)
-        keep_client = arvados.KeepClient(api_client=api_client)
+        self.api_client = self.mock_keep_services(count=initial_services)
+        self.keep_client = arvados.KeepClient(api_client=self.api_client)
         probes_before = [
-            keep_client.weighted_service_roots(arvados.KeepLocator(hash)) for hash in hashes]
+            self.keep_client.weighted_service_roots(arvados.KeepLocator(hash)) for hash in hashes]
         for added_services in range(1, 12):
             api_client = self.mock_keep_services(count=initial_services+added_services)
             keep_client = arvados.KeepClient(api_client=api_client)
@@ -399,10 +567,10 @@ class KeepClientServiceTestCase(unittest.TestCase, tutil.ApiClientMock):
     def check_64_zeros_error_order(self, verb, exc_class):
         data = '0' * 64
         if verb == 'get':
-            data = hashlib.md5(data).hexdigest() + '+1234'
+            data = tutil.str_keep_locator(data)
         # Arbitrary port number:
         aport = random.randint(1024,65535)
-        api_client = self.mock_keep_services(service_port=aport, count=16)
+        api_client = self.mock_keep_services(service_port=aport, count=self.services)
         keep_client = arvados.KeepClient(api_client=api_client)
         with mock.patch('pycurl.Curl') as curl_mock, \
              self.assertRaises(exc_class) as err_check:
@@ -419,63 +587,14 @@ class KeepClientServiceTestCase(unittest.TestCase, tutil.ApiClientMock):
     def test_put_error_shows_probe_order(self):
         self.check_64_zeros_error_order('put', arvados.errors.KeepWriteError)
 
-    def check_no_services_error(self, verb, exc_class):
-        api_client = mock.MagicMock(name='api_client')
-        api_client.keep_services().accessible().execute.side_effect = (
-            arvados.errors.ApiError)
-        keep_client = arvados.KeepClient(api_client=api_client)
-        with self.assertRaises(exc_class) as err_check:
-            getattr(keep_client, verb)('d41d8cd98f00b204e9800998ecf8427e+0')
-        self.assertEqual(0, len(err_check.exception.request_errors()))
-
-    def test_get_error_with_no_services(self):
-        self.check_no_services_error('get', arvados.errors.KeepReadError)
-
-    def test_put_error_with_no_services(self):
-        self.check_no_services_error('put', arvados.errors.KeepWriteError)
-
-    def check_errors_from_last_retry(self, verb, exc_class):
-        api_client = self.mock_keep_services(count=2)
-        req_mock = tutil.mock_keep_responses(
-            "retry error reporting test", 500, 500, 403, 403)
-        with req_mock, tutil.skip_sleep, \
-                self.assertRaises(exc_class) as err_check:
-            keep_client = arvados.KeepClient(api_client=api_client)
-            getattr(keep_client, verb)('d41d8cd98f00b204e9800998ecf8427e+0',
-                                       num_retries=3)
-        self.assertEqual([403, 403], [
-                getattr(error, 'status_code', None)
-                for error in err_check.exception.request_errors().itervalues()])
-
-    def test_get_error_reflects_last_retry(self):
-        self.check_errors_from_last_retry('get', arvados.errors.KeepReadError)
-
-    def test_put_error_reflects_last_retry(self):
-        self.check_errors_from_last_retry('put', arvados.errors.KeepWriteError)
-
-    def test_put_error_does_not_include_successful_puts(self):
-        data = 'partial failure test'
-        data_loc = '{}+{}'.format(hashlib.md5(data).hexdigest(), len(data))
-        api_client = self.mock_keep_services(count=3)
-        with tutil.mock_keep_responses(data_loc, 200, 500, 500) as req_mock, \
-                self.assertRaises(arvados.errors.KeepWriteError) as exc_check:
-            keep_client = arvados.KeepClient(api_client=api_client)
-            keep_client.put(data)
-        self.assertEqual(2, len(exc_check.exception.request_errors()))
-
-    def test_proxy_put_with_no_writable_services(self):
-        data = 'test with no writable services'
-        data_loc = '{}+{}'.format(hashlib.md5(data).hexdigest(), len(data))
-        api_client = self.mock_keep_services(service_type='proxy', read_only=True, count=1)
-        with tutil.mock_keep_responses(data_loc, 200, 500, 500) as req_mock, \
-                self.assertRaises(arvados.errors.KeepWriteError) as exc_check:
-          keep_client = arvados.KeepClient(api_client=api_client)
-          keep_client.put(data)
-        self.assertEqual(True, ("no Keep services available" in str(exc_check.exception)))
-        self.assertEqual(0, len(exc_check.exception.request_errors()))
 
 class KeepClientTimeout(unittest.TestCase, tutil.ApiClientMock):
-    DATA = 'x' * 2**10
+    # BANDWIDTH_LOW_LIM must be less than len(DATA) so we can transfer
+    # 1s worth of data and then trigger bandwidth errors before running
+    # out of data.
+    DATA = 'x'*2**11
+    BANDWIDTH_LOW_LIM = 1024
+    TIMEOUT_TIME = 1.0
 
     class assertTakesBetween(unittest.TestCase):
         def __init__(self, tmin, tmax):
@@ -486,8 +605,22 @@ class KeepClientTimeout(unittest.TestCase, tutil.ApiClientMock):
             self.t0 = time.time()
 
         def __exit__(self, *args, **kwargs):
-            self.assertGreater(time.time() - self.t0, self.tmin)
-            self.assertLess(time.time() - self.t0, self.tmax)
+            # Round times to milliseconds, like libcurl. Otherwise we
+            # fail when libcurl reaches a 1s timeout at 0.9998s.
+            delta = round(time.time() - self.t0, 3)
+            self.assertGreaterEqual(delta, self.tmin)
+            self.assertLessEqual(delta, self.tmax)
+
+    class assertTakesGreater(unittest.TestCase):
+        def __init__(self, tmin):
+            self.tmin = tmin
+
+        def __enter__(self):
+            self.t0 = time.time()
+
+        def __exit__(self, *args, **kwargs):
+            delta = round(time.time() - self.t0, 3)
+            self.assertGreaterEqual(delta, self.tmin)
 
     def setUp(self):
         sock = socket.socket()
@@ -507,7 +640,7 @@ class KeepClientTimeout(unittest.TestCase, tutil.ApiClientMock):
     def tearDown(self):
         self.server.shutdown()
 
-    def keepClient(self, timeouts=(0.1, 1.0)):
+    def keepClient(self, timeouts=(0.1, TIMEOUT_TIME, BANDWIDTH_LOW_LIM)):
         return arvados.KeepClient(
             api_client=self.api_client,
             timeout=timeouts)
@@ -522,39 +655,89 @@ class KeepClientTimeout(unittest.TestCase, tutil.ApiClientMock):
         )
         with self.assertTakesBetween(0.1, 0.5):
             with self.assertRaises(arvados.errors.KeepWriteError):
-                self.keepClient((0.1, 1)).put(self.DATA, copies=1, num_retries=0)
+                self.keepClient().put(self.DATA, copies=1, num_retries=0)
+
+    def test_low_bandwidth_no_delays_success(self):
+        self.server.setbandwidth(2*self.BANDWIDTH_LOW_LIM)
+        kc = self.keepClient()
+        loc = kc.put(self.DATA, copies=1, num_retries=0)
+        self.assertEqual(self.DATA, kc.get(loc, num_retries=0))
+
+    def test_too_low_bandwidth_no_delays_failure(self):
+        # Check that bandwidth below the client's low-speed limit causes failures
+        kc = self.keepClient()
+        loc = kc.put(self.DATA, copies=1, num_retries=0)
+        self.server.setbandwidth(0.5*self.BANDWIDTH_LOW_LIM)
+        with self.assertTakesGreater(self.TIMEOUT_TIME):
+            with self.assertRaises(arvados.errors.KeepReadError):
+                kc.get(loc, num_retries=0)
+        with self.assertTakesGreater(self.TIMEOUT_TIME):
+            with self.assertRaises(arvados.errors.KeepWriteError):
+                kc.put(self.DATA, copies=1, num_retries=0)
+
+    def test_low_bandwidth_with_server_response_delay_failure(self):
+        kc = self.keepClient()
+        loc = kc.put(self.DATA, copies=1, num_retries=0)
+        self.server.setbandwidth(self.BANDWIDTH_LOW_LIM)
+        self.server.setdelays(response=self.TIMEOUT_TIME)
+        with self.assertTakesGreater(self.TIMEOUT_TIME):
+            with self.assertRaises(arvados.errors.KeepReadError):
+                kc.get(loc, num_retries=0)
+        with self.assertTakesGreater(self.TIMEOUT_TIME):
+            with self.assertRaises(arvados.errors.KeepWriteError):
+                kc.put(self.DATA, copies=1, num_retries=0)
+
+    def test_low_bandwidth_with_server_mid_delay_failure(self):
+        kc = self.keepClient()
+        loc = kc.put(self.DATA, copies=1, num_retries=0)
+        self.server.setbandwidth(self.BANDWIDTH_LOW_LIM)
+        self.server.setdelays(mid_write=self.TIMEOUT_TIME, mid_read=self.TIMEOUT_TIME)
+        with self.assertTakesGreater(self.TIMEOUT_TIME):
+            with self.assertRaises(arvados.errors.KeepReadError):
+                kc.get(loc, num_retries=0)
+        with self.assertTakesGreater(self.TIMEOUT_TIME):
+            with self.assertRaises(arvados.errors.KeepWriteError):
+                kc.put(self.DATA, copies=1, num_retries=0)
 
     def test_timeout_slow_request(self):
-        self.server.setdelays(request=0.2)
-        self._test_200ms()
+        loc = self.keepClient().put(self.DATA, copies=1, num_retries=0)
+        self.server.setdelays(request=.2)
+        self._test_connect_timeout_under_200ms(loc)
+        self.server.setdelays(request=2)
+        self._test_response_timeout_under_2s(loc)
 
     def test_timeout_slow_response(self):
-        self.server.setdelays(response=0.2)
-        self._test_200ms()
+        loc = self.keepClient().put(self.DATA, copies=1, num_retries=0)
+        self.server.setdelays(response=.2)
+        self._test_connect_timeout_under_200ms(loc)
+        self.server.setdelays(response=2)
+        self._test_response_timeout_under_2s(loc)
 
     def test_timeout_slow_response_body(self):
-        self.server.setdelays(response_body=0.2)
-        self._test_200ms()
-
-    def _test_200ms(self):
-        """Connect should be t<100ms, request should be 200ms <= t < 300ms"""
+        loc = self.keepClient().put(self.DATA, copies=1, num_retries=0)
+        self.server.setdelays(response_body=.2)
+        self._test_connect_timeout_under_200ms(loc)
+        self.server.setdelays(response_body=2)
+        self._test_response_timeout_under_2s(loc)
 
+    def _test_connect_timeout_under_200ms(self, loc):
         # Allow 100ms to connect, then 1s for response. Everything
         # should work, and everything should take at least 200ms to
         # return.
-        kc = self.keepClient((.1, 1))
+        kc = self.keepClient(timeouts=(.1, 1))
         with self.assertTakesBetween(.2, .3):
-            loc = kc.put(self.DATA, copies=1, num_retries=0)
+            kc.put(self.DATA, copies=1, num_retries=0)
         with self.assertTakesBetween(.2, .3):
             self.assertEqual(self.DATA, kc.get(loc, num_retries=0))
 
-        # Allow 1s to connect, then 100ms for response. Nothing should
-        # work, and everything should take at least 100ms to return.
-        kc = self.keepClient((1, .1))
-        with self.assertTakesBetween(.1, .2):
+    def _test_response_timeout_under_2s(self, loc):
+        # Allow 10s to connect, then 1s for response. Nothing should
+        # work, and everything should take at least 1s to return.
+        kc = self.keepClient(timeouts=(10, 1))
+        with self.assertTakesBetween(1, 1.9):
             with self.assertRaises(arvados.errors.KeepReadError):
                 kc.get(loc, num_retries=0)
-        with self.assertTakesBetween(.1, .2):
+        with self.assertTakesBetween(1, 1.9):
             with self.assertRaises(arvados.errors.KeepWriteError):
                 kc.put(self.DATA, copies=1, num_retries=0)
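+
+    def _example_curl_timeouts(self, connect_timeout, read_timeout, min_bps):
+        # Illustrative sketch, not part of this change: the three-element
+        # timeout tuple passed to keepClient() above is assumed to map
+        # onto libcurl's connect timeout plus its low-speed abort options.
+        # The pycurl option names are real; the exact mapping is an
+        # assumption for illustration.
+        curl = pycurl.Curl()
+        curl.setopt(pycurl.CONNECTTIMEOUT_MS, int(connect_timeout * 1000))
+        curl.setopt(pycurl.LOW_SPEED_TIME, int(read_timeout))
+        curl.setopt(pycurl.LOW_SPEED_LIMIT, int(min_bps))
+        return curl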
 
diff --git a/sdk/python/tests/test_retry_job_helpers.py b/sdk/python/tests/test_retry_job_helpers.py
new file mode 100644 (file)
index 0000000..6e562a0
--- /dev/null
@@ -0,0 +1,105 @@
+#!/usr/bin/env python
+
+import mock
+import os
+import unittest
+import hashlib
+import run_test_server
+import json
+import arvados
+import arvados_testutil as tutil
+from apiclient import http as apiclient_http
+
+
+@tutil.skip_sleep
+class ApiClientRetryTestMixin(object):
+
+    TEST_UUID = 'zzzzz-zzzzz-zzzzzzzzzzzzzzz'
+    TEST_LOCATOR = 'd41d8cd98f00b204e9800998ecf8427e+0'
+
+    @classmethod
+    def setUpClass(cls):
+        run_test_server.run()
+
+    def setUp(self):
+        # Patch arvados.api() to return our mock API, so we can mock
+        # its http requests.
+        self.api_client = arvados.api('v1', cache=False)
+        self.api_patch = mock.patch('arvados.api', return_value=self.api_client)
+        self.api_patch.start()
+
+    def tearDown(self):
+        self.api_patch.stop()
+
+    def run_method(self):
+        raise NotImplementedError("test subclasses must define run_method")
+
+    def test_immediate_success(self):
+        with tutil.mock_api_responses(self.api_client, '{}', [200]):
+            self.run_method()
+
+    def test_immediate_failure(self):
+        with tutil.mock_api_responses(self.api_client, '{}', [400]), self.assertRaises(self.DEFAULT_EXCEPTION):
+            self.run_method()
+
+    def test_retry_then_success(self):
+        with tutil.mock_api_responses(self.api_client, '{}', [500, 200]):
+            self.run_method()
+
+    def test_error_after_default_retries_exhausted(self):
+        with tutil.mock_api_responses(self.api_client, '{}', [500, 500, 500, 500, 500, 500, 200]), self.assertRaises(self.DEFAULT_EXCEPTION):
+            self.run_method()
+
+    def test_no_retry_after_immediate_success(self):
+        with tutil.mock_api_responses(self.api_client, '{}', [200, 400]):
+            self.run_method()
+
+
+class CurrentJobTestCase(ApiClientRetryTestMixin, unittest.TestCase):
+
+    DEFAULT_EXCEPTION = arvados.errors.ApiError
+
+    def setUp(self):
+        super(CurrentJobTestCase, self).setUp()
+        os.environ['JOB_UUID'] = 'zzzzz-zzzzz-zzzzzzzzzzzzzzz'
+        os.environ['JOB_WORK'] = '.'
+
+    def tearDown(self):
+        del os.environ['JOB_UUID']
+        del os.environ['JOB_WORK']
+        arvados._current_job = None
+        super(CurrentJobTestCase, self).tearDown()
+
+    def run_method(self):
+        arvados.current_job()
+
+
+class CurrentTaskTestCase(ApiClientRetryTestMixin, unittest.TestCase):
+
+    DEFAULT_EXCEPTION = arvados.errors.ApiError
+
+    def setUp(self):
+        super(CurrentTaskTestCase, self).setUp()
+        os.environ['TASK_UUID'] = 'zzzzz-zzzzz-zzzzzzzzzzzzzzz'
+        os.environ['TASK_WORK'] = '.'
+
+    def tearDown(self):
+        del os.environ['TASK_UUID']
+        del os.environ['TASK_WORK']
+        arvados._current_task = None
+        super(CurrentTaskTestCase, self).tearDown()
+
+    def run_method(self):
+        arvados.current_task()
+
+
+class TaskSetOutputTestCase(CurrentTaskTestCase, unittest.TestCase):
+
+    DEFAULT_EXCEPTION = arvados.errors.ApiError
+
+    def tearDown(self):
+        super(TaskSetOutputTestCase, self).tearDown()
+        run_test_server.reset()
+
+    def run_method(self, locator=ApiClientRetryTestMixin.TEST_LOCATOR):
+    arvados.task_set_output({'uuid': self.TEST_UUID}, s=locator)
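+
+
+def _example_canned_http(status_codes, body='{}'):
+    # Illustrative sketch, not part of this change: a canned-response
+    # mock in the spirit of tutil.mock_api_responses(), built on
+    # apiclient's HttpMockSequence. tutil's real implementation is not
+    # shown here, so treat this as a hypothetical equivalent.
+    return apiclient_http.HttpMockSequence(
+        [({'status': str(code)}, body) for code in status_codes])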
diff --git a/sdk/ruby/LICENSE-2.0.txt b/sdk/ruby/LICENSE-2.0.txt
new file mode 100644 (file)
index 0000000..d645695
--- /dev/null
@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
index e092ed47a3eabeb83ea5aa16b188cb577e53716e..3adcf4d2c169ed047ef8d138829edb163c37ff9f 100644 (file)
@@ -16,7 +16,8 @@ Gem::Specification.new do |s|
   s.email       = 'gem-dev@curoverse.com'
   s.licenses    = ['Apache License, Version 2.0']
   s.files       = ["lib/arvados.rb", "lib/arvados/google_api_client.rb",
-                   "lib/arvados/collection.rb", "lib/arvados/keep.rb"]
+                   "lib/arvados/collection.rb", "lib/arvados/keep.rb",
+                   "README", "LICENSE-2.0.txt"]
   s.required_ruby_version = '>= 2.1.0'
   s.add_dependency('google-api-client', '~> 0.6.3', '>= 0.6.3')
   s.add_dependency('activesupport', '>= 3.2.13')
index 8f1b687a9cad9a01e593b08f1e34cee2d4db8e53..3b4330935c568a035178b594857f1e4c8916b37c 100644 (file)
@@ -74,7 +74,7 @@ gem 'faye-websocket'
 gem 'themes_for_rails'
 
 gem 'arvados', '>= 0.1.20150615153458'
-gem 'arvados-cli', '>= 0.1.20150128223752'
+gem 'arvados-cli', '>= 0.1.20151023185755'
 
 # pg_power lets us use partial indexes in schema.rb in Rails 3
 gem 'pg_power'
index d671182a57c749b65bd9f7c93d2db215bc762f16..b505b194b2ac35b220c970c9f9e6c45ebeedd9f4 100644 (file)
@@ -41,10 +41,10 @@ GEM
       google-api-client (~> 0.6.3, >= 0.6.3)
       json (~> 1.7, >= 1.7.7)
       jwt (>= 0.1.5, < 1.0.0)
-    arvados-cli (0.1.20150205181653)
+    arvados-cli (0.1.20151023190001)
       activesupport (~> 3.2, >= 3.2.13)
       andand (~> 1.3, >= 1.3.3)
-      arvados (~> 0.1, >= 0.1.20150615153458)
+      arvados (~> 0.1, >= 0.1.20150128223554)
       curb (~> 0.8)
       google-api-client (~> 0.6.3, >= 0.6.3)
       json (~> 1.7, >= 1.7.7)
@@ -69,7 +69,7 @@ GEM
       coffee-script-source
       execjs
     coffee-script-source (1.7.0)
-    curb (0.8.6)
+    curb (0.8.8)
     daemon_controller (1.2.0)
     database_cleaner (1.2.0)
     erubis (2.7.0)
@@ -228,7 +228,7 @@ DEPENDENCIES
   acts_as_api
   andand
   arvados (>= 0.1.20150615153458)
-  arvados-cli (>= 0.1.20150128223752)
+  arvados-cli (>= 0.1.20151023185755)
   coffee-rails (~> 3.2.0)
   database_cleaner
   factory_girl_rails
@@ -256,3 +256,6 @@ DEPENDENCIES
   therubyracer
   trollop
   uglifier (>= 1.0.3)
+
+BUNDLED WITH
+   1.10.6
index fc2ee9300287406479b0d2ec1ff97e88418c52a4..d2a512bde75f68c790360813b47872b748481be1 100644 (file)
@@ -13,7 +13,7 @@ class Arvados::V1::KeepServicesController < ApplicationController
     if request.headers['X-External-Client'] == '1'
       @objects = model_class.where('service_type=?', 'proxy')
     else
-      @objects = model_class.where('service_type=?', 'disk')
+      @objects = model_class.where(model_class.arel_table[:service_type].not_eq('proxy'))
     end
     render_list
   end
index 62d5e59c8d142ce5116da263c9314def02b670d1..ba0f90f90c45d95925df0ae7892815d0b5c490b6 100644 (file)
@@ -22,7 +22,7 @@ class Arvados::V1::SchemaController < ApplicationController
         name: "arvados",
         version: "v1",
         revision: "20131114",
-        source_version: (Rails.application.config.source_version ? Rails.application.config.source_version : "No version information available") + (Rails.application.config.local_modified ? Rails.application.config.local_modified.to_s : ''),
+        source_version: AppVersion.hash,
         generatedAt: db_current_time.iso8601,
         title: "Arvados API",
         description: "The API to interact with Arvados.",
index 64818da375ecc4fb0277a10be0f3f7e6b552a2e8..21c8e4710cb5e62dbe224a6034c68ea1bd40b05e 100644 (file)
@@ -29,6 +29,10 @@ class DatabaseController < ApplicationController
     fixturesets = Dir.glob(Rails.root.join('test', 'fixtures', '*.yml')).
       collect { |yml| yml.match(/([^\/]*)\.yml$/)[1] }
 
+    # Don't reset keep_services: clients need to discover our
+    # integration-testing keepstores, not test fixtures.
+    fixturesets -= %w[keep_services]
+
     table_names = '"' + ActiveRecord::Base.connection.tables.join('","') + '"'
 
     attempts_left = 20
index a61420c450fca9aa8d1b6d7fe1756b0ce3fcd88e..8777f28a86000ee9ccef86f15bd75e8160c2c3bd 100644 (file)
@@ -8,45 +8,6 @@
 # 4. Section in application.default.yml corresponding to RAILS_ENV
 # 5. Section in application.default.yml called "common"
 
-<%
-# If you change any of the code in this block, you'll probably also want
-# to update it in Workbench's application.default.yml.
-def info_cmd(*args, &block)
-  IO.popen(args, "r", chdir: Rails.root, err: "/dev/null", &block)
-end
-
-source_version = ""
-local_modified = false
-if Rails.env == "production"
-  # Read the version from our package's git-commit.version file, if available.
-  begin
-    source_version = IO.read(Rails.root.join("git-commit.version")).strip
-  rescue Errno::ENOENT
-  end
-end
-
-if source_version.empty?
-  begin
-    status_output = false
-    info_cmd("git", "status", "-s") do |git_pipe|
-      git_pipe.each_line do |_|
-        status_output = true
-        # Continue reading the pipe so git doesn't get SIGPIPE.
-      end
-    end
-    if $?.success?
-      info_cmd("git", "log", "-n1", "--format=%H") do |git_pipe|
-        git_pipe.each_line do |line|
-          source_version = line.chomp
-        end
-      end
-      local_modified = status_output
-    end
-  rescue SystemCallError
-  end
-end
-%>
-
 common:
   ###
   ### Essential site configuration
@@ -365,9 +326,11 @@ common:
 
   default_openid_prefix: https://www.google.com/accounts/o8/id
 
-  # source_version
-  source_version: "<%= source_version[0...8] %>"
-  local_modified: false
+  # Override the automatic version string. With the default value of
+  # false, the version string is read from git-commit.version in
+  # Rails.root (included in vendor packages) or determined by invoking
+  # "git log".
+  source_version: false
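+  #
+  # Example (hypothetical pinned value):
+  #source_version: "abc1234"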
 
 
 development:
@@ -384,7 +347,6 @@ development:
   active_record.auto_explain_threshold_in_seconds: 0.5
   assets.compress: false
   assets.debug: true
-  local_modified: "<%= local_modified ? '-modified' : '' %>"
 
 production:
   force_ssl: true
index 149770c9ac38c9e40b311fae2b429eebd09009d1..3681952508509b07ea13168a712ecfa685165857 100644 (file)
@@ -21,7 +21,7 @@ production:
   sso_app_id: ~
   sso_provider_url: ~
   workbench_address: ~
-  websockets_address: ~
+  websocket_address: ~
   #git_repositories_dir: ~
   #git_internal_dir: ~
 
@@ -34,7 +34,7 @@ development:
   sso_app_secret: ~
   sso_provider_url: ~
   workbench_address: ~
-  websockets_address: ~
+  websocket_address: ~
   #git_repositories_dir: ~
   #git_internal_dir: ~
 
diff --git a/services/api/config/initializers/app_version.rb b/services/api/config/initializers/app_version.rb
new file mode 100644 (file)
index 0000000..c904856
--- /dev/null
@@ -0,0 +1 @@
+require 'app_version'
diff --git a/services/api/lib/app_version.rb b/services/api/lib/app_version.rb
new file mode 100644 (file)
index 0000000..769f4e5
--- /dev/null
@@ -0,0 +1,52 @@
+# If you change this file, you'll probably also want to make the same
+# changes in apps/workbench/lib/app_version.rb.
+
+class AppVersion
+  def self.git(*args, &block)
+    IO.popen(["git", "--git-dir", ".git"] + args, "r",
+             chdir: Rails.root.join('../..'),
+             err: "/dev/null",
+             &block)
+  end
+
+  def self.forget
+    @hash = nil
+  end
+
+  # Return abbrev commit hash for current code version: "abc1234", or
+  # "abc1234-modified" if there are uncommitted changes. If present,
+  # return contents of {root}/git-commit.version instead.
+  def self.hash
+    if (cached = Rails.configuration.source_version || @hash)
+      return cached
+    end
+
+    # Read the version from our package's git-commit.version file, if available.
+    begin
+      @hash = IO.read(Rails.root.join("git-commit.version")).strip
+    rescue Errno::ENOENT
+    end
+
+    if @hash.nil? or @hash.empty?
+      begin
+        local_modified = false
+        git("status", "--porcelain") do |git_pipe|
+          git_pipe.each_line do |_|
+            local_modified = true
+            # Continue reading the pipe so git doesn't get SIGPIPE.
+          end
+        end
+        if $?.success?
+          git("log", "-n1", "--format=%H") do |git_pipe|
+            git_pipe.each_line do |line|
+              @hash = line.chomp[0...8] + (local_modified ? '-modified' : '')
+            end
+          end
+        end
+      rescue SystemCallError
+      end
+    end
+
+    @hash || "unknown"
+  end
+end
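+
+# Usage sketch: the discovery document in this change reports
+# AppVersion.hash as its source_version, and callers can drop the
+# memoized value with AppVersion.forget, e.g.:
+#
+#   AppVersion.hash    # => "abc1234" or "abc1234-modified"
+#   AppVersion.forget  # next .hash call re-reads the version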
diff --git a/services/api/lib/create_superuser_token.rb b/services/api/lib/create_superuser_token.rb
new file mode 100755 (executable)
index 0000000..54faa9a
--- /dev/null
@@ -0,0 +1,48 @@
+# Install the supplied string (or a randomly generated token, if none
+# is given) as an API token that authenticates to the system user account.
+
+module CreateSuperUserToken
+  require File.dirname(__FILE__) + '/../config/boot'
+  require File.dirname(__FILE__) + '/../config/environment'
+
+  include ApplicationHelper
+
+  def create_superuser_token supplied_token=nil
+    act_as_system_user do
+      # If token is supplied, verify that it indeed is a superuser token
+      if supplied_token
+        api_client_auth = ApiClientAuthorization.
+          where(api_token: supplied_token).
+          first
+        if api_client_auth && !api_client_auth.user.uuid.match(/-000000000000000$/)
+          raise "Token already exists but is not a superuser token."
+        end
+      end
+
+      # need to create a token
+      if !api_client_auth
+        # Get (or create) trusted api client
+        apiClient = ApiClient.find_or_create_by_url_prefix_and_is_trusted("ssh://root@localhost/", true)
+
+        # Check if there is an unexpired superuser token corresponding to this api client
+        api_client_auth = ApiClientAuthorization.where(
+                'user_id = (?) AND
+                 api_client_id = (?) AND
+                 (expires_at IS NULL OR expires_at > CURRENT_TIMESTAMP)',
+               system_user.id, apiClient.id).first
+
+        # none exist; create one with the supplied token
+        if !api_client_auth
+          api_client_auth = ApiClientAuthorization.
+            new(user: system_user,
+              api_client_id: apiClient.id,
+              created_by_ip_address: '::1',
+              api_token: supplied_token)
+          api_client_auth.save!
+        end
+      end
+
+      api_client_auth.api_token
+    end
+  end
+end
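+
+# Usage sketch (hypothetical wrapper script, not part of this change):
+#
+#   include CreateSuperUserToken
+#   puts create_superuser_token(ARGV[0])   # supplied token is optional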
diff --git a/services/api/lib/crunch_dispatch.rb b/services/api/lib/crunch_dispatch.rb
new file mode 100644 (file)
index 0000000..5d598d4
--- /dev/null
@@ -0,0 +1,854 @@
+require 'open3'
+require 'shellwords'
+
+class CrunchDispatch
+  include ApplicationHelper
+  include Process
+
+  EXIT_TEMPFAIL = 75
+  EXIT_RETRY_UNLOCKED = 93
+  RETRY_UNLOCKED_LIMIT = 3
+
+  class LogTime < Time
+    def to_s
+      self.utc.strftime "%Y-%m-%d_%H:%M:%S"
+    end
+  end
+
+  def initialize
+    @crunch_job_bin = (ENV['CRUNCH_JOB_BIN'] || `which arv-crunch-job`.strip)
+    if @crunch_job_bin.empty?
+      raise "No CRUNCH_JOB_BIN env var, and crunch-job not in path."
+    end
+
+    @docker_bin = ENV['CRUNCH_JOB_DOCKER_BIN']
+    @docker_run_args = ENV['CRUNCH_JOB_DOCKER_RUN_ARGS']
+
+    @arvados_internal = Rails.configuration.git_internal_dir
+    if not File.exist? @arvados_internal
+      $stderr.puts `mkdir -p #{@arvados_internal.shellescape} && git init --bare #{@arvados_internal.shellescape}`
+      raise "No internal git repository available" unless ($? == 0)
+    end
+
+    @repo_root = Rails.configuration.git_repositories_dir
+    @arvados_repo_path = Repository.where(name: "arvados").first.server_path
+    @authorizations = {}
+    @did_recently = {}
+    @fetched_commits = {}
+    @git_tags = {}
+    @node_state = {}
+    @pipe_auth_tokens = {}
+    @running = {}
+    @todo = []
+    @todo_job_retries = {}
+    @job_retry_counts = Hash.new(0)
+    @todo_pipelines = []
+  end
+
+  def sysuser
+    return act_as_system_user
+  end
+
+  def refresh_todo
+    if @runoptions[:jobs]
+      @todo = @todo_job_retries.values + Job.queue.select(&:repository)
+    end
+    if @runoptions[:pipelines]
+      @todo_pipelines = PipelineInstance.queue
+    end
+  end
+
+  def each_slurm_line(cmd, outfmt, max_fields=nil)
+    max_fields ||= outfmt.split(":").size
+    max_fields += 1  # To accommodate the node field we add
+    @@slurm_version ||= Gem::Version.new(`sinfo --version`.match(/\b[\d\.]+\b/)[0])
+    if Gem::Version.new('2.3') <= @@slurm_version
+      `#{cmd} --noheader -o '%n:#{outfmt}'`.each_line do |line|
+        yield line.chomp.split(":", max_fields)
+      end
+    else
+      # Expand rows with hostname ranges (like "foo[1-3,5,9-12]:idle")
+      # into multiple rows with one hostname each.
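+      # For example, "compute[1-3,9]:idle" yields compute1, compute2,
+      # compute3, and compute9, each with state "idle".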
+      `#{cmd} --noheader -o '%N:#{outfmt}'`.each_line do |line|
+        tokens = line.chomp.split(":", max_fields)
+        if (re = tokens[0].match /^(.*?)\[([-,\d]+)\]$/)
+          tokens.shift
+          re[2].split(",").each do |range|
+            range = range.split("-").collect(&:to_i)
+            (range[0]..range[-1]).each do |n|
+              yield [re[1] + n.to_s] + tokens
+            end
+          end
+        else
+          yield tokens
+        end
+      end
+    end
+  end
+
+  def slurm_status
+    slurm_nodes = {}
+    each_slurm_line("sinfo", "%t") do |hostname, state|
+      # Treat nodes in idle* state as down, because the * means that slurm
+      # hasn't been able to communicate with it recently.
+      state.sub!(/^idle\*/, "down")
+      state.sub!(/\W+$/, "")
+      state = "down" unless %w(idle alloc down).include?(state)
+      slurm_nodes[hostname] = {state: state, job: nil}
+    end
+    each_slurm_line("squeue", "%j") do |hostname, job_uuid|
+      slurm_nodes[hostname][:job] = job_uuid if slurm_nodes[hostname]
+    end
+    slurm_nodes
+  end
+
+  def update_node_status
+    return unless Server::Application.config.crunch_job_wrapper.to_s.match /^slurm/
+    slurm_status.each_pair do |hostname, slurmdata|
+      next if @node_state[hostname] == slurmdata
+      begin
+        node = Node.where('hostname=?', hostname).order(:last_ping_at).last
+        if node
+          $stderr.puts "dispatch: update #{hostname} state to #{slurmdata}"
+          node.info["slurm_state"] = slurmdata[:state]
+          node.job_uuid = slurmdata[:job]
+          if node.save
+            @node_state[hostname] = slurmdata
+          else
+            $stderr.puts "dispatch: failed to update #{node.uuid}: #{node.errors.messages}"
+          end
+        elsif slurmdata[:state] != 'down'
+          $stderr.puts "dispatch: SLURM reports '#{hostname}' is not down, but no node has that name"
+        end
+      rescue => error
+        $stderr.puts "dispatch: error updating #{hostname} node status: #{error}"
+      end
+    end
+  end
+
+  def positive_int(raw_value, default=nil)
+    value = begin raw_value.to_i rescue 0 end
+    if value > 0
+      value
+    else
+      default
+    end
+  end
+
+  NODE_CONSTRAINT_MAP = {
+    # Map Job runtime_constraints keys to the corresponding Node info key.
+    'min_ram_mb_per_node' => 'total_ram_mb',
+    'min_scratch_mb_per_node' => 'total_scratch_mb',
+    'min_cores_per_node' => 'total_cpu_cores',
+  }
+
+  def nodes_available_for_job_now(job)
+    # Find Nodes that satisfy a Job's runtime constraints (by building
+    # a list of Procs and using them to test each Node).  If there are
+    # enough to run the Job, return an array of their names.
+    # Otherwise, return nil.
+    need_procs = NODE_CONSTRAINT_MAP.each_pair.map do |job_key, node_key|
+      Proc.new do |node|
+        positive_int(node.properties[node_key], 0) >=
+          positive_int(job.runtime_constraints[job_key], 0)
+      end
+    end
+    min_node_count = positive_int(job.runtime_constraints['min_nodes'], 1)
+    usable_nodes = []
+    Node.all.select do |node|
+      node.info['slurm_state'] == 'idle'
+    end.sort_by do |node|
+      # Prefer nodes with no price, then cheap nodes, then expensive nodes
+      node.properties['cloud_node']['price'].to_f rescue 0
+    end.each do |node|
+      if need_procs.select { |need_proc| not need_proc.call(node) }.any?
+        # At least one runtime constraint is not satisfied by this node
+        next
+      end
+      usable_nodes << node
+      if usable_nodes.count >= min_node_count
+        return usable_nodes.map { |node| node.hostname }
+      end
+    end
+    nil
+  end
+
+  def nodes_available_for_job(job)
+    # Check if there are enough idle nodes with the Job's minimum
+    # hardware requirements to run it.  If so, return an array of
+    # their names.  If not, up to once per hour, signal start_jobs to
+    # hold off launching Jobs.  This delay is meant to give the Node
+    # Manager an opportunity to make new resources available for new
+    # Jobs.
+    #
+    # The exact timing parameters here might need to be adjusted for
+    # the best balance between helping the longest-waiting Jobs run,
+    # and making efficient use of immediately available resources.
+    # These are all just first efforts until we have more data to work
+    # with.
+    nodelist = nodes_available_for_job_now(job)
+    if nodelist.nil? and not did_recently(:wait_for_available_nodes, 3600)
+      $stderr.puts "dispatch: waiting for nodes for #{job.uuid}"
+      @node_wait_deadline = Time.now + 5.minutes
+    end
+    nodelist
+  end
+
+  def fail_job job, message
+    $stderr.puts "dispatch: #{job.uuid}: #{message}"
+    begin
+      Log.new(object_uuid: job.uuid,
+              event_type: 'dispatch',
+              owner_uuid: job.owner_uuid,
+              summary: message,
+              properties: {"text" => message}).save!
+    rescue
+      $stderr.puts "dispatch: log.create failed"
+    end
+
+    if not have_job_lock?(job)
+      begin
+        job.lock @authorizations[job.uuid].user.uuid
+      rescue ArvadosModel::AlreadyLockedError
+        $stderr.puts "dispatch: tried to mark job #{job.uuid} as failed but it was already locked by someone else"
+        return
+      end
+    end
+
+    job.state = "Failed"
+    if not job.save
+      $stderr.puts "dispatch: save failed setting job #{job.uuid} to failed"
+    end
+  end
+
+  def stdout_s(cmd_a, opts={})
+    IO.popen(cmd_a, "r", opts) do |pipe|
+      return pipe.read.chomp
+    end
+  end
+
+  def git_cmd(*cmd_a)
+    ["git", "--git-dir=#{@arvados_internal}"] + cmd_a
+  end
+
+  def get_authorization(job)
+    if @authorizations[job.uuid] and
+        @authorizations[job.uuid].user.uuid != job.modified_by_user_uuid
+      # We already made a token for this job, but we need a new one
+      # because modified_by_user_uuid has changed (the job will run
+      # as a different user).
+      @authorizations[job.uuid].update_attributes expires_at: Time.now
+      @authorizations[job.uuid] = nil
+    end
+    if not @authorizations[job.uuid]
+      auth = ApiClientAuthorization.
+        new(user: User.where('uuid=?', job.modified_by_user_uuid).first,
+            api_client_id: 0)
+      if not auth.save
+        $stderr.puts "dispatch: auth.save failed for #{job.uuid}"
+      else
+        @authorizations[job.uuid] = auth
+      end
+    end
+    @authorizations[job.uuid]
+  end
+
+  def internal_repo_has_commit? sha1
+    if (not @fetched_commits[sha1] and
+        sha1 == stdout_s(git_cmd("rev-list", "-n1", sha1), err: "/dev/null") and
+        $? == 0)
+      @fetched_commits[sha1] = true
+    end
+    return @fetched_commits[sha1]
+  end
+
+  def get_commit src_repo, sha1
+    return true if internal_repo_has_commit? sha1
+
+    # commit does not exist in internal repository, so import the
+    # source repository using git fetch-pack
+    cmd = git_cmd("fetch-pack", "--no-progress", "--all", src_repo)
+    $stderr.puts "dispatch: #{cmd}"
+    $stderr.puts(stdout_s(cmd))
+    @fetched_commits[sha1] = ($? == 0)
+  end
+
+  def tag_commit(job, commit_hash, tag_name)
+    # @git_tags[T]==V if we know commit V has been tagged T in the
+    # arvados_internal repository.
+    if not @git_tags[tag_name]
+      cmd = git_cmd("tag", tag_name, commit_hash)
+      $stderr.puts "dispatch: #{cmd}"
+      $stderr.puts(stdout_s(cmd, err: "/dev/null"))
+      unless $? == 0
+        # git tag failed.  This may be because the tag already exists, so check for that.
+        tag_rev = stdout_s(git_cmd("rev-list", "-n1", tag_name))
+        if $? == 0
+          # We got a revision back
+          if tag_rev != commit_hash
+            # Uh oh, the tag doesn't point to the revision we were expecting.
+            # Someone has been monkeying with the job record and/or git.
+            fail_job job, "Existing tag #{tag_name} points to commit #{tag_rev} but expected commit #{commit_hash}"
+            return nil
+          end
+          # we're okay (fall through to setting @git_tags below)
+        else
+          # git rev-list failed for some reason.
+          fail_job job, "'git tag' for #{tag_name} failed but did not find any existing tag using 'git rev-list'"
+          return nil
+        end
+      end
+      # 'git tag' was successful, or there is an existing tag that points to the same revision.
+      @git_tags[tag_name] = commit_hash
+    elsif @git_tags[tag_name] != commit_hash
+      fail_job job, "Existing tag #{tag_name} points to commit #{@git_tags[tag_name]} but this job uses commit #{commit_hash}"
+      return nil
+    end
+    @git_tags[tag_name]
+  end
+
+  def start_jobs
+    @todo.each do |job|
+      next if @running[job.uuid]
+
+      cmd_args = nil
+      case Server::Application.config.crunch_job_wrapper
+      when :none
+        if @running.size > 0
+          # Don't run more than one at a time.
+          return
+        end
+        cmd_args = []
+      when :slurm_immediate
+        nodelist = nodes_available_for_job(job)
+        if nodelist.nil?
+          if Time.now < @node_wait_deadline
+            break
+          else
+            next
+          end
+        end
+        cmd_args = ["salloc",
+                    "--chdir=/",
+                    "--immediate",
+                    "--exclusive",
+                    "--no-kill",
+                    "--job-name=#{job.uuid}",
+                    "--nodelist=#{nodelist.join(',')}"]
+      else
+        raise "Unknown crunch_job_wrapper: #{Server::Application.config.crunch_job_wrapper}"
+      end
+
+      if Server::Application.config.crunch_job_user
+        cmd_args.unshift("sudo", "-E", "-u",
+                         Server::Application.config.crunch_job_user,
+                         "LD_LIBRARY_PATH=#{ENV['LD_LIBRARY_PATH']}",
+                         "PATH=#{ENV['PATH']}",
+                         "PERLLIB=#{ENV['PERLLIB']}",
+                         "PYTHONPATH=#{ENV['PYTHONPATH']}",
+                         "RUBYLIB=#{ENV['RUBYLIB']}",
+                         "GEM_PATH=#{ENV['GEM_PATH']}")
+      end
+
+      next unless get_authorization job
+
+      ready = internal_repo_has_commit? job.script_version
+
+      if not ready
+        # Import the commit from the specified repository into the
+        # internal repository. This should have been done already when
+        # the job was created/updated; this code is obsolete except to
+        # avoid deployment races. Failing the job would be a
+        # reasonable thing to do at this point.
+        repo = Repository.where(name: job.repository).first
+        if repo.nil? or repo.server_path.nil?
+          fail_job job, "Repository #{job.repository} not found under #{@repo_root}"
+          next
+        end
+        ready &&= get_commit repo.server_path, job.script_version
+        ready &&= tag_commit job, job.script_version, job.uuid
+      end
+
+      # This should be unnecessary, because API server does it during
+      # job create/update, but it's still not a bad idea to verify the
+      # tag is correct before starting the job:
+      ready &&= tag_commit job, job.script_version, job.uuid
+
+      # The arvados_sdk_version doesn't support use of arbitrary
+      # remote URLs, so the requested version isn't necessarily copied
+      # into the internal repository yet.
+      if job.arvados_sdk_version
+        ready &&= get_commit @arvados_repo_path, job.arvados_sdk_version
+        ready &&= tag_commit job, job.arvados_sdk_version, "#{job.uuid}-arvados-sdk"
+      end
+
+      if not ready
+        fail_job job, "commit not present in internal repository"
+        next
+      end
+
+      cmd_args += [@crunch_job_bin,
+                   '--job-api-token', @authorizations[job.uuid].api_token,
+                   '--job', job.uuid,
+                   '--git-dir', @arvados_internal]
+
+      if @docker_bin
+        cmd_args += ['--docker-bin', @docker_bin]
+      end
+
+      if @docker_run_args
+        cmd_args += ['--docker-run-args', @docker_run_args]
+      end
+
+      if have_job_lock?(job)
+        cmd_args << "--force-unlock"
+      end
+
+      $stderr.puts "dispatch: #{cmd_args.join ' '}"
+
+      begin
+        i, o, e, t = Open3.popen3(*cmd_args)
+      rescue
+        $stderr.puts "dispatch: popen3: #{$!}"
+        sleep 1
+        next
+      end
+
+      $stderr.puts "dispatch: job #{job.uuid}"
+      start_banner = "dispatch: child #{t.pid} start #{LogTime.now}"
+      $stderr.puts start_banner
+
+      @running[job.uuid] = {
+        stdin: i,
+        stdout: o,
+        stderr: e,
+        wait_thr: t,
+        job: job,
+        buf: {stderr: '', stdout: ''},
+        started: false,
+        sent_int: 0,
+        job_auth: @authorizations[job.uuid],
+        stderr_buf_to_flush: '',
+        stderr_flushed_at: Time.new(0),
+        bytes_logged: 0,
+        events_logged: 0,
+        log_throttle_is_open: true,
+        log_throttle_reset_time: Time.now + Rails.configuration.crunch_log_throttle_period,
+        log_throttle_bytes_so_far: 0,
+        log_throttle_lines_so_far: 0,
+        log_throttle_bytes_skipped: 0,
+      }
+      i.close
+      @todo_job_retries.delete(job.uuid)
+      update_node_status
+    end
+  end
+
+  # Test for hard cap on total output and for log throttling.  Returns whether
+  # the log line should go to output or not.  Modifies "line" in place to
+  # replace it with an error if a logging limit is tripped.
+  def rate_limit running_job, line
+    message = false
+    linesize = line.size
+    if running_job[:log_throttle_is_open]
+      running_job[:log_throttle_lines_so_far] += 1
+      running_job[:log_throttle_bytes_so_far] += linesize
+      running_job[:bytes_logged] += linesize
+
+      if (running_job[:bytes_logged] >
+          Rails.configuration.crunch_limit_log_bytes_per_job)
+        message = "Exceeded log limit #{Rails.configuration.crunch_limit_log_bytes_per_job} bytes (crunch_limit_log_bytes_per_job). Log will be truncated."
+        running_job[:log_throttle_reset_time] = Time.now + 100.years
+        running_job[:log_throttle_is_open] = false
+
+      elsif (running_job[:log_throttle_bytes_so_far] >
+             Rails.configuration.crunch_log_throttle_bytes)
+        remaining_time = running_job[:log_throttle_reset_time] - Time.now
+        message = "Exceeded rate #{Rails.configuration.crunch_log_throttle_bytes} bytes per #{Rails.configuration.crunch_log_throttle_period} seconds (crunch_log_throttle_bytes). Logging will be silenced for the next #{remaining_time.round} seconds.\n"
+        running_job[:log_throttle_is_open] = false
+
+      elsif (running_job[:log_throttle_lines_so_far] >
+             Rails.configuration.crunch_log_throttle_lines)
+        remaining_time = running_job[:log_throttle_reset_time] - Time.now
+        message = "Exceeded rate #{Rails.configuration.crunch_log_throttle_lines} lines per #{Rails.configuration.crunch_log_throttle_period} seconds (crunch_log_throttle_lines), logging will be silenced for the next #{remaining_time.round} seconds.\n"
+        running_job[:log_throttle_is_open] = false
+      end
+    end
+
+    if not running_job[:log_throttle_is_open]
+      # Don't log anything if any limit has been exceeded. Just count lossage.
+      running_job[:log_throttle_bytes_skipped] += linesize
+    end
+
+    if message
+      # Yes, write to logs, but use our "rate exceeded" message
+      # instead of the log message that exceeded the limit.
+      line.replace message
+      true
+    else
+      running_job[:log_throttle_is_open]
+    end
+  end
+
+  def read_pipes
+    @running.each do |job_uuid, j|
+      job = j[:job]
+
+      now = Time.now
+      if now > j[:log_throttle_reset_time]
+        # It has been more than throttle_period seconds since the last
+        # checkpoint, so reset the throttle.
+        if j[:log_throttle_bytes_skipped] > 0
+          message = "#{job_uuid} ! Skipped #{j[:log_throttle_bytes_skipped]} bytes of log"
+          $stderr.puts message
+          j[:stderr_buf_to_flush] << "#{LogTime.now} #{message}\n"
+        end
+
+        j[:log_throttle_reset_time] = now + Rails.configuration.crunch_log_throttle_period
+        j[:log_throttle_bytes_so_far] = 0
+        j[:log_throttle_lines_so_far] = 0
+        j[:log_throttle_bytes_skipped] = 0
+        j[:log_throttle_is_open] = true
+      end
+
+      j[:buf].each do |stream, streambuf|
+        # Read some data from the child stream
+        buf = ''
+        begin
+          # It's important to use a big enough buffer here. When we're
+          # being flooded with logs, we must read and discard many
+          # bytes at once. Otherwise, we can easily peg a CPU with
+          # time-checking and other loop overhead. (Quick tests show a
+          # 1MiB buffer working 2.5x as fast as a 64 KiB buffer.)
+          #
+          # So don't reduce this buffer size!
+          buf = j[stream].read_nonblock(2**20)
+        rescue Errno::EAGAIN, EOFError
+        end
+
+        # Short circuit the counting code if we're just going to throw
+        # away the data anyway.
+        if not j[:log_throttle_is_open]
+          j[:log_throttle_bytes_skipped] += streambuf.size + buf.size
+          streambuf.replace ''
+          next
+        elsif buf == ''
+          next
+        end
+
+        # Append to incomplete line from previous read, if any
+        streambuf << buf
+
+        bufend = ''
+        streambuf.each_line do |line|
+          if not line.end_with? $/
+            if line.size > Rails.configuration.crunch_log_throttle_bytes
+              # Without a limit here, we'll use 2x an arbitrary amount
+              # of memory, and waste a lot of time copying strings
+              # around, all without providing any feedback to anyone
+              # about what's going on _or_ hitting any of our throttle
+              # limits.
+              #
+              # Here we leave "line" alone, knowing it will never be
+              # sent anywhere: rate_limit() will reach
+              # crunch_log_throttle_bytes immediately. However, we'll
+              # leave [...] in bufend: if the trailing end of the long
+              # line does end up getting sent anywhere, it will have
+              # some indication that it is incomplete.
+              bufend = "[...]"
+            else
+              # If line length is sane, we'll wait for the rest of the
+              # line to appear in the next read_pipes() call.
+              bufend = line
+              break
+            end
+          end
+          # rate_limit returns true or false as to whether to actually log
+          # the line or not.  It also modifies "line" in place to replace
+          # it with an error if a logging limit is tripped.
+          if rate_limit j, line
+            $stderr.print "#{job_uuid} ! " unless line.index(job_uuid)
+            $stderr.puts line
+            pub_msg = "#{LogTime.now} #{line.strip}\n"
+            j[:stderr_buf_to_flush] << pub_msg
+          end
+        end
+
+        # Leave the trailing incomplete line (if any) in streambuf for
+        # next time.
+        streambuf.replace bufend
+      end
+      # Flush buffered logs to the logs table, if appropriate. We have
+      # to do this even if we didn't collect any new logs this time:
+      # otherwise, buffered data older than seconds_between_events
+      # won't get flushed until new data arrives.
+      write_log j
+    end
+  end
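
The non-blocking read above can be exercised in isolation; a small sketch of the same pattern (pipe and data are illustrative):

    r, w = IO.pipe
    w.write "partial line with no newline yet"
    buf = ''
    begin
      buf << r.read_nonblock(2**20)  # drain whatever is ready, up to 1 MiB
    rescue Errno::EAGAIN, EOFError
      # nothing ready; the dispatcher's select() loop retries later
    end
    p buf  # an incomplete line stays buffered until "\n" arrives
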
+
+  def reap_children
+    return if 0 == @running.size
+    pid_done = nil
+    j_done = nil
+
+    if false
+      begin
+        pid_done = waitpid(-1, Process::WNOHANG | Process::WUNTRACED)
+        if pid_done
+          j_done = @running.values.
+            select { |j| j[:wait_thr].pid == pid_done }.
+            first
+        end
+      rescue SystemCallError
+        # I have @running processes but system reports I have no
+        # children. This is likely to happen repeatedly if it happens at
+        # all; I will log this no more than once per child process I
+        # start.
+        if 0 < @running.select { |uuid,j| j[:warned_waitpid_error].nil? }.size
+          children = @running.values.collect { |j| j[:wait_thr].pid }.join ' '
+          $stderr.puts "dispatch: IPC bug: waitpid() error (#{$!}), but I have children #{children}"
+        end
+        @running.each do |uuid,j| j[:warned_waitpid_error] = true end
+      end
+    else
+      @running.each do |uuid, j|
+        if j[:wait_thr].status == false
+          pid_done = j[:wait_thr].pid
+          j_done = j
+        end
+      end
+    end
+
+    return if !pid_done
+
+    job_done = j_done[:job]
+
+    # Ensure every last drop of stdout and stderr is consumed.
+    read_pipes
+    # Reset flush timestamp to make sure log gets written.
+    j_done[:stderr_flushed_at] = Time.new(0)
+    # Write any remaining logs.
+    write_log j_done
+
+    j_done[:buf].each do |stream, streambuf|
+      if streambuf != ''
+        $stderr.puts streambuf + "\n"
+      end
+    end
+
+    # Wait for the child to exit (wait_thr.value returns a Process::Status)
+    exit_status = j_done[:wait_thr].value.exitstatus
+    exit_tempfail = exit_status == EXIT_TEMPFAIL
+
+    $stderr.puts "dispatch: child #{pid_done} exit #{exit_status}"
+    $stderr.puts "dispatch: job #{job_done.uuid} end"
+
+    jobrecord = Job.find_by_uuid(job_done.uuid)
+
+    if exit_status == EXIT_RETRY_UNLOCKED
+      # The job failed because all of the nodes allocated to it
+      # failed.  Only this crunch-dispatch process can retry the job:
+      # it's already locked, and there's no way to put it back in the
+      # Queued state.  Put it in our internal todo list unless the job
+      # has failed this way excessively.
+      @job_retry_counts[jobrecord.uuid] += 1
+      exit_tempfail = @job_retry_counts[jobrecord.uuid] <= RETRY_UNLOCKED_LIMIT
+      if exit_tempfail
+        @todo_job_retries[jobrecord.uuid] = jobrecord
+      else
+        $stderr.puts("dispatch: job #{jobrecord.uuid} exceeded node failure retry limit -- giving up")
+      end
+    end
+
+    if !exit_tempfail
+      @job_retry_counts.delete(jobrecord.uuid)
+      if jobrecord.state == "Running"
+        # Apparently there was an unhandled error.  That could potentially
+        # include "all allocated nodes failed" when we don't want to retry
+        # because the job has already been retried RETRY_UNLOCKED_LIMIT
+        # times.  Fail the job.
+        jobrecord.state = "Failed"
+        if not jobrecord.save
+          $stderr.puts "dispatch: jobrecord.save failed"
+        end
+      end
+    else
+      # If the job failed to run due to an infrastructure
+      # issue with crunch-job or slurm, we want the job to stay in the
+      # queue. If crunch-job exited after losing a race to another
+      # crunch-job process, it exits 75 and we should leave the job
+      # record alone so the winner of the race can do its thing.
+      # If crunch-job exited after all of its allocated nodes failed,
+      # it exits 93, and we want to retry it later (see the
+      # EXIT_RETRY_UNLOCKED `if` block).
+      #
+      # There is still an unhandled race condition: If our crunch-job
+      # process is about to lose a race with another crunch-job
+      # process, but crashes before getting to its "exit 75" (for
+      # example, "cannot fork" or "cannot reach API server") then we
+      # will assume incorrectly that it's our process's fault that
+      # jobrecord.started_at is non-nil, and mark the job as failed
+      # even though the winner of the race is probably still doing
+      # fine.
+    end
+
+    # Invalidate the per-job auth token, unless the job is still queued and we
+    # might want to try it again.
+    if jobrecord.state != "Queued" and !@todo_job_retries.include?(jobrecord.uuid)
+      j_done[:job_auth].update_attributes expires_at: Time.now
+    end
+
+    @running.delete job_done.uuid
+  end
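
A compact way to read the exit-status handling above (a summary sketch; 75 and 93 are this file's EXIT_TEMPFAIL and EXIT_RETRY_UNLOCKED, and 3 is RETRY_UNLOCKED_LIMIT):

    def disposition(exit_status, retry_count)
      case exit_status
      when 75 then :requeue            # EXIT_TEMPFAIL: leave job in Queued
      when 93                          # EXIT_RETRY_UNLOCKED: all nodes failed
        retry_count <= 3 ? :retry_here : :fail
      when 0 then :done
      else :fail
      end
    end

    p disposition(93, 1)  # => :retry_here (stays on @todo_job_retries)
    p disposition(93, 4)  # => :fail (exceeded node failure retry limit)
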
+
+  def update_pipelines
+    expire_tokens = @pipe_auth_tokens.dup
+    @todo_pipelines.each do |p|
+      pipe_auth = (@pipe_auth_tokens[p.uuid] ||= ApiClientAuthorization.
+                   create(user: User.where('uuid=?', p.modified_by_user_uuid).first,
+                          api_client_id: 0))
+      puts `export ARVADOS_API_TOKEN=#{pipe_auth.api_token} && arv-run-pipeline-instance --run-pipeline-here --no-wait --instance #{p.uuid}`
+      expire_tokens.delete p.uuid
+    end
+
+    expire_tokens.each do |k, v|
+      v.update_attributes expires_at: Time.now
+      @pipe_auth_tokens.delete k
+    end
+  end
+
+  def parse_argv argv
+    @runoptions = {}
+    (argv.any? ? argv : ['--jobs', '--pipelines']).each do |arg|
+      case arg
+      when '--jobs'
+        @runoptions[:jobs] = true
+      when '--pipelines'
+        @runoptions[:pipelines] = true
+      else
+        abort "Unrecognized command line option '#{arg}'"
+      end
+    end
+    if not (@runoptions[:jobs] or @runoptions[:pipelines])
+      abort "Nothing to do. Please specify at least one of: --jobs, --pipelines."
+    end
+  end
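
Example invocations, given the parsing above (no arguments implies both modes):

    # crunch-dispatch.rb                -> @runoptions = {jobs: true, pipelines: true}
    # crunch-dispatch.rb --jobs         -> @runoptions = {jobs: true}
    # crunch-dispatch.rb --pipelines    -> @runoptions = {pipelines: true}
    # crunch-dispatch.rb --frobnicate   -> abort "Unrecognized command line option"
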
+
+  def run argv
+    parse_argv argv
+
+    # We want files written by crunch-dispatch to be writable by other
+    # processes with the same GID, see bug #7228
+    File.umask(0002)
+
+    # This is how crunch-job child procs know where the "refresh"
+    # trigger file is
+    ENV["CRUNCH_REFRESH_TRIGGER"] = Rails.configuration.crunch_refresh_trigger
+
+    # If salloc can't allocate resources immediately, make it use our
+    # temporary failure exit code.  This ensures crunch-dispatch won't
+    # mark a job failed because of an issue with node allocation.
+    # This often happens when another dispatcher wins the race to
+    # allocate nodes.
+    ENV["SLURM_EXIT_IMMEDIATE"] = CrunchDispatch::EXIT_TEMPFAIL.to_s
+
+    if ENV["CRUNCH_DISPATCH_LOCKFILE"]
+      lockfilename = ENV.delete "CRUNCH_DISPATCH_LOCKFILE"
+      lockfile = File.open(lockfilename, File::RDWR|File::CREAT, 0644)
+      unless lockfile.flock File::LOCK_EX|File::LOCK_NB
+        abort "Lock unavailable on #{lockfilename} - exit"
+      end
+    end
+
+    @signal = {}
+    %w{TERM INT}.each do |sig|
+      signame = sig
+      Signal.trap(sig) do
+        $stderr.puts "Received #{signame} signal"
+        @signal[:term] = true
+      end
+    end
+
+    act_as_system_user
+    User.first.group_permissions
+    $stderr.puts "dispatch: ready"
+    while !@signal[:term] or @running.size > 0
+      read_pipes
+      if @signal[:term]
+        @running.each do |uuid, j|
+          if !j[:started] and j[:sent_int] < 2
+            begin
+              Process.kill 'INT', j[:wait_thr].pid
+            rescue Errno::ESRCH
+              # No such pid = race condition + desired result is
+              # already achieved
+            end
+            j[:sent_int] += 1
+          end
+        end
+      else
+        refresh_todo unless did_recently(:refresh_todo, 1.0)
+        update_node_status unless did_recently(:update_node_status, 1.0)
+        unless @todo.empty? or did_recently(:start_jobs, 1.0) or @signal[:term]
+          start_jobs
+        end
+        unless (@todo_pipelines.empty? and @pipe_auth_tokens.empty?) or did_recently(:update_pipelines, 5.0)
+          update_pipelines
+        end
+      end
+      reap_children
+      select(@running.values.collect { |j| [j[:stdout], j[:stderr]] }.flatten,
+             [], [], 1)
+    end
+    # If there are jobs we wanted to retry, we have to mark them as failed now.
+    # Other dispatchers can't pick them up because we hold their lock.
+    @todo_job_retries.each_key do |job_uuid|
+      job = Job.find_by_uuid(job_uuid)
+      if job.state == "Running"
+        fail_job(job, "crunch-dispatch was stopped during job's tempfail retry loop")
+      end
+    end
+  end
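
The CRUNCH_DISPATCH_LOCKFILE handling above relies on a non-blocking exclusive flock so at most one dispatcher runs per lockfile; the same pattern in isolation (path is illustrative):

    lockfile = File.open("/tmp/crunch-dispatch.lock", File::RDWR|File::CREAT, 0644)
    if lockfile.flock(File::LOCK_EX|File::LOCK_NB)
      puts "lock acquired; this process is the dispatcher"
    else
      abort "another dispatcher already holds the lock"
    end
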
+
+  protected
+
+  def have_job_lock?(job)
+    # Return true if the given job is locked by this crunch-dispatch, normally
+    # because we've run crunch-job for it.
+    @todo_job_retries.include?(job.uuid)
+  end
+
+  def did_recently(thing, min_interval)
+    if !@did_recently[thing] or @did_recently[thing] < Time.now - min_interval
+      @did_recently[thing] = Time.now
+      false
+    else
+      true
+    end
+  end
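
Note the inverted sense: did_recently returns false when the action should run (recording the timestamp as a side effect), so call sites in the run loop above read naturally:

    refresh_todo unless did_recently(:refresh_todo, 1.0)          # at most once/sec
    update_pipelines unless did_recently(:update_pipelines, 5.0)  # at most once/5 sec
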
+
+  # Send a message to the log table. We want these records to be transient.
+  def write_log running_job
+    return if running_job[:stderr_buf_to_flush] == ''
+
+    # Send a log event if the buffer size exceeds crunch_log_bytes_per_event,
+    # or if it has been at least crunch_log_seconds_between_events seconds
+    # since the last flush.
+    if running_job[:stderr_buf_to_flush].size > Rails.configuration.crunch_log_bytes_per_event or
+        (Time.now - running_job[:stderr_flushed_at]) >= Rails.configuration.crunch_log_seconds_between_events
+      begin
+        log = Log.new(object_uuid: running_job[:job].uuid,
+                      event_type: 'stderr',
+                      owner_uuid: running_job[:job].owner_uuid,
+                      properties: {"text" => running_job[:stderr_buf_to_flush]})
+        log.save!
+        running_job[:events_logged] += 1
+      rescue => exception
+        $stderr.puts "Failed to write logs"
+        $stderr.puts exception.backtrace
+      end
+      running_job[:stderr_buf_to_flush] = ''
+      running_job[:stderr_flushed_at] = Time.now
+    end
+  end
+end
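
The flush condition in write_log fires on either size or age; stubbing the two configuration values makes the rule easy to check (values hypothetical):

    BYTES_PER_EVENT = 4096        # stand-in for crunch_log_bytes_per_event
    SECONDS_BETWEEN_EVENTS = 1    # stand-in for crunch_log_seconds_between_events

    def flush?(buf, last_flushed_at)
      buf.size > BYTES_PER_EVENT or
        (Time.now - last_flushed_at) >= SECONDS_BETWEEN_EVENTS
    end

    p flush?("x" * 5000, Time.now)   # => true  (buffer too big)
    p flush?("x", Time.now - 10)     # => true  (buffer too old)
    p flush?("x", Time.now)          # => false (small and fresh)
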
index 1b386556e62ca1363e0c31bad1645236687602b0..b7316a5e19304c1af4b892ea2513393d01406850 100644 (file)
@@ -1,12 +1,13 @@
 namespace :config do
   desc 'Ensure site configuration has all required settings'
   task check: :environment do
+    $stderr.puts "%-32s %s" % ["AppVersion (discovered)", AppVersion.hash]
     $application_config.sort.each do |k, v|
       if ENV.has_key?('QUIET') then
         # Make sure we still check for the variable to exist
         eval("Rails.configuration.#{k}")
       else
-        if /(password|secret)/.match(k) then
+        if /(password|secret|signing_key)/.match(k) then
           # Make sure we still check for the variable to exist, but don't print the value
           eval("Rails.configuration.#{k}")
           $stderr.puts "%-32s %s" % [k, '*********']
index d119f8a887a1244f64e58013da4352fd591b635e..7a2682af46f062b47f59a54dd12303a3e887bca6 100755 (executable)
@@ -6,30 +6,10 @@
 #
 # Print the token on stdout.
 
-supplied_token = ARGV[0]
-
-require File.dirname(__FILE__) + '/../config/boot'
-require File.dirname(__FILE__) + '/../config/environment'
-
-include ApplicationHelper
-act_as_system_user
+require './lib/create_superuser_token'
+include CreateSuperUserToken
 
-if supplied_token
-  api_client_auth = ApiClientAuthorization.
-    where(api_token: supplied_token).
-    first
-  if api_client_auth && !api_client_auth.user.uuid.match(/-000000000000000$/)
-    raise ActiveRecord::RecordNotUnique("Token already exists but is not a superuser token.")
-  end
-end
-
-if !api_client_auth
-  api_client_auth = ApiClientAuthorization.
-    new(user: system_user,
-        api_client_id: 0,
-        created_by_ip_address: '::1',
-        api_token: supplied_token)
-  api_client_auth.save!
-end
+supplied_token = ARGV[0]
 
-puts api_client_auth.api_token
+token = CreateSuperUserToken.create_superuser_token supplied_token
+puts token
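
With the logic moved into lib/create_superuser_token, the same helper can be reused elsewhere, for instance from a Rails console (behavior per the unit tests added below):

    require './lib/create_superuser_token'
    include CreateSuperUserToken

    create_superuser_token             # returns the existing superuser token,
                                       # or creates one if none exists
    create_superuser_token 'mytoken'   # creates/returns this specific token;
                                       # raises if it exists but isn't superuser
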
index 4a1fdbce758d7b552f529419f7c37f970299d298..889e6a47bdeafb423f94ce966d4d9cd5c3bae318 100755 (executable)
 #!/usr/bin/env ruby
 
-# We want files written by crunch-dispatch to be writable by other processes
-# with the same GID, see bug #7228
-File.umask(0002)
-
-require 'shellwords'
-include Process
-
-$options = {}
-(ARGV.any? ? ARGV : ['--jobs', '--pipelines']).each do |arg|
-  case arg
-  when '--jobs'
-    $options[:jobs] = true
-  when '--pipelines'
-    $options[:pipelines] = true
-  else
-    abort "Unrecognized command line option '#{arg}'"
-  end
-end
-if not ($options[:jobs] or $options[:pipelines])
-  abort "Nothing to do. Please specify at least one of: --jobs, --pipelines."
-end
-
-ARGV.reject! { |a| a =~ /--jobs|--pipelines/ }
-
-$warned = {}
-$signal = {}
-%w{TERM INT}.each do |sig|
-  signame = sig
-  Signal.trap(sig) do
-    $stderr.puts "Received #{signame} signal"
-    $signal[:term] = true
-  end
-end
-
-if ENV["CRUNCH_DISPATCH_LOCKFILE"]
-  lockfilename = ENV.delete "CRUNCH_DISPATCH_LOCKFILE"
-  lockfile = File.open(lockfilename, File::RDWR|File::CREAT, 0644)
-  unless lockfile.flock File::LOCK_EX|File::LOCK_NB
-    abort "Lock unavailable on #{lockfilename} - exit"
-  end
+dispatch_argv = []
+ARGV.reject! do |arg|
+  dispatch_argv.push(arg) if /^--/ =~ arg
 end
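
The filter above peels "--" options off ARGV for the dispatcher while leaving anything else (such as a Rails environment name) behind; for example:

    dispatch_argv = []
    argv = ['production', '--jobs']
    argv.reject! do |arg|
      dispatch_argv.push(arg) if /^--/ =~ arg
    end
    p dispatch_argv  # => ["--jobs"]     (passed to CrunchDispatch#run)
    p argv           # => ["production"] (consumed as RAILS_ENV below)
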
 
 ENV["RAILS_ENV"] = ARGV[0] || ENV["RAILS_ENV"] || "development"
-
 require File.dirname(__FILE__) + '/../config/boot'
 require File.dirname(__FILE__) + '/../config/environment'
-require 'open3'
-
-class LogTime < Time
-  def to_s
-    self.utc.strftime "%Y-%m-%d_%H:%M:%S"
-  end
-end
-
-class Dispatcher
-  include ApplicationHelper
-
-  EXIT_TEMPFAIL = 75
-  EXIT_RETRY_UNLOCKED = 93
-  RETRY_UNLOCKED_LIMIT = 3
-
-  def initialize
-    @crunch_job_bin = (ENV['CRUNCH_JOB_BIN'] || `which arv-crunch-job`.strip)
-    if @crunch_job_bin.empty?
-      raise "No CRUNCH_JOB_BIN env var, and crunch-job not in path."
-    end
-
-    @docker_bin = ENV['CRUNCH_JOB_DOCKER_BIN']
-
-    @arvados_internal = Rails.configuration.git_internal_dir
-    if not File.exists? @arvados_internal
-      $stderr.puts `mkdir -p #{@arvados_internal.shellescape} && git init --bare #{@arvados_internal.shellescape}`
-      raise "No internal git repository available" unless ($? == 0)
-    end
-
-    @repo_root = Rails.configuration.git_repositories_dir
-    @arvados_repo_path = Repository.where(name: "arvados").first.server_path
-    @authorizations = {}
-    @did_recently = {}
-    @fetched_commits = {}
-    @git_tags = {}
-    @node_state = {}
-    @pipe_auth_tokens = {}
-    @running = {}
-    @todo = []
-    @todo_job_retries = {}
-    @job_retry_counts = Hash.new(0)
-    @todo_pipelines = []
-  end
-
-  def sysuser
-    return act_as_system_user
-  end
-
-  def refresh_todo
-    if $options[:jobs]
-      @todo = @todo_job_retries.values + Job.queue.select(&:repository)
-    end
-    if $options[:pipelines]
-      @todo_pipelines = PipelineInstance.queue
-    end
-  end
-
-  def each_slurm_line(cmd, outfmt, max_fields=nil)
-    max_fields ||= outfmt.split(":").size
-    max_fields += 1  # To accommodate the node field we add
-    @@slurm_version ||= Gem::Version.new(`sinfo --version`.match(/\b[\d\.]+\b/)[0])
-    if Gem::Version.new('2.3') <= @@slurm_version
-      `#{cmd} --noheader -o '%n:#{outfmt}'`.each_line do |line|
-        yield line.chomp.split(":", max_fields)
-      end
-    else
-      # Expand rows with hostname ranges (like "foo[1-3,5,9-12]:idle")
-      # into multiple rows with one hostname each.
-      `#{cmd} --noheader -o '%N:#{outfmt}'`.each_line do |line|
-        tokens = line.chomp.split(":", max_fields)
-        if (re = tokens[0].match /^(.*?)\[([-,\d]+)\]$/)
-          tokens.shift
-          re[2].split(",").each do |range|
-            range = range.split("-").collect(&:to_i)
-            (range[0]..range[-1]).each do |n|
-              yield [re[1] + n.to_s] + tokens
-            end
-          end
-        else
-          yield tokens
-        end
-      end
-    end
-  end
-
-  def slurm_status
-    slurm_nodes = {}
-    each_slurm_line("sinfo", "%t") do |hostname, state|
-      # Treat nodes in idle* state as down, because the * means that slurm
-      # hasn't been able to communicate with it recently.
-      state.sub!(/^idle\*/, "down")
-      state.sub!(/\W+$/, "")
-      state = "down" unless %w(idle alloc down).include?(state)
-      slurm_nodes[hostname] = {state: state, job: nil}
-    end
-    each_slurm_line("squeue", "%j") do |hostname, job_uuid|
-      slurm_nodes[hostname][:job] = job_uuid if slurm_nodes[hostname]
-    end
-    slurm_nodes
-  end
-
-  def update_node_status
-    return unless Server::Application.config.crunch_job_wrapper.to_s.match /^slurm/
-    slurm_status.each_pair do |hostname, slurmdata|
-      next if @node_state[hostname] == slurmdata
-      begin
-        node = Node.where('hostname=?', hostname).order(:last_ping_at).last
-        if node
-          $stderr.puts "dispatch: update #{hostname} state to #{slurmdata}"
-          node.info["slurm_state"] = slurmdata[:state]
-          node.job_uuid = slurmdata[:job]
-          if node.save
-            @node_state[hostname] = slurmdata
-          else
-            $stderr.puts "dispatch: failed to update #{node.uuid}: #{node.errors.messages}"
-          end
-        elsif slurmdata[:state] != 'down'
-          $stderr.puts "dispatch: SLURM reports '#{hostname}' is not down, but no node has that name"
-        end
-      rescue => error
-        $stderr.puts "dispatch: error updating #{hostname} node status: #{error}"
-      end
-    end
-  end
-
-  def positive_int(raw_value, default=nil)
-    value = begin raw_value.to_i rescue 0 end
-    if value > 0
-      value
-    else
-      default
-    end
-  end
-
-  NODE_CONSTRAINT_MAP = {
-    # Map Job runtime_constraints keys to the corresponding Node info key.
-    'min_ram_mb_per_node' => 'total_ram_mb',
-    'min_scratch_mb_per_node' => 'total_scratch_mb',
-    'min_cores_per_node' => 'total_cpu_cores',
-  }
-
-  def nodes_available_for_job_now(job)
-    # Find Nodes that satisfy a Job's runtime constraints (by building
-    # a list of Procs and using them to test each Node).  If there
-    # enough to run the Job, return an array of their names.
-    # Otherwise, return nil.
-    need_procs = NODE_CONSTRAINT_MAP.each_pair.map do |job_key, node_key|
-      Proc.new do |node|
-        positive_int(node.info[node_key], 0) >=
-          positive_int(job.runtime_constraints[job_key], 0)
-      end
-    end
-    min_node_count = positive_int(job.runtime_constraints['min_nodes'], 1)
-    usable_nodes = []
-    Node.find_each do |node|
-      good_node = (node.info['slurm_state'] == 'idle')
-      need_procs.each { |node_test| good_node &&= node_test.call(node) }
-      if good_node
-        usable_nodes << node
-        if usable_nodes.count >= min_node_count
-          return usable_nodes.map { |node| node.hostname }
-        end
-      end
-    end
-    nil
-  end
-
-  def nodes_available_for_job(job)
-    # Check if there are enough idle nodes with the Job's minimum
-    # hardware requirements to run it.  If so, return an array of
-    # their names.  If not, up to once per hour, signal start_jobs to
-    # hold off launching Jobs.  This delay is meant to give the Node
-    # Manager an opportunity to make new resources available for new
-    # Jobs.
-    #
-    # The exact timing parameters here might need to be adjusted for
-    # the best balance between helping the longest-waiting Jobs run,
-    # and making efficient use of immediately available resources.
-    # These are all just first efforts until we have more data to work
-    # with.
-    nodelist = nodes_available_for_job_now(job)
-    if nodelist.nil? and not did_recently(:wait_for_available_nodes, 3600)
-      $stderr.puts "dispatch: waiting for nodes for #{job.uuid}"
-      @node_wait_deadline = Time.now + 5.minutes
-    end
-    nodelist
-  end
-
-  def fail_job job, message
-    $stderr.puts "dispatch: #{job.uuid}: #{message}"
-    begin
-      Log.new(object_uuid: job.uuid,
-              event_type: 'dispatch',
-              owner_uuid: job.owner_uuid,
-              summary: message,
-              properties: {"text" => message}).save!
-    rescue
-      $stderr.puts "dispatch: log.create failed"
-    end
-
-    begin
-      job.lock @authorizations[job.uuid].user.uuid
-      job.state = "Failed"
-      if not job.save
-        $stderr.puts "dispatch: save failed setting job #{job.uuid} to failed"
-      end
-    rescue ArvadosModel::AlreadyLockedError
-      $stderr.puts "dispatch: tried to mark job #{job.uuid} as failed but it was already locked by someone else"
-    end
-  end
-
-  def stdout_s(cmd_a, opts={})
-    IO.popen(cmd_a, "r", opts) do |pipe|
-      return pipe.read.chomp
-    end
-  end
-
-  def git_cmd(*cmd_a)
-    ["git", "--git-dir=#{@arvados_internal}"] + cmd_a
-  end
-
-  def get_authorization(job)
-    if @authorizations[job.uuid] and
-        @authorizations[job.uuid].user.uuid != job.modified_by_user_uuid
-      # We already made a token for this job, but we need a new one
-      # because modified_by_user_uuid has changed (the job will run
-      # as a different user).
-      @authorizations[job.uuid].update_attributes expires_at: Time.now
-      @authorizations[job.uuid] = nil
-    end
-    if not @authorizations[job.uuid]
-      auth = ApiClientAuthorization.
-        new(user: User.where('uuid=?', job.modified_by_user_uuid).first,
-            api_client_id: 0)
-      if not auth.save
-        $stderr.puts "dispatch: auth.save failed for #{job.uuid}"
-      else
-        @authorizations[job.uuid] = auth
-      end
-    end
-    @authorizations[job.uuid]
-  end
-
-  def internal_repo_has_commit? sha1
-    if (not @fetched_commits[sha1] and
-        sha1 == stdout_s(git_cmd("rev-list", "-n1", sha1), err: "/dev/null") and
-        $? == 0)
-      @fetched_commits[sha1] = true
-    end
-    return @fetched_commits[sha1]
-  end
-
-  def get_commit src_repo, sha1
-    return true if internal_repo_has_commit? sha1
-
-    # commit does not exist in internal repository, so import the
-    # source repository using git fetch-pack
-    cmd = git_cmd("fetch-pack", "--no-progress", "--all", src_repo)
-    $stderr.puts "dispatch: #{cmd}"
-    $stderr.puts(stdout_s(cmd))
-    @fetched_commits[sha1] = ($? == 0)
-  end
-
-  def tag_commit(commit_hash, tag_name)
-    # @git_tags[T]==V if we know commit V has been tagged T in the
-    # arvados_internal repository.
-    if not @git_tags[tag_name]
-      cmd = git_cmd("tag", tag_name, commit_hash)
-      $stderr.puts "dispatch: #{cmd}"
-      $stderr.puts(stdout_s(cmd, err: "/dev/null"))
-      unless $? == 0
-        # git tag failed.  This may be because the tag already exists, so check for that.
-        tag_rev = stdout_s(git_cmd("rev-list", "-n1", tag_name))
-        if $? == 0
-          # We got a revision back
-          if tag_rev != commit_hash
-            # Uh oh, the tag doesn't point to the revision we were expecting.
-            # Someone has been monkeying with the job record and/or git.
-            fail_job job, "Existing tag #{tag_name} points to commit #{tag_rev} but expected commit #{commit_hash}"
-            return nil
-          end
-          # we're okay (fall through to setting @git_tags below)
-        else
-          # git rev-list failed for some reason.
-          fail_job job, "'git tag' for #{tag_name} failed but did not find any existing tag using 'git rev-list'"
-          return nil
-        end
-      end
-      # 'git tag' was successful, or there is an existing tag that points to the same revision.
-      @git_tags[tag_name] = commit_hash
-    elsif @git_tags[tag_name] != commit_hash
-      fail_job job, "Existing tag #{tag_name} points to commit #{@git_tags[tag_name]} but this job uses commit #{commit_hash}"
-      return nil
-    end
-    @git_tags[tag_name]
-  end
-
-  def start_jobs
-    @todo.each do |job|
-      next if @running[job.uuid]
-
-      cmd_args = nil
-      case Server::Application.config.crunch_job_wrapper
-      when :none
-        if @running.size > 0
-            # Don't run more than one at a time.
-            return
-        end
-        cmd_args = []
-      when :slurm_immediate
-        nodelist = nodes_available_for_job(job)
-        if nodelist.nil?
-          if Time.now < @node_wait_deadline
-            break
-          else
-            next
-          end
-        end
-        cmd_args = ["salloc",
-                    "--chdir=/",
-                    "--immediate",
-                    "--exclusive",
-                    "--no-kill",
-                    "--job-name=#{job.uuid}",
-                    "--nodelist=#{nodelist.join(',')}"]
-      else
-        raise "Unknown crunch_job_wrapper: #{Server::Application.config.crunch_job_wrapper}"
-      end
-
-      if Server::Application.config.crunch_job_user
-        cmd_args.unshift("sudo", "-E", "-u",
-                         Server::Application.config.crunch_job_user,
-                         "LD_LIBRARY_PATH=#{ENV['LD_LIBRARY_PATH']}",
-                         "PATH=#{ENV['PATH']}",
-                         "PERLLIB=#{ENV['PERLLIB']}",
-                         "PYTHONPATH=#{ENV['PYTHONPATH']}",
-                         "RUBYLIB=#{ENV['RUBYLIB']}",
-                         "GEM_PATH=#{ENV['GEM_PATH']}")
-      end
-
-      next unless get_authorization job
-
-      ready = internal_repo_has_commit? job.script_version
-
-      if not ready
-        # Import the commit from the specified repository into the
-        # internal repository. This should have been done already when
-        # the job was created/updated; this code is obsolete except to
-        # avoid deployment races. Failing the job would be a
-        # reasonable thing to do at this point.
-        repo = Repository.where(name: job.repository).first
-        if repo.nil? or repo.server_path.nil?
-          fail_job "Repository #{job.repository} not found under #{@repo_root}"
-          next
-        end
-        ready &&= get_commit repo.server_path, job.script_version
-        ready &&= tag_commit job.script_version, job.uuid
-      end
-
-      # This should be unnecessary, because API server does it during
-      # job create/update, but it's still not a bad idea to verify the
-      # tag is correct before starting the job:
-      ready &&= tag_commit job.script_version, job.uuid
-
-      # The arvados_sdk_version doesn't support use of arbitrary
-      # remote URLs, so the requested version isn't necessarily copied
-      # into the internal repository yet.
-      if job.arvados_sdk_version
-        ready &&= get_commit @arvados_repo_path, job.arvados_sdk_version
-        ready &&= tag_commit job.arvados_sdk_version, "#{job.uuid}-arvados-sdk"
-      end
-
-      if not ready
-        fail_job job, "commit not present in internal repository"
-        next
-      end
-
-      cmd_args += [@crunch_job_bin,
-                   '--job-api-token', @authorizations[job.uuid].api_token,
-                   '--job', job.uuid,
-                   '--git-dir', @arvados_internal]
-
-      if @docker_bin
-        cmd_args += ['--docker-bin', @docker_bin]
-      end
-
-      if @todo_job_retries.include?(job.uuid)
-        cmd_args << "--force-unlock"
-      end
-
-      $stderr.puts "dispatch: #{cmd_args.join ' '}"
-
-      begin
-        i, o, e, t = Open3.popen3(*cmd_args)
-      rescue
-        $stderr.puts "dispatch: popen3: #{$!}"
-        sleep 1
-        next
-      end
-
-      $stderr.puts "dispatch: job #{job.uuid}"
-      start_banner = "dispatch: child #{t.pid} start #{LogTime.now}"
-      $stderr.puts start_banner
-
-      @running[job.uuid] = {
-        stdin: i,
-        stdout: o,
-        stderr: e,
-        wait_thr: t,
-        job: job,
-        buf: {stderr: '', stdout: ''},
-        started: false,
-        sent_int: 0,
-        job_auth: @authorizations[job.uuid],
-        stderr_buf_to_flush: '',
-        stderr_flushed_at: Time.new(0),
-        bytes_logged: 0,
-        events_logged: 0,
-        log_throttle_is_open: true,
-        log_throttle_reset_time: Time.now + Rails.configuration.crunch_log_throttle_period,
-        log_throttle_bytes_so_far: 0,
-        log_throttle_lines_so_far: 0,
-        log_throttle_bytes_skipped: 0,
-      }
-      i.close
-      @todo_job_retries.delete(job.uuid)
-      update_node_status
-    end
-  end
-
-  # Test for hard cap on total output and for log throttling.  Returns whether
-  # the log line should go to output or not.  Modifies "line" in place to
-  # replace it with an error if a logging limit is tripped.
-  def rate_limit running_job, line
-    message = false
-    linesize = line.size
-    if running_job[:log_throttle_is_open]
-      running_job[:log_throttle_lines_so_far] += 1
-      running_job[:log_throttle_bytes_so_far] += linesize
-      running_job[:bytes_logged] += linesize
-
-      if (running_job[:bytes_logged] >
-          Rails.configuration.crunch_limit_log_bytes_per_job)
-        message = "Exceeded log limit #{Rails.configuration.crunch_limit_log_bytes_per_job} bytes (crunch_limit_log_bytes_per_job). Log will be truncated."
-        running_job[:log_throttle_reset_time] = Time.now + 100.years
-        running_job[:log_throttle_is_open] = false
-
-      elsif (running_job[:log_throttle_bytes_so_far] >
-             Rails.configuration.crunch_log_throttle_bytes)
-        remaining_time = running_job[:log_throttle_reset_time] - Time.now
-        message = "Exceeded rate #{Rails.configuration.crunch_log_throttle_bytes} bytes per #{Rails.configuration.crunch_log_throttle_period} seconds (crunch_log_throttle_bytes). Logging will be silenced for the next #{remaining_time.round} seconds.\n"
-        running_job[:log_throttle_is_open] = false
-
-      elsif (running_job[:log_throttle_lines_so_far] >
-             Rails.configuration.crunch_log_throttle_lines)
-        remaining_time = running_job[:log_throttle_reset_time] - Time.now
-        message = "Exceeded rate #{Rails.configuration.crunch_log_throttle_lines} lines per #{Rails.configuration.crunch_log_throttle_period} seconds (crunch_log_throttle_lines), logging will be silenced for the next #{remaining_time.round} seconds.\n"
-        running_job[:log_throttle_is_open] = false
-      end
-    end
-
-    if not running_job[:log_throttle_is_open]
-      # Don't log anything if any limit has been exceeded. Just count lossage.
-      running_job[:log_throttle_bytes_skipped] += linesize
-    end
-
-    if message
-      # Yes, write to logs, but use our "rate exceeded" message
-      # instead of the log message that exceeded the limit.
-      line.replace message
-      true
-    else
-      running_job[:log_throttle_is_open]
-    end
-  end
-
-  def read_pipes
-    @running.each do |job_uuid, j|
-      job = j[:job]
-
-      now = Time.now
-      if now > j[:log_throttle_reset_time]
-        # It has been more than throttle_period seconds since the last
-        # checkpoint so reset the throttle
-        if j[:log_throttle_bytes_skipped] > 0
-          message = "#{job_uuid} ! Skipped #{j[:log_throttle_bytes_skipped]} bytes of log"
-          $stderr.puts message
-          j[:stderr_buf_to_flush] << "#{LogTime.now} #{message}\n"
-        end
-
-        j[:log_throttle_reset_time] = now + Rails.configuration.crunch_log_throttle_period
-        j[:log_throttle_bytes_so_far] = 0
-        j[:log_throttle_lines_so_far] = 0
-        j[:log_throttle_bytes_skipped] = 0
-        j[:log_throttle_is_open] = true
-      end
-
-      j[:buf].each do |stream, streambuf|
-        # Read some data from the child stream
-        buf = ''
-        begin
-          # It's important to use a big enough buffer here. When we're
-          # being flooded with logs, we must read and discard many
-          # bytes at once. Otherwise, we can easily peg a CPU with
-          # time-checking and other loop overhead. (Quick tests show a
-          # 1MiB buffer working 2.5x as fast as a 64 KiB buffer.)
-          #
-          # So don't reduce this buffer size!
-          buf = j[stream].read_nonblock(2**20)
-        rescue Errno::EAGAIN, EOFError
-        end
-
-        # Short circuit the counting code if we're just going to throw
-        # away the data anyway.
-        if not j[:log_throttle_is_open]
-          j[:log_throttle_bytes_skipped] += streambuf.size + buf.size
-          streambuf.replace ''
-          next
-        elsif buf == ''
-          next
-        end
-
-        # Append to incomplete line from previous read, if any
-        streambuf << buf
-
-        bufend = ''
-        streambuf.each_line do |line|
-          if not line.end_with? $/
-            if line.size > Rails.configuration.crunch_log_throttle_bytes
-              # Without a limit here, we'll use 2x an arbitrary amount
-              # of memory, and waste a lot of time copying strings
-              # around, all without providing any feedback to anyone
-              # about what's going on _or_ hitting any of our throttle
-              # limits.
-              #
-              # Here we leave "line" alone, knowing it will never be
-              # sent anywhere: rate_limit() will reach
-              # crunch_log_throttle_bytes immediately. However, we'll
-              # leave [...] in bufend: if the trailing end of the long
-              # line does end up getting sent anywhere, it will have
-              # some indication that it is incomplete.
-              bufend = "[...]"
-            else
-              # If line length is sane, we'll wait for the rest of the
-              # line to appear in the next read_pipes() call.
-              bufend = line
-              break
-            end
-          end
-          # rate_limit returns true or false as to whether to actually log
-          # the line or not.  It also modifies "line" in place to replace
-          # it with an error if a logging limit is tripped.
-          if rate_limit j, line
-            $stderr.print "#{job_uuid} ! " unless line.index(job_uuid)
-            $stderr.puts line
-            pub_msg = "#{LogTime.now} #{line.strip}\n"
-            j[:stderr_buf_to_flush] << pub_msg
-          end
-        end
-
-        # Leave the trailing incomplete line (if any) in streambuf for
-        # next time.
-        streambuf.replace bufend
-      end
-      # Flush buffered logs to the logs table, if appropriate. We have
-      # to do this even if we didn't collect any new logs this time:
-      # otherwise, buffered data older than seconds_between_events
-      # won't get flushed until new data arrives.
-      write_log j
-    end
-  end
-
-  def reap_children
-    return if 0 == @running.size
-    pid_done = nil
-    j_done = nil
-
-    if false
-      begin
-        pid_done = waitpid(-1, Process::WNOHANG | Process::WUNTRACED)
-        if pid_done
-          j_done = @running.values.
-            select { |j| j[:wait_thr].pid == pid_done }.
-            first
-        end
-      rescue SystemCallError
-        # I have @running processes but system reports I have no
-        # children. This is likely to happen repeatedly if it happens at
-        # all; I will log this no more than once per child process I
-        # start.
-        if 0 < @running.select { |uuid,j| j[:warned_waitpid_error].nil? }.size
-          children = @running.values.collect { |j| j[:wait_thr].pid }.join ' '
-          $stderr.puts "dispatch: IPC bug: waitpid() error (#{$!}), but I have children #{children}"
-        end
-        @running.each do |uuid,j| j[:warned_waitpid_error] = true end
-      end
-    else
-      @running.each do |uuid, j|
-        if j[:wait_thr].status == false
-          pid_done = j[:wait_thr].pid
-          j_done = j
-        end
-      end
-    end
-
-    return if !pid_done
-
-    job_done = j_done[:job]
-
-    # Ensure every last drop of stdout and stderr is consumed.
-    read_pipes
-    # Reset flush timestamp to make sure log gets written.
-    j_done[:stderr_flushed_at] = Time.new(0)
-    # Write any remaining logs.
-    write_log j_done
-
-    j_done[:buf].each do |stream, streambuf|
-      if streambuf != ''
-        $stderr.puts streambuf + "\n"
-      end
-    end
-
-    # Wait the thread (returns a Process::Status)
-    exit_status = j_done[:wait_thr].value.exitstatus
-    exit_tempfail = exit_status == EXIT_TEMPFAIL
-
-    $stderr.puts "dispatch: child #{pid_done} exit #{exit_status}"
-    $stderr.puts "dispatch: job #{job_done.uuid} end"
-
-    jobrecord = Job.find_by_uuid(job_done.uuid)
-
-    if exit_status == EXIT_RETRY_UNLOCKED
-      # The job failed because all of the nodes allocated to it
-      # failed.  Only this crunch-dispatch process can retry the job:
-      # it's already locked, and there's no way to put it back in the
-      # Queued state.  Put it in our internal todo list unless the job
-      # has failed this way excessively.
-      @job_retry_counts[jobrecord.uuid] += 1
-      exit_tempfail = @job_retry_counts[jobrecord.uuid] <= RETRY_UNLOCKED_LIMIT
-      if exit_tempfail
-        @todo_job_retries[jobrecord.uuid] = jobrecord
-      else
-        $stderr.puts("dispatch: job #{jobrecord.uuid} exceeded node failure retry limit -- giving up")
-      end
-    end
-
-    if !exit_tempfail
-      @job_retry_counts.delete(jobrecord.uuid)
-      if jobrecord.state == "Running"
-        # Apparently there was an unhandled error.  That could potentially
-        # include "all allocated nodes failed" when we don't to retry
-        # because the job has already been retried RETRY_UNLOCKED_LIMIT
-        # times.  Fail the job.
-        jobrecord.state = "Failed"
-        if not jobrecord.save
-          $stderr.puts "dispatch: jobrecord.save failed"
-        end
-      end
-    else
-      # If the job failed to run due to an infrastructure
-      # issue with crunch-job or slurm, we want the job to stay in the
-      # queue. If crunch-job exited after losing a race to another
-      # crunch-job process, it exits 75 and we should leave the job
-      # record alone so the winner of the race can do its thing.
-      # If crunch-job exited after all of its allocated nodes failed,
-      # it exits 93, and we want to retry it later (see the
-      # EXIT_RETRY_UNLOCKED `if` block).
-      #
-      # There is still an unhandled race condition: If our crunch-job
-      # process is about to lose a race with another crunch-job
-      # process, but crashes before getting to its "exit 75" (for
-      # example, "cannot fork" or "cannot reach API server") then we
-      # will assume incorrectly that it's our process's fault
-      # jobrecord.started_at is non-nil, and mark the job as failed
-      # even though the winner of the race is probably still doing
-      # fine.
-    end
-
-    # Invalidate the per-job auth token, unless the job is still queued and we
-    # might want to try it again.
-    if jobrecord.state != "Queued" and !@todo_job_retries.include?(jobrecord.uuid)
-      j_done[:job_auth].update_attributes expires_at: Time.now
-    end
-
-    @running.delete job_done.uuid
-  end
-
-  def update_pipelines
-    expire_tokens = @pipe_auth_tokens.dup
-    @todo_pipelines.each do |p|
-      pipe_auth = (@pipe_auth_tokens[p.uuid] ||= ApiClientAuthorization.
-                   create(user: User.where('uuid=?', p.modified_by_user_uuid).first,
-                          api_client_id: 0))
-      puts `export ARVADOS_API_TOKEN=#{pipe_auth.api_token} && arv-run-pipeline-instance --run-pipeline-here --no-wait --instance #{p.uuid}`
-      expire_tokens.delete p.uuid
-    end
-
-    expire_tokens.each do |k, v|
-      v.update_attributes expires_at: Time.now
-      @pipe_auth_tokens.delete k
-    end
-  end
-
-  def run
-    act_as_system_user
-    User.first.group_permissions
-    $stderr.puts "dispatch: ready"
-    while !$signal[:term] or @running.size > 0
-      read_pipes
-      if $signal[:term]
-        @running.each do |uuid, j|
-          if !j[:started] and j[:sent_int] < 2
-            begin
-              Process.kill 'INT', j[:wait_thr].pid
-            rescue Errno::ESRCH
-              # No such pid = race condition + desired result is
-              # already achieved
-            end
-            j[:sent_int] += 1
-          end
-        end
-      else
-        refresh_todo unless did_recently(:refresh_todo, 1.0)
-        update_node_status unless did_recently(:update_node_status, 1.0)
-        unless @todo.empty? or did_recently(:start_jobs, 1.0) or $signal[:term]
-          start_jobs
-        end
-        unless (@todo_pipelines.empty? and @pipe_auth_tokens.empty?) or did_recently(:update_pipelines, 5.0)
-          update_pipelines
-        end
-      end
-      reap_children
-      select(@running.values.collect { |j| [j[:stdout], j[:stderr]] }.flatten,
-             [], [], 1)
-    end
-    # If there are jobs we wanted to retry, we have to mark them as failed now.
-    # Other dispatchers can't pick them up because we hold their lock.
-    @todo_job_retries.each_key do |job_uuid|
-      job = Job.find_by_uuid(job_uuid)
-      if job.state == "Running"
-        fail_job(job, "crunch-dispatch was stopped during job's tempfail retry loop")
-      end
-    end
-  end
-
-  protected
-
-  def did_recently(thing, min_interval)
-    if !@did_recently[thing] or @did_recently[thing] < Time.now - min_interval
-      @did_recently[thing] = Time.now
-      false
-    else
-      true
-    end
-  end
-
-  # send message to log table. we want these records to be transient
-  def write_log running_job
-    return if running_job[:stderr_buf_to_flush] == ''
-
-    # Send out to log event if buffer size exceeds the bytes per event or if
-    # it has been at least crunch_log_seconds_between_events seconds since
-    # the last flush.
-    if running_job[:stderr_buf_to_flush].size > Rails.configuration.crunch_log_bytes_per_event or
-        (Time.now - running_job[:stderr_flushed_at]) >= Rails.configuration.crunch_log_seconds_between_events
-      begin
-        log = Log.new(object_uuid: running_job[:job].uuid,
-                      event_type: 'stderr',
-                      owner_uuid: running_job[:job].owner_uuid,
-                      properties: {"text" => running_job[:stderr_buf_to_flush]})
-        log.save!
-        running_job[:events_logged] += 1
-      rescue => exception
-        $stderr.puts "Failed to write logs"
-        $stderr.puts exception.backtrace
-      end
-      running_job[:stderr_buf_to_flush] = ''
-      running_job[:stderr_flushed_at] = Time.now
-    end
-  end
-end
-
-# This is how crunch-job child procs know where the "refresh" trigger file is
-ENV["CRUNCH_REFRESH_TRIGGER"] = Rails.configuration.crunch_refresh_trigger
-
-# If salloc can't allocate resources immediately, make it use our temporary
-# failure exit code.  This ensures crunch-dispatch won't mark a job failed
-# because of an issue with node allocation.  This often happens when
-# another dispatcher wins the race to allocate nodes.
-ENV["SLURM_EXIT_IMMEDIATE"] = Dispatcher::EXIT_TEMPFAIL.to_s
+require './lib/crunch_dispatch.rb'
 
-Dispatcher.new.run
+CrunchDispatch.new.run dispatch_argv
index b70807bd38acbb484da6e8f89f0c9525b4491993..7ef98bb7c8571ec8a6f64f4eb4a8906d25c52361 100755 (executable)
@@ -12,6 +12,7 @@
 
 require 'trollop'
 require './lib/salvage_collection'
+include SalvageCollection
 
 opts = Trollop::options do
   banner ''
index 9199d178f6bcdfec3c8536d8da9f7e6b22613898..85c2c791a379552caabc10f6665e1dfca10a2190 100644 (file)
@@ -18,6 +18,18 @@ admin_trustedclient:
   api_token: 1a9ffdcga2o7cw8q12dndskomgs1ygli3ns9k2o9hgzgmktc78
   expires_at: 2038-01-01 00:00:00
 
+data_manager:
+  api_client: untrusted
+  user: system_user
+  api_token: 320mkve8qkswstz7ff61glpk3mhgghmg67wmic7elw4z41pke1
+  expires_at: 2038-01-01 00:00:00
+  scopes:
+    - GET /arvados/v1/collections
+    - GET /arvados/v1/keep_services
+    - GET /arvados/v1/keep_services/accessible
+    - GET /arvados/v1/users/current
+    - POST /arvados/v1/logs
+
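As a rough illustration of how scoped tokens like data_manager's restrict requests (our reading of the convention, not code from this diff): each scope is "VERB /path", and a trailing slash permits deeper paths:

    scopes = ['GET /arvados/v1/collections', 'POST /arvados/v1/logs']

    def allowed?(scopes, verb, path)
      scopes.any? do |scope|
        sverb, spath = scope.split(' ', 2)
        verb == sverb and (path == spath or
                           (spath.end_with?('/') and path.start_with?(spath)))
      end
    end

    p allowed?(scopes, 'GET', '/arvados/v1/collections')  # => true
    p allowed?(scopes, 'DELETE', '/arvados/v1/logs')      # => false
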
 miniadmin:
   api_client: untrusted
   user: miniadmin
@@ -87,7 +99,7 @@ active_all_collections:
   user: active
   api_token: activecollectionsabcdefghijklmnopqrstuvwxyz1234567
   expires_at: 2038-01-01 00:00:00
-  scopes: ["GET /arvados/v1/collections/", "GET /arvados/v1/keep_disks"]
+  scopes: ["GET /arvados/v1/collections/", "GET /arvados/v1/keep_services/accessible"]
 
 active_userlist:
   api_client: untrusted
index d7f6f92f186341d9da550ff6b4e95f132787f04f..556a5c047120923aa557a5fe38eab002313aaad3 100644 (file)
@@ -92,7 +92,7 @@ multilevel_collection_2:
   modified_by_user_uuid: zzzzz-tpzed-d9tiejq69daie8f
   modified_at: 2014-02-03T17:22:54Z
   updated_at: 2014-02-03T17:22:54Z
-  manifest_text: "./dir1/sub1 0:0:a 0:0:b\n./dir2/sub2 0:0:c 0:0:d\n"
+  manifest_text: "./dir1/sub1 d41d8cd98f00b204e9800998ecf8427e+0 0:0:a 0:0:b\n./dir2/sub2 d41d8cd98f00b204e9800998ecf8427e+0 0:0:c 0:0:d\n"
   name: multilevel_collection_2
 
 docker_image:
@@ -299,7 +299,7 @@ collection_with_files_in_subdir:
   modified_by_user_uuid: zzzzz-tpzed-user1withloadab
   modified_at: 2014-02-03T17:22:54Z
   updated_at: 2014-02-03T17:22:54Z
-  manifest_text: ". 85877ca2d7e05498dd3d109baf2df106+95+A3a4e26a366ee7e4ed3e476ccf05354761be2e4ae@545a9920 0:95:file_in_subdir1\n./subdir2/subdir3 2bbc341c702df4d8f42ec31f16c10120+64+A315d7e7bad2ce937e711fc454fae2d1194d14d64@545a9920 0:32:file1_in_subdir3.txt 32:32:file2_in_subdir3.txt\n./subdir2/subdir3/subdir4 2bbc341c702df4d8f42ec31f16c10120+64+A315d7e7bad2ce937e711fc454fae2d1194d14d64@545a9920 0:32:file1_in_subdir4.txt 32:32:file2_in_subdir4.txt"
+  manifest_text: ". 85877ca2d7e05498dd3d109baf2df106+95 0:95:file_in_subdir1\n./subdir2/subdir3 2bbc341c702df4d8f42ec31f16c10120+64 0:32:file1_in_subdir3.txt 32:32:file2_in_subdir3.txt\n./subdir2/subdir3/subdir4 2bbc341c702df4d8f42ec31f16c10120+64 0:32:file1_in_subdir4.txt 32:32:file2_in_subdir4.txt"
 
 graph_test_collection1:
   uuid: zzzzz-4zz18-bv31uwvy3neko22
@@ -481,7 +481,7 @@ collection_with_repeated_filenames_and_contents_in_two_dirs_1:
   modified_at: 2014-02-03T17:22:54Z
   updated_at: 2014-02-03T17:22:54Z
   name: collection_with_repeated_filenames_and_contents_in_two_dirs_1
-  manifest_text: "./dir1 92b53930db60fe94be2a73fc771ba921+34+Af966b611a1e6a7df18e0f20ac742a255c27744b7@550a3f11 0:12:alice 12:12:alice.txt 24:10:bob.txt\n./dir2 56ac2557b1ded11ccab7293dc47d1e88+44+A1780092551dadcb9c74190a793a779cea84d632d@550a3f11 0:27:alice.txt\n"
+  manifest_text: "./dir1 92b53930db60fe94be2a73fc771ba921+34 0:12:alice 12:12:alice.txt 24:10:bob.txt\n./dir2 56ac2557b1ded11ccab7293dc47d1e88+44 0:27:alice.txt\n"
 
 collection_with_repeated_filenames_and_contents_in_two_dirs_2:
   uuid: zzzzz-4zz18-duplicatenames2
@@ -493,7 +493,7 @@ collection_with_repeated_filenames_and_contents_in_two_dirs_2:
   modified_at: 2014-02-03T17:22:54Z
   updated_at: 2014-02-03T17:22:54Z
   name: collection_with_repeated_filenames_and_contents_in_two_dirs_2
-  manifest_text: "./dir1 92b53930db60fe94be2a73fc771ba921+34+Af966b611a1e6a7df18e0f20ac742a255c27744b7@550a3f11 0:12:alice 12:12:alice.txt 24:10:carol.txt\n./dir2 56ac2557b1ded11ccab7293dc47d1e88+44+A1780092551dadcb9c74190a793a779cea84d632d@550a3f11 0:27:alice.txt\n"
+  manifest_text: "./dir1 92b53930db60fe94be2a73fc771ba921+34 0:12:alice 12:12:alice.txt 24:10:carol.txt\n./dir2 56ac2557b1ded11ccab7293dc47d1e88+44 0:27:alice.txt\n"
 
 foo_and_bar_files_in_dir:
   uuid: zzzzz-4zz18-foonbarfilesdir
@@ -505,7 +505,7 @@ foo_and_bar_files_in_dir:
   modified_at: 2014-02-03T17:22:54Z
   updated_at: 2014-02-03T17:22:54Z
   name: foo_file_in_dir
-  manifest_text: "./dir1 a84b928ebdbae3f658518c711beaec02+28+A0cff02249e70e8cd6e55dba49fef4afa3f5bfdfb@550acd28 0:3:bar 12:16:foo\n"
+  manifest_text: "./dir1 3858f62230ac3c915f300c664312c63f+6 3:3:bar 0:3:foo\n"
 
 multi_level_to_combine:
   uuid: zzzzz-4zz18-pyw8yp9g3ujh45f
index 8a4c34557c4e45c3cf1e0ae22a675901e8b3754e..12493e35514714dfdffc1e6f5aeabdf09d0dd223 100644 (file)
@@ -482,7 +482,7 @@ completed_job_in_publicly_accessible_project:
   script_parameters:
     input: fa7aeb5140e2848d39b416daeef4ffc5+45
     input2: "stuff2"
-  log: ~
+  log: zzzzz-4zz18-4en62shvi99lxd4
   output: b519d9cb706a29fc7ea24dbea2f05851+93
 
 job_in_publicly_accessible_project_but_other_objects_elsewhere:
index 6ca22e152615c30180de4fc07a5e6d19c2b8ed12..23d2e1955eeee141e263a68f7aa101549e3037b1 100644 (file)
@@ -38,6 +38,7 @@ idle:
   info:
     ping_secret: "69udawxvn3zzj45hs8bumvndricrha4lcpi23pd69e44soanc0"
     slurm_state: "idle"
+  properties:
     total_cpu_cores: 16
 
 was_idle_now_down:
@@ -53,6 +54,7 @@ was_idle_now_down:
   info:
     ping_secret: "1bd1yi0x4lb5q4gzqqtrnq30oyj08r8dtdimmanbqw49z1anz2"
     slurm_state: "idle"
+  properties:
     total_cpu_cores: 16
 
 new_with_no_hostname:
index bfa138d72cdd9fcdac96ad3ec865e39e34838ab1..1375d4c9ce71549bdb43988cf4808eb70a908bf2 100644 (file)
@@ -2,7 +2,7 @@ require 'test_helper'
 
 class Arvados::V1::KeepServicesControllerTest < ActionController::TestCase
 
-  test "search keep_services by service_port with < query" do
+  test "search by service_port with < query" do
     authorize_with :active
     get :index, {
       filters: [['service_port', '<', 25107]]
@@ -11,7 +11,7 @@ class Arvados::V1::KeepServicesControllerTest < ActionController::TestCase
     assert_equal false, assigns(:objects).any?
   end
 
-  test "search keep_disks by service_port with >= query" do
+  test "search by service_port with >= query" do
     authorize_with :active
     get :index, {
       filters: [['service_port', '>=', 25107]]
@@ -20,4 +20,16 @@ class Arvados::V1::KeepServicesControllerTest < ActionController::TestCase
     assert_equal true, assigns(:objects).any?
   end
 
+  [:admin, :active, :inactive, :anonymous].each do |u|
+    test "accessible to #{u} user" do
+      authorize_with u
+      get :accessible
+      assert_response :success
+      assert_not_empty json_response['items']
+      json_response['items'].each do |ks|
+        assert_not_equal ks['service_type'], 'proxy'
+      end
+    end
+  end
+
 end
index 520e36ec373e36a2865a4db4bd45da3f241b66fb..f651d81eb3dd13a049896e0e9cdccb87eda6d548 100644 (file)
@@ -2,6 +2,13 @@ require 'test_helper'
 
 class Arvados::V1::SchemaControllerTest < ActionController::TestCase
 
+  setup do forget end
+  teardown do forget end
+  def forget
+    Rails.cache.delete 'arvados_v1_rest_discovery'
+    AppVersion.forget
+  end
+
   test "should get fresh discovery document" do
     MAX_SCHEMA_AGE = 60
     get :index
@@ -20,4 +27,19 @@ class Arvados::V1::SchemaControllerTest < ActionController::TestCase
     assert_includes discovery_doc, 'defaultTrashLifetime'
     assert_equal discovery_doc['defaultTrashLifetime'], Rails.application.config.default_trash_lifetime
   end
+
+  test "discovery document has source_version" do
+    get :index
+    assert_response :success
+    discovery_doc = JSON.parse(@response.body)
+    assert_match /^[0-9a-f]+(-modified)?$/, discovery_doc['source_version']
+  end
+
+  test "discovery document overrides source_version with config" do
+    Rails.configuration.source_version = 'aaa888fff'
+    get :index
+    assert_response :success
+    discovery_doc = JSON.parse(@response.body)
+    assert_equal 'aaa888fff', discovery_doc['source_version']
+  end
 end
diff --git a/services/api/test/unit/app_version_test.rb b/services/api/test/unit/app_version_test.rb
new file mode 100644 (file)
index 0000000..3e9b167
--- /dev/null
@@ -0,0 +1,43 @@
+require 'test_helper'
+
+class AppVersionTest < ActiveSupport::TestCase
+
+  setup do AppVersion.forget end
+
+  teardown do AppVersion.forget end
+
+  test 'invoke git processes only on first call' do
+    AppVersion.expects(:git).
+      with("status", "--porcelain").once.
+      yields " M services/api/README\n"
+    AppVersion.expects(:git).
+      with("log", "-n1", "--format=%H").once.
+      yields "da39a3ee5e6b4b0d3255bfef95601890afd80709\n"
+
+    (0..4).each do
+      v = AppVersion.hash
+      assert_equal 'da39a3ee-modified', v
+    end
+  end
+
+  test 'override with configuration' do
+    Rails.configuration.source_version = 'foobar'
+    assert_equal 'foobar', AppVersion.hash
+    Rails.configuration.source_version = false
+    assert_not_equal 'foobar', AppVersion.hash
+  end
+
+  test 'override with file' do
+    path = Rails.root.join 'git-commit.version'
+    assert(!File.exists?(path),
+           "Packaged version file found in source tree: #{path}")
+    begin
+      File.open(path, 'w') do |f|
+        f.write "0.1.abc123\n"
+      end
+      assert_equal "0.1.abc123", AppVersion.hash
+    ensure
+      File.unlink path
+    end
+  end
+end
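
Taken together, these tests pin down AppVersion.hash's resolution order (a summary inferred from the tests, not a spec): a configuration override wins, then a packaged git-commit.version file, then git metadata, memoized until forget is called:

    Rails.configuration.source_version = 'v1.2.3'
    AppVersion.hash           # => "v1.2.3" (config wins)

    Rails.configuration.source_version = false
    AppVersion.forget         # drop the memoized value
    AppVersion.hash           # => e.g. "da39a3ee-modified" ("-modified" is
                              #    appended when the work tree is dirty)
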
diff --git a/services/api/test/unit/create_superuser_token_test.rb b/services/api/test/unit/create_superuser_token_test.rb
new file mode 100644 (file)
index 0000000..d5ca3f9
--- /dev/null
@@ -0,0 +1,78 @@
+require 'test_helper'
+require 'create_superuser_token'
+
+class CreateSuperUserTokenTest < ActiveSupport::TestCase
+  include CreateSuperUserToken
+
+  test "create superuser token twice and expect same resutls" do
+    # Create a token with some string
+    token1 = create_superuser_token 'atesttoken'
+    assert_not_nil token1
+    assert_equal token1, 'atesttoken'
+
+    # Create token again; this time, we should get the one created earlier
+    token2 = create_superuser_token
+    assert_not_nil token2
+    assert_equal token1, token2
+  end
+
+  test "create superuser token with two different inputs and expect the first both times" do
+    # Create a token with some string
+    token1 = create_superuser_token 'atesttoken'
+    assert_not_nil token1
+    assert_equal token1, 'atesttoken'
+
+    # Create token again with some other string and expect the existing superuser token back
+    token2 = create_superuser_token 'someothertokenstring'
+    assert_not_nil token2
+    assert_equal token1, token2
+  end
+
+  test "create superuser token twice and expect same results" do
+    # Create a token with some string
+    token1 = create_superuser_token 'atesttoken'
+    assert_not_nil token1
+    assert_equal token1, 'atesttoken'
+
+    # Create token again with that same superuser token and expect it back
+    token2 = create_superuser_token 'atesttoken'
+    assert_not_nil token2
+    assert_equal token1, token2
+  end
+
+  test "create superuser token and invoke again with some other valid token" do
+    # Create a token with some string
+    token1 = create_superuser_token 'atesttoken'
+    assert_not_nil token1
+    assert_equal token1, 'atesttoken'
+
+    su_token = api_client_authorizations("system_user").api_token
+    token2 = create_superuser_token su_token
+    assert_equal token2, su_token
+  end
+
+  test "create superuser token, expire it, and create again" do
+    # Create a token with some string
+    token1 = create_superuser_token 'atesttoken'
+    assert_not_nil token1
+    assert_equal token1, 'atesttoken'
+
+    # Expire this token and call create again; expect a new token created
+    api_client_auth = ApiClientAuthorization.where(api_token: token1).first
+    Thread.current[:user] = users(:admin)
+    api_client_auth.update_attributes expires_at: '2000-10-10'
+
+    token2 = create_superuser_token
+    assert_not_nil token2
+    assert_not_equal token1, token2
+  end
+
+  test "invoke create superuser token with an invalid non-superuser token and expect error" do
+    active_user_token = api_client_authorizations("active").api_token
+    e = assert_raises RuntimeError do
+      create_superuser_token active_user_token
+    end
+    assert_not_nil e
+    assert_equal "Token already exists but is not a superuser token.", e.message
+  end
+end
diff --git a/services/api/test/unit/crunch_dispatch_test.rb b/services/api/test/unit/crunch_dispatch_test.rb
new file mode 100644 (file)
index 0000000..09f4af6
--- /dev/null
@@ -0,0 +1,119 @@
+require 'test_helper'
+require 'crunch_dispatch'
+
+class CrunchDispatchTest < ActiveSupport::TestCase
+  test 'choose cheaper nodes first' do
+    act_as_system_user do
+      # Replace test fixtures with a set suitable for testing dispatch
+      Node.destroy_all
+
+      # Idle nodes with different prices
+      [['compute1', 3.20, 32],
+       ['compute2', 1.60, 16],
+       ['compute3', 0.80, 8]].each do |hostname, price, cores|
+        Node.create!(hostname: hostname,
+                     info: {
+                       'slurm_state' => 'idle',
+                     },
+                     properties: {
+                       'cloud_node' => {
+                         'price' => price,
+                       },
+                       'total_cpu_cores' => cores,
+                       'total_ram_mb' => cores*1024,
+                       'total_scratch_mb' => cores*10000,
+                     })
+      end
+
+      # Node with no price information
+      Node.create!(hostname: 'compute4',
+                   info: {
+                     'slurm_state' => 'idle',
+                   },
+                   properties: {
+                     'total_cpu_cores' => 8,
+                     'total_ram_mb' => 8192,
+                     'total_scratch_mb' => 80000,
+                   })
+
+      # Cheap but busy node
+      Node.create!(hostname: 'compute5',
+                   info: {
+                     'slurm_state' => 'alloc',
+                   },
+                   properties: {
+                     'cloud_node' => {
+                       'price' => 0.10,
+                     },
+                     'total_cpu_cores' => 32,
+                     'total_ram_mb' => 32768,
+                     'total_scratch_mb' => 320000,
+                   })
+    end
+
+    dispatch = CrunchDispatch.new
+    [[1, 16384, ['compute2']],
+     [2, 16384, ['compute2', 'compute1']],
+     [2, 8000, ['compute4', 'compute3']],
+    ].each do |min_nodes, min_ram, expect_nodes|
+      job = Job.new(runtime_constraints: {
+                      'min_nodes' => min_nodes,
+                      'min_ram_mb_per_node' => min_ram,
+                    })
+      nodes = dispatch.nodes_available_for_job_now job
+      assert_equal expect_nodes, nodes
+    end
+  end
+
+  test 'respond to TERM' do
+    lockfile = Rails.root.join 'tmp', 'dispatch.lock'
+    ENV['CRUNCH_DISPATCH_LOCKFILE'] = lockfile.to_s
+    begin
+      pid = Process.fork do
+        begin
+          # Abandon database connections inherited from parent
+          # process.  Credit to
+          # https://github.com/kstephens/rails_is_forked
+          ActiveRecord::Base.connection_handler.connection_pools.each_value do |pool|
+            pool.instance_eval do
+              @reserved_connections = {}
+              @connections = []
+            end
+          end
+          ActiveRecord::Base.establish_connection
+
+          dispatch = CrunchDispatch.new
+          dispatch.stubs(:did_recently).returns true
+          dispatch.run []
+        ensure
+          Process.exit!
+        end
+      end
+      assert_with_timeout 5, "Dispatch did not lock #{lockfile}" do
+        !can_lock(lockfile)
+      end
+    ensure
+      Process.kill("TERM", pid)
+    end
+    assert_with_timeout 20, "Dispatch did not unlock #{lockfile}" do
+      can_lock(lockfile)
+    end
+  end
+
+  def assert_with_timeout timeout, message
+    t = 0
+    while (t += 0.1) < timeout
+      if yield
+        return
+      end
+      sleep 0.1
+    end
+    assert false, message + " (waited #{timeout} seconds)"
+  end
+
+  def can_lock lockfile
+    lockfile.open(File::RDWR|File::CREAT, 0644) do |f|
+      return f.flock(File::LOCK_EX|File::LOCK_NB)
+    end
+  end
+end
index b3e49c59a0954ee886b4969880271a7ec5c2291c..35c2f4884f4f99e2894c5125776edfb0db32895c 100644 (file)
@@ -25,7 +25,7 @@ func (s *GitHandlerSuite) TestEnvVars(c *check.C) {
        }
        h := newGitHandler()
        h.(*gitHandler).Path = "/bin/sh"
-       h.(*gitHandler).Args = []string{"-c", "echo HTTP/1.1 200 OK; echo Content-Type: text/plain; echo; env"}
+       h.(*gitHandler).Args = []string{"-c", "printf 'Content-Type: text/plain\r\n\r\n'; env"}
        os.Setenv("GITOLITE_HTTP_HOME", "/test/ghh")
        os.Setenv("GL_BYPASS_ACCESS_CHECKS", "yesplease")
 
@@ -40,14 +40,14 @@ func (s *GitHandlerSuite) TestEnvVars(c *check.C) {
        c.Check(body, check.Matches, `(?ms).*^SERVER_ADDR=`+regexp.QuoteMeta(theConfig.Addr)+`$.*`)
 }
 
-func (s *GitHandlerSuite) TestCGIError(c *check.C) {
+func (s *GitHandlerSuite) TestCGIErrorOnSplitHostPortError(c *check.C) {
        u, err := url.Parse("git.zzzzz.arvadosapi.com/test")
        c.Check(err, check.Equals, nil)
        resp := httptest.NewRecorder()
        req := &http.Request{
                Method:     "GET",
                URL:        u,
-               RemoteAddr: "bogus",
+               RemoteAddr: "test.bad.address.missing.port",
        }
        h := newGitHandler()
        h.ServeHTTP(resp, req)
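The renamed test above pins down exactly which failure it exercises: the handler's error path fires when net.SplitHostPort cannot parse RemoteAddr, and the old value "bogus" only happened to trigger that because it lacks a port. A minimal standalone sketch of that standard-library behavior:

```go
package main

import (
	"fmt"
	"net"
)

func main() {
	// Same shape of RemoteAddr the test now uses: a host with no ":port" suffix.
	_, _, err := net.SplitHostPort("test.bad.address.missing.port")
	fmt.Println(err) // address test.bad.address.missing.port: missing port in address
}
```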
index 482a5776d92d08306ef8327339200418b887c723..5a95e27b93b26789d10d93f4dadeac849fcfaa9b 100755 (executable)
@@ -83,7 +83,7 @@ class ArvWeb(object):
     def run_fuse_mount(self):
         self.mountdir = tempfile.mkdtemp()
 
-        self.operations = Operations(os.getuid(), os.getgid(), "utf-8")
+        self.operations = Operations(os.getuid(), os.getgid(), self.api, "utf-8")
         self.cdir = CollectionDirectory(llfuse.ROOT_INODE, self.operations.inodes, self.api, 2, self.collection)
         self.operations.inodes.add_entry(self.cdir)
 
index 13f4dc60f71db9567e26f534c2379a63c601289b..69f31afbc9589ce6cd6c9de2a731d5093e2c80cd 100644 (file)
@@ -101,7 +101,7 @@ func TestCopyPipeToChildLogLongLines(t *testing.T) {
        }
 
        if after, err := rcv.ReadBytes('\n'); err != nil || string(after) != "after\n" {
-               t.Fatal("\"after\n\" not received (got \"%s\", %s)", after, err)
+               t.Fatalf("\"after\n\" not received (got \"%s\", %s)", after, err)
        }
 
        select {
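The one-character change above matters because t.Fatal takes plain arguments and prints format verbs literally; only t.Fatalf interprets them, and go vet flags the former as a Printf-style mistake. A small illustration of the difference:

```go
package example

import "testing"

func TestFormatting(t *testing.T) {
	got, want := "after\n", "after\n"
	if got != want {
		// t.Fatal("%q not received (got %q)", want, got) // wrong: "%q" would be printed literally
		t.Fatalf("%q not received (got %q)", want, got) // right: verbs are formatted
	}
}
```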
index ca03627405e0742ad419b1d1dd6daf64fba7a341..1229f2917e21f9b17d897db8fe85e7adb4e43429 100644 (file)
@@ -10,7 +10,6 @@ import (
        "git.curoverse.com/arvados.git/sdk/go/logger"
        "git.curoverse.com/arvados.git/sdk/go/manifest"
        "git.curoverse.com/arvados.git/sdk/go/util"
-       "git.curoverse.com/arvados.git/services/datamanager/loggerutil"
        "log"
        "os"
        "runtime/pprof"
@@ -18,10 +17,7 @@ import (
 )
 
 var (
-       heapProfileFilename string
-       // globals for debugging
-       totalManifestSize uint64
-       maxManifestSize   uint64
+       HeapProfileFilename string
 )
 
 // Collection representation
@@ -67,7 +63,7 @@ type SdkCollectionList struct {
 }
 
 func init() {
-       flag.StringVar(&heapProfileFilename,
+       flag.StringVar(&HeapProfileFilename,
                "heap-profile",
                "",
                "File to write the heap profiles to. Leave blank to skip profiling.")
@@ -79,26 +75,29 @@ func init() {
 // to call multiple times in a single run.
 // Otherwise we would see cumulative numbers as explained here:
 // https://groups.google.com/d/msg/golang-nuts/ZyHciRglQYc/2nh4Ndu2fZcJ
-func WriteHeapProfile() {
-       if heapProfileFilename != "" {
-
-               heapProfile, err := os.Create(heapProfileFilename)
+func WriteHeapProfile() error {
+       if HeapProfileFilename != "" {
+               heapProfile, err := os.Create(HeapProfileFilename)
                if err != nil {
-                       log.Fatal(err)
+                       return err
                }
 
                defer heapProfile.Close()
 
                err = pprof.WriteHeapProfile(heapProfile)
-               if err != nil {
-                       log.Fatal(err)
-               }
+               return err
        }
+
+       return nil
 }
 
 // GetCollectionsAndSummarize gets collections from api and summarizes
-func GetCollectionsAndSummarize(params GetCollectionsParams) (results ReadCollections) {
-       results = GetCollections(params)
+func GetCollectionsAndSummarize(params GetCollectionsParams) (results ReadCollections, err error) {
+       results, err = GetCollections(params)
+       if err != nil {
+               return
+       }
+
        results.Summarize(params.Logger)
 
        log.Printf("Uuid to Size used: %v", results.OwnerToCollectionSize)
@@ -116,10 +115,11 @@ func GetCollectionsAndSummarize(params GetCollectionsParams) (results ReadCollec
 }
 
 // GetCollections gets collections from api
-func GetCollections(params GetCollectionsParams) (results ReadCollections) {
+func GetCollections(params GetCollectionsParams) (results ReadCollections, err error) {
        if &params.Client == nil {
-               log.Fatalf("params.Client passed to GetCollections() should " +
+               err = fmt.Errorf("params.Client passed to GetCollections() should " +
                        "contain a valid ArvadosClient, but instead it is nil.")
+               return
        }
 
        fieldsWanted := []string{"manifest_text",
@@ -139,26 +139,25 @@ func GetCollections(params GetCollectionsParams) (results ReadCollections) {
 
        var defaultReplicationLevel int
        {
-               value, err := params.Client.Discovery("defaultCollectionReplication")
+               var value interface{}
+               value, err = params.Client.Discovery("defaultCollectionReplication")
                if err != nil {
-                       loggerutil.FatalWithMessage(params.Logger,
-                               fmt.Sprintf("Error querying default collection replication: %v", err))
+                       return
                }
 
                defaultReplicationLevel = int(value.(float64))
                if defaultReplicationLevel <= 0 {
-                       loggerutil.FatalWithMessage(params.Logger,
-                               fmt.Sprintf("Default collection replication returned by arvados SDK "+
-                                       "should be a positive integer but instead it was %d.",
-                                       defaultReplicationLevel))
+                       err = fmt.Errorf("Default collection replication returned by arvados SDK "+
+                               "should be a positive integer but instead it was %d.",
+                               defaultReplicationLevel)
+                       return
                }
        }
 
        initialNumberOfCollectionsAvailable, err :=
                util.NumberItemsAvailable(params.Client, "collections")
        if err != nil {
-               loggerutil.FatalWithMessage(params.Logger,
-                       fmt.Sprintf("Error querying collection count: %v", err))
+               return
        }
        // Include a 1% margin for collections added while we're reading so
        // that we don't have to grow the map in most cases.
@@ -183,22 +182,27 @@ func GetCollections(params GetCollectionsParams) (results ReadCollections) {
                // We're still finding new collections
 
                // Write the heap profile for examining memory usage
-               WriteHeapProfile()
+               err = WriteHeapProfile()
+               if err != nil {
+                       return
+               }
 
                // Get next batch of collections.
                var collections SdkCollectionList
-               err := params.Client.List("collections", sdkParams, &collections)
+               err = params.Client.List("collections", sdkParams, &collections)
                if err != nil {
-                       loggerutil.FatalWithMessage(params.Logger,
-                               fmt.Sprintf("Error querying collections: %v", err))
+                       return
                }
 
                // Process collection and update our date filter.
-               sdkParams["filters"].([][]string)[0][2] =
-                       ProcessCollections(params.Logger,
-                               collections.Items,
-                               defaultReplicationLevel,
-                               results.UUIDToCollection).Format(time.RFC3339)
+               latestModificationDate, maxManifestSize, totalManifestSize, err := ProcessCollections(params.Logger,
+                       collections.Items,
+                       defaultReplicationLevel,
+                       results.UUIDToCollection)
+               if err != nil {
+                       return results, err
+               }
+               sdkParams["filters"].([][]string)[0][2] = latestModificationDate.Format(time.RFC3339)
 
                // update counts
                previousTotalCollections = totalCollections
@@ -224,7 +228,7 @@ func GetCollections(params GetCollectionsParams) (results ReadCollections) {
        }
 
        // Write the heap profile for examining memory usage
-       WriteHeapProfile()
+       err = WriteHeapProfile()
 
        return
 }
@@ -240,7 +244,12 @@ func StrCopy(s string) string {
 func ProcessCollections(arvLogger *logger.Logger,
        receivedCollections []SdkCollectionInfo,
        defaultReplicationLevel int,
-       UUIDToCollection map[string]Collection) (latestModificationDate time.Time) {
+       UUIDToCollection map[string]Collection,
+) (
+       latestModificationDate time.Time,
+       maxManifestSize, totalManifestSize uint64,
+       err error,
+) {
        for _, sdkCollection := range receivedCollections {
                collection := Collection{UUID: StrCopy(sdkCollection.UUID),
                        OwnerUUID:         StrCopy(sdkCollection.OwnerUUID),
@@ -248,13 +257,13 @@ func ProcessCollections(arvLogger *logger.Logger,
                        BlockDigestToSize: make(map[blockdigest.BlockDigest]int)}
 
                if sdkCollection.ModifiedAt.IsZero() {
-                       loggerutil.FatalWithMessage(arvLogger,
-                               fmt.Sprintf(
-                                       "Arvados SDK collection returned with unexpected zero "+
-                                               "modification date. This probably means that either we failed to "+
-                                               "parse the modification date or the API server has changed how "+
-                                               "it returns modification dates: %+v",
-                                       collection))
+                       err = fmt.Errorf(
+                               "Arvados SDK collection returned with unexpected zero "+
+                                       "modification date. This probably means that either we failed to "+
+                                       "parse the modification date or the API server has changed how "+
+                                       "it returns modification dates: %+v",
+                               collection)
+                       return
                }
 
                if sdkCollection.ModifiedAt.After(latestModificationDate) {
@@ -265,7 +274,7 @@ func ProcessCollections(arvLogger *logger.Logger,
                        collection.ReplicationLevel = defaultReplicationLevel
                }
 
-               manifest := manifest.Manifest{sdkCollection.ManifestText}
+               manifest := manifest.Manifest{Text: sdkCollection.ManifestText}
                manifestSize := uint64(len(sdkCollection.ManifestText))
 
                if _, alreadySeen := UUIDToCollection[collection.UUID]; !alreadySeen {
@@ -278,16 +287,20 @@ func ProcessCollections(arvLogger *logger.Logger,
                blockChannel := manifest.BlockIterWithDuplicates()
                for block := range blockChannel {
                        if storedSize, stored := collection.BlockDigestToSize[block.Digest]; stored && storedSize != block.Size {
-                               message := fmt.Sprintf(
+                               log.Printf(
                                        "Collection %s contains multiple sizes (%d and %d) for block %s",
                                        collection.UUID,
                                        storedSize,
                                        block.Size,
                                        block.Digest)
-                               loggerutil.FatalWithMessage(arvLogger, message)
                        }
                        collection.BlockDigestToSize[block.Digest] = block.Size
                }
+               if manifest.Err != nil {
+                       err = manifest.Err
+                       return
+               }
+
                collection.TotalSize = 0
                for _, size := range collection.BlockDigestToSize {
 			collection.TotalSize += size
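The collection.go hunks above all apply one refactoring: calls that used to end the process via log.Fatal or loggerutil.FatalWithMessage now hand an error back through named return values, so the caller decides whether to abort. A minimal sketch of the pattern, with a hypothetical fetch function standing in for the Arvados calls:

```go
package main

import (
	"errors"
	"fmt"
)

// fetch uses named results: assigning err and writing a bare "return"
// propagates the failure instead of exiting deep inside library code.
func fetch(fail bool) (items []string, err error) {
	if fail {
		err = errors.New("backend unavailable")
		return
	}
	items = append(items, "collection-1")
	return
}

func main() {
	if _, err := fetch(true); err != nil {
		fmt.Println("caller chooses the policy:", err)
	}
}
```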
index 07c82e1abc0b5581c523683b4947e7d094f7c9cd..47ab5fa4a8a6793f9712ebb4ba1ff5a9aea503aa 100644 (file)
@@ -1,8 +1,12 @@
 package collection
 
 import (
+       "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
+       "git.curoverse.com/arvados.git/sdk/go/arvadostest"
        "git.curoverse.com/arvados.git/sdk/go/blockdigest"
        . "gopkg.in/check.v1"
+       "net/http"
+       "net/http/httptest"
        "testing"
 )
 
@@ -121,3 +125,78 @@ func (s *MySuite) TestSummarizeOverlapping(checker *C) {
 
        CompareSummarizedReadCollections(checker, rc, expected)
 }
+
+type APITestData struct {
+       // path and response map
+       responses map[string]arvadostest.StubResponse
+
+       // expected error, if any
+       expectedError string
+}
+
+func (s *MySuite) TestGetCollectionsAndSummarize_DiscoveryError(c *C) {
+       testGetCollectionsAndSummarize(c,
+               APITestData{
+                       responses:     make(map[string]arvadostest.StubResponse),
+                       expectedError: "arvados API server error: 500.*",
+               })
+}
+
+func (s *MySuite) TestGetCollectionsAndSummarize_ApiErrorGetCollections(c *C) {
+       respMap := make(map[string]arvadostest.StubResponse)
+       respMap["/discovery/v1/apis/arvados/v1/rest"] = arvadostest.StubResponse{200, `{"defaultCollectionReplication":2}`}
+       respMap["/arvados/v1/collections"] = arvadostest.StubResponse{-1, ``}
+
+       testGetCollectionsAndSummarize(c,
+               APITestData{
+                       responses:     respMap,
+                       expectedError: "arvados API server error: 302.*",
+               })
+}
+
+func (s *MySuite) TestGetCollectionsAndSummarize_GetCollectionsBadStreamName(c *C) {
+       respMap := make(map[string]arvadostest.StubResponse)
+       respMap["/discovery/v1/apis/arvados/v1/rest"] = arvadostest.StubResponse{200, `{"defaultCollectionReplication":2}`}
+       respMap["/arvados/v1/collections"] = arvadostest.StubResponse{200, `{"items_available":1,"items":[{"modified_at":"2015-11-24T15:04:05Z","manifest_text":"badstreamname"}]}`}
+
+       testGetCollectionsAndSummarize(c,
+               APITestData{
+                       responses:     respMap,
+                       expectedError: "Invalid stream name: badstreamname",
+               })
+}
+
+func (s *MySuite) TestGetCollectionsAndSummarize_GetCollectionsBadFileToken(c *C) {
+       respMap := make(map[string]arvadostest.StubResponse)
+       respMap["/discovery/v1/apis/arvados/v1/rest"] = arvadostest.StubResponse{200, `{"defaultCollectionReplication":2}`}
+       respMap["/arvados/v1/collections"] = arvadostest.StubResponse{200, `{"items_available":1,"items":[{"modified_at":"2015-11-24T15:04:05Z","manifest_text":"./goodstream acbd18db4cc2f85cedef654fccc4a4d8+3 0:1:file1.txt file2.txt"}]}`}
+
+       testGetCollectionsAndSummarize(c,
+               APITestData{
+                       responses:     respMap,
+                       expectedError: "Invalid file token: file2.txt",
+               })
+}
+
+func testGetCollectionsAndSummarize(c *C, testData APITestData) {
+       apiStub := arvadostest.ServerStub{testData.responses}
+
+       api := httptest.NewServer(&apiStub)
+       defer api.Close()
+
+       arv := arvadosclient.ArvadosClient{
+               Scheme:    "http",
+               ApiServer: api.URL[7:],
+               ApiToken:  "abc123",
+               Client:    &http.Client{Transport: &http.Transport{}},
+       }
+
+       // GetCollectionsAndSummarize
+       _, err := GetCollectionsAndSummarize(GetCollectionsParams{arv, nil, 10})
+
+       if testData.expectedError == "" {
+               c.Assert(err, IsNil)
+       } else {
+               c.Assert(err, ErrorMatches, testData.expectedError)
+       }
+}
index a9306ce83a6011002cef96b86eb6caf700feda23..604a6db2f4a16ac97aea4579111f0d909c6c5d9e 100644 (file)
@@ -22,6 +22,7 @@ var (
        logEventTypePrefix  string
        logFrequencySeconds int
        minutesBetweenRuns  int
+       dryRun              bool
 )
 
 func init() {
@@ -36,21 +37,34 @@ func init() {
        flag.IntVar(&minutesBetweenRuns,
                "minutes-between-runs",
                0,
-               "How many minutes we wait betwen data manager runs. 0 means run once and exit.")
+               "How many minutes we wait between data manager runs. 0 means run once and exit.")
+       flag.BoolVar(&dryRun,
+               "dry-run",
+               false,
+               "Perform a dry run. Log how many blocks would be deleted/moved, but do not issue any changes to keepstore.")
 }
 
 func main() {
        flag.Parse()
+
        if minutesBetweenRuns == 0 {
-               err := singlerun(makeArvadosClient())
+               arv, err := arvadosclient.MakeArvadosClient()
+               if err != nil {
+                       loggerutil.FatalWithMessage(arvLogger, fmt.Sprintf("Error making arvados client: %v", err))
+               }
+               err = singlerun(arv)
                if err != nil {
-                       log.Fatalf("singlerun: %v", err)
+                       loggerutil.FatalWithMessage(arvLogger, fmt.Sprintf("singlerun: %v", err))
                }
        } else {
                waitTime := time.Minute * time.Duration(minutesBetweenRuns)
                for {
                        log.Println("Beginning Run")
-                       err := singlerun(makeArvadosClient())
+                       arv, err := arvadosclient.MakeArvadosClient()
+                       if err != nil {
+                               loggerutil.FatalWithMessage(arvLogger, fmt.Sprintf("Error making arvados client: %v", err))
+                       }
+                       err = singlerun(arv)
                        if err != nil {
                                log.Printf("singlerun: %v", err)
                        }
@@ -60,13 +74,7 @@ func main() {
        }
 }
 
-func makeArvadosClient() arvadosclient.ArvadosClient {
-       arv, err := arvadosclient.MakeArvadosClient()
-       if err != nil {
-               log.Fatalf("Error setting up arvados client: %s", err)
-       }
-       return arv
-}
+var arvLogger *logger.Logger
 
 func singlerun(arv arvadosclient.ArvadosClient) error {
        var err error
@@ -76,9 +84,8 @@ func singlerun(arv arvadosclient.ArvadosClient) error {
                return errors.New("Current user is not an admin. Datamanager requires a privileged token.")
        }
 
-       var arvLogger *logger.Logger
        if logEventTypePrefix != "" {
-               arvLogger = logger.NewLogger(logger.LoggerParams{
+               arvLogger, err = logger.NewLogger(logger.LoggerParams{
                        Client:          arv,
                        EventTypePrefix: logEventTypePrefix,
                        WriteInterval:   time.Second * time.Duration(logFrequencySeconds)})
@@ -101,9 +108,15 @@ func singlerun(arv arvadosclient.ArvadosClient) error {
                dataFetcher = BuildDataFetcher(arv)
        }
 
-       dataFetcher(arvLogger, &readCollections, &keepServerInfo)
+       err = dataFetcher(arvLogger, &readCollections, &keepServerInfo)
+       if err != nil {
+               return err
+       }
 
-       summary.MaybeWriteData(arvLogger, readCollections, keepServerInfo)
+       err = summary.MaybeWriteData(arvLogger, readCollections, keepServerInfo)
+       if err != nil {
+               return err
+       }
 
        buckets := summary.BucketReplication(readCollections, keepServerInfo)
        bucketCounts := buckets.Counts()
@@ -126,8 +139,7 @@ func singlerun(arv arvadosclient.ArvadosClient) error {
 
        kc, err := keepclient.MakeKeepClient(&arv)
        if err != nil {
-               loggerutil.FatalWithMessage(arvLogger,
-                       fmt.Sprintf("Error setting up keep client %s", err.Error()))
+               return fmt.Errorf("Error setting up keep client %v", err.Error())
        }
 
        // Log that we're finished. We force the recording, since go will
@@ -154,37 +166,50 @@ func singlerun(arv arvadosclient.ArvadosClient) error {
                &keepServerInfo,
                replicationSummary.KeepBlocksNotInCollections)
 
-       summary.WritePullLists(arvLogger, pullLists)
+       err = summary.WritePullLists(arvLogger, pullLists, dryRun)
+       if err != nil {
+               return err
+       }
 
        if trashErr != nil {
 		return trashErr
        }
-       keep.SendTrashLists(kc, trashLists)
+       keep.SendTrashLists(arvLogger, kc, trashLists, dryRun)
 
        return nil
 }
 
 // BuildDataFetcher returns a data fetcher that fetches data from remote servers.
 func BuildDataFetcher(arv arvadosclient.ArvadosClient) summary.DataFetcher {
-       return func(arvLogger *logger.Logger,
+       return func(
+               arvLogger *logger.Logger,
                readCollections *collection.ReadCollections,
-               keepServerInfo *keep.ReadServers) {
-               collectionChannel := make(chan collection.ReadCollections)
-
+               keepServerInfo *keep.ReadServers,
+       ) error {
+               collDone := make(chan struct{})
+               var collErr error
                go func() {
-                       collectionChannel <- collection.GetCollectionsAndSummarize(
+                       *readCollections, collErr = collection.GetCollectionsAndSummarize(
                                collection.GetCollectionsParams{
                                        Client:    arv,
                                        Logger:    arvLogger,
                                        BatchSize: 50})
+                       collDone <- struct{}{}
                }()
 
-               *keepServerInfo = keep.GetKeepServersAndSummarize(
+               var keepErr error
+               *keepServerInfo, keepErr = keep.GetKeepServersAndSummarize(
                        keep.GetKeepServersParams{
                                Client: arv,
                                Logger: arvLogger,
                                Limit:  1000})
 
-               *readCollections = <-collectionChannel
+		<-collDone
+
+               // Return a nil error only if both parts succeeded.
+               if collErr != nil {
+                       return collErr
+               }
+               return keepErr
        }
 }
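The rewritten BuildDataFetcher above runs the collection scan in a goroutine while the keep-server read proceeds on the calling goroutine, joins on a done channel, and reports the collection error in preference to the keep error when both fail. A stripped-down sketch of that shape; getCollections and getKeepServers are hypothetical stand-ins for the real calls:

```go
package main

import (
	"errors"
	"fmt"
)

func getCollections() (string, error) { return "collections", nil }
func getKeepServers() (string, error) { return "", errors.New("keep index failed") }

func fetchBoth() (colls, keeps string, err error) {
	collDone := make(chan struct{})
	var collErr error
	go func() {
		colls, collErr = getCollections()
		collDone <- struct{}{} // signal completion
	}()

	keeps, keepErr := getKeepServers()
	<-collDone // join before reading colls or collErr

	if collErr != nil { // prefer the collection error, as above
		return colls, keeps, collErr
	}
	return colls, keeps, keepErr
}

func main() {
	_, _, err := fetchBoth()
	fmt.Println(err)
}
```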
index 3d9bb3da90a2c828207318908f1ee083d0d0d5f3..67b0e79f14fea4eb75a64d12f8907b9eabf4bf14 100644 (file)
@@ -6,6 +6,8 @@ import (
        "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
        "git.curoverse.com/arvados.git/sdk/go/arvadostest"
        "git.curoverse.com/arvados.git/sdk/go/keepclient"
+       "git.curoverse.com/arvados.git/services/datamanager/collection"
+       "git.curoverse.com/arvados.git/services/datamanager/summary"
        "io/ioutil"
        "net/http"
        "os"
@@ -16,11 +18,6 @@ import (
        "time"
 )
 
-const (
-       ActiveUserToken = "3kg6k6lzmp9kj5cpkcoxie963cmvjahbt2fod9zru30k1jqdmi"
-       AdminToken      = "4axaw8zxe0qm22wa6urpp5nskcne8z88cvbupv653y1njyi05h"
-)
-
 var arv arvadosclient.ArvadosClient
 var keepClient *keepclient.KeepClient
 var keepServers []string
@@ -31,20 +28,24 @@ func SetupDataManagerTest(t *testing.T) {
        // start api and keep servers
        arvadostest.ResetEnv()
        arvadostest.StartAPI()
-       arvadostest.StartKeep()
+       arvadostest.StartKeep(2, false)
 
-       arv = makeArvadosClient()
+       var err error
+       arv, err = arvadosclient.MakeArvadosClient()
+       if err != nil {
+               t.Fatalf("Error making arvados client: %s", err)
+       }
+       arv.ApiToken = arvadostest.DataManagerToken
 
        // keep client
        keepClient = &keepclient.KeepClient{
                Arvados:       &arv,
                Want_replicas: 2,
-               Using_proxy:   true,
                Client:        &http.Client{},
        }
 
        // discover keep services
-       if err := keepClient.DiscoverKeepServers(); err != nil {
+       if err = keepClient.DiscoverKeepServers(); err != nil {
                t.Fatalf("Error discovering keep services: %s", err)
        }
        keepServers = []string{}
@@ -54,8 +55,10 @@ func SetupDataManagerTest(t *testing.T) {
 }
 
 func TearDownDataManagerTest(t *testing.T) {
-       arvadostest.StopKeep()
+       arvadostest.StopKeep(2)
        arvadostest.StopAPI()
+       summary.WriteDataTo = ""
+       collection.HeapProfileFilename = ""
 }
 
 func putBlock(t *testing.T, data string) string {
@@ -124,7 +127,18 @@ func getFirstLocatorFromCollection(t *testing.T, uuid string) string {
        return match[1] + "+" + match[2]
 }
 
+func switchToken(t string) func() {
+       orig := arv.ApiToken
+       restore := func() {
+               arv.ApiToken = orig
+       }
+       arv.ApiToken = t
+       return restore
+}
+
 func getCollection(t *testing.T, uuid string) Dict {
+       defer switchToken(arvadostest.AdminToken)()
+
        getback := make(Dict)
        err := arv.Get("collections", uuid, nil, &getback)
        if err != nil {
@@ -138,6 +152,8 @@ func getCollection(t *testing.T, uuid string) Dict {
 }
 
 func updateCollection(t *testing.T, uuid string, paramName string, paramValue string) {
+       defer switchToken(arvadostest.AdminToken)()
+
        err := arv.Update("collections", uuid, arvadosclient.Dict{
                "collection": arvadosclient.Dict{
                        paramName: paramValue,
@@ -152,6 +168,8 @@ func updateCollection(t *testing.T, uuid string, paramName string, paramValue st
 type Dict map[string]interface{}
 
 func deleteCollection(t *testing.T, uuid string) {
+       defer switchToken(arvadostest.AdminToken)()
+
        getback := make(Dict)
        err := arv.Delete("collections", uuid, nil, &getback)
        if err != nil {
@@ -175,7 +193,7 @@ func getBlockIndexesForServer(t *testing.T, i int) []string {
        path := keepServers[i] + "/index"
        client := http.Client{}
        req, err := http.NewRequest("GET", path, nil)
-       req.Header.Add("Authorization", "OAuth2 "+AdminToken)
+       req.Header.Add("Authorization", "OAuth2 "+arvadostest.DataManagerToken)
        req.Header.Add("Content-Type", "application/octet-stream")
        resp, err := client.Do(req)
        defer resp.Body.Close()
@@ -239,13 +257,10 @@ func valueInArray(value string, list []string) bool {
        return false
 }
 
-/*
-Test env uses two keep volumes. The volume names can be found by reading the files
-  ARVADOS_HOME/tmp/keep0.volume and ARVADOS_HOME/tmp/keep1.volume
-
-The keep volumes are of the dir structure:
-  volumeN/subdir/locator
-*/
+// Test env uses two keep volumes. The volume names can be found by reading the files
+// ARVADOS_HOME/tmp/keep0.volume and ARVADOS_HOME/tmp/keep1.volume
+//
+// The keep volumes have the directory structure: volumeN/subdir/locator
 func backdateBlocks(t *testing.T, oldUnusedBlockLocators []string) {
        // First get rid of any size hints in the locators
        var trimmedBlockLocators []string
@@ -297,7 +312,7 @@ func backdateBlocks(t *testing.T, oldUnusedBlockLocators []string) {
 func getStatus(t *testing.T, path string) interface{} {
        client := http.Client{}
        req, err := http.NewRequest("GET", path, nil)
-       req.Header.Add("Authorization", "OAuth2 "+AdminToken)
+       req.Header.Add("Authorization", "OAuth2 "+arvadostest.DataManagerToken)
        req.Header.Add("Content-Type", "application/octet-stream")
        resp, err := client.Do(req)
        if err != nil {
@@ -327,11 +342,9 @@ func waitUntilQueuesFinishWork(t *testing.T) {
        }
 }
 
-/*
-Create some blocks and backdate some of them.
-Also create some collections and delete some of them.
-Verify block indexes.
-*/
+// Create some blocks and backdate some of them.
+// Also create some collections and delete some of them.
+// Verify block indexes.
 func TestPutAndGetBlocks(t *testing.T) {
        defer TearDownDataManagerTest(t)
        SetupDataManagerTest(t)
@@ -389,6 +402,10 @@ func TestPutAndGetBlocks(t *testing.T) {
                t.Fatalf("Locators for both these collections expected to be same: %s %s", oneOfTwoWithSameDataLocator, secondOfTwoWithSameDataLocator)
        }
 
+       // create collection with empty manifest text
+       emptyBlockLocator := putBlock(t, "")
+       emptyCollection := createCollection(t, "")
+
        // Verify blocks before doing any backdating / deleting.
        var expected []string
        expected = append(expected, oldUnusedBlockLocators...)
@@ -397,6 +414,7 @@ func TestPutAndGetBlocks(t *testing.T) {
        expected = append(expected, replicationCollectionLocator)
        expected = append(expected, oneOfTwoWithSameDataLocator)
        expected = append(expected, secondOfTwoWithSameDataLocator)
+       expected = append(expected, emptyBlockLocator)
 
        verifyBlocks(t, nil, expected, 2)
 
@@ -410,6 +428,8 @@ func TestPutAndGetBlocks(t *testing.T) {
        backdateBlocks(t, oldUnusedBlockLocators)
        deleteCollection(t, toBeDeletedCollectionUUID)
        deleteCollection(t, secondOfTwoWithSameDataUUID)
+       backdateBlocks(t, []string{emptyBlockLocator})
+       deleteCollection(t, emptyCollection)
 
        // Run data manager again
        dataManagerSingleRun(t)
@@ -422,6 +442,7 @@ func TestPutAndGetBlocks(t *testing.T) {
        expected = append(expected, toBeDeletedCollectionLocator)
        expected = append(expected, oneOfTwoWithSameDataLocator)
        expected = append(expected, secondOfTwoWithSameDataLocator)
+       expected = append(expected, emptyBlockLocator) // even when unreferenced, this remains
 
        verifyBlocks(t, oldUnusedBlockLocators, expected, 2)
 
@@ -504,10 +525,187 @@ func TestRunDatamanagerAsNonAdminUser(t *testing.T) {
        defer TearDownDataManagerTest(t)
        SetupDataManagerTest(t)
 
-       arv.ApiToken = ActiveUserToken
+       arv.ApiToken = arvadostest.ActiveToken
 
        err := singlerun(arv)
        if err == nil {
                t.Fatalf("Expected error during singlerun as non-admin user")
        }
 }
+
+func TestPutAndGetBlocks_NoErrorDuringSingleRun(t *testing.T) {
+       testOldBlocksNotDeletedOnDataManagerError(t, "", "", false, false)
+}
+
+func TestPutAndGetBlocks_ErrorDuringGetCollectionsBadWriteTo(t *testing.T) {
+       testOldBlocksNotDeletedOnDataManagerError(t, "/badwritetofile", "", true, true)
+}
+
+func TestPutAndGetBlocks_ErrorDuringGetCollectionsBadHeapProfileFilename(t *testing.T) {
+       testOldBlocksNotDeletedOnDataManagerError(t, "", "/badheapprofilefile", true, true)
+}
+
+// Create some blocks and backdate some of them.
+// Run datamanager while producing an error condition.
+// Verify that the blocks are hence not deleted.
+func testOldBlocksNotDeletedOnDataManagerError(t *testing.T, writeDataTo string, heapProfileFile string, expectError bool, expectOldBlocks bool) {
+       defer TearDownDataManagerTest(t)
+       SetupDataManagerTest(t)
+
+       // Put some blocks and backdate them.
+       var oldUnusedBlockLocators []string
+       oldUnusedBlockData := "this block will have older mtime"
+       for i := 0; i < 5; i++ {
+               oldUnusedBlockLocators = append(oldUnusedBlockLocators, putBlock(t, fmt.Sprintf("%s%d", oldUnusedBlockData, i)))
+       }
+       backdateBlocks(t, oldUnusedBlockLocators)
+
+       // Run data manager
+       summary.WriteDataTo = writeDataTo
+       collection.HeapProfileFilename = heapProfileFile
+
+       err := singlerun(arv)
+       if !expectError {
+               if err != nil {
+                       t.Fatalf("Got an error during datamanager singlerun: %v", err)
+               }
+       } else {
+               if err == nil {
+                       t.Fatalf("Expected error during datamanager singlerun")
+               }
+       }
+       waitUntilQueuesFinishWork(t)
+
+	// Get block indexes and verify that the backdated blocks were deleted or kept, as expected
+       if expectOldBlocks {
+               verifyBlocks(t, nil, oldUnusedBlockLocators, 2)
+       } else {
+               verifyBlocks(t, oldUnusedBlockLocators, nil, 2)
+       }
+}
+
+// Create a collection with multiple streams and blocks
+func createMultiStreamBlockCollection(t *testing.T, data string, numStreams, numBlocks int) (string, []string) {
+       defer switchToken(arvadostest.AdminToken)()
+
+       manifest := ""
+       locators := make(map[string]bool)
+       for s := 0; s < numStreams; s++ {
+               manifest += fmt.Sprintf("./stream%d ", s)
+               for b := 0; b < numBlocks; b++ {
+                       locator, _, err := keepClient.PutB([]byte(fmt.Sprintf("%s in stream %d and block %d", data, s, b)))
+                       if err != nil {
+                               t.Fatalf("Error creating block %d in stream %d: %v", b, s, err)
+                       }
+                       locators[strings.Split(locator, "+A")[0]] = true
+                       manifest += locator + " "
+               }
+               manifest += "0:1:dummyfile.txt\n"
+       }
+
+       collection := make(Dict)
+       err := arv.Create("collections",
+               arvadosclient.Dict{"collection": arvadosclient.Dict{"manifest_text": manifest}},
+               &collection)
+
+       if err != nil {
+               t.Fatalf("Error creating collection %v", err)
+       }
+
+       var locs []string
+       for k := range locators {
+               locs = append(locs, k)
+       }
+
+       return collection["uuid"].(string), locs
+}
+
+// Create a collection with multiple streams and blocks; backdate the blocks but do not delete the collection.
+// Also create a stray block and backdate it.
+// After datamanager run: expect blocks from the collection, but not the stray block.
+func TestManifestWithMultipleStreamsAndBlocks(t *testing.T) {
+       testManifestWithMultipleStreamsAndBlocks(t, 100, 10, "", false)
+}
+
+// Same test as TestManifestWithMultipleStreamsAndBlocks with an additional
+// keepstore of a service type other than "disk". Only the "disk" type services
+// will be indexed by datamanager and hence should work the same way.
+func TestManifestWithMultipleStreamsAndBlocks_WithOneUnsupportedKeepServer(t *testing.T) {
+       testManifestWithMultipleStreamsAndBlocks(t, 2, 2, "testblobstore", false)
+}
+
+// Test datamanager with dry-run. Expect no block to be deleted.
+func TestManifestWithMultipleStreamsAndBlocks_DryRun(t *testing.T) {
+       testManifestWithMultipleStreamsAndBlocks(t, 2, 2, "", true)
+}
+
+func testManifestWithMultipleStreamsAndBlocks(t *testing.T, numStreams, numBlocks int, createExtraKeepServerWithType string, isDryRun bool) {
+       defer TearDownDataManagerTest(t)
+       SetupDataManagerTest(t)
+
+       // create collection whose blocks will be backdated
+       collectionWithOldBlocks, oldBlocks := createMultiStreamBlockCollection(t, "old block", numStreams, numBlocks)
+       if collectionWithOldBlocks == "" {
+               t.Fatalf("Failed to create collection with %d blocks", numStreams*numBlocks)
+       }
+       if len(oldBlocks) != numStreams*numBlocks {
+		t.Fatalf("Not all blocks were created: expected %v, found %v", numStreams*numBlocks, len(oldBlocks))
+       }
+
+       // create a stray block that will be backdated
+       strayOldBlock := putBlock(t, "this stray block is old")
+
+       expected := []string{strayOldBlock}
+       expected = append(expected, oldBlocks...)
+       verifyBlocks(t, nil, expected, 2)
+
+       // Backdate old blocks; but the collection still references these blocks
+       backdateBlocks(t, oldBlocks)
+
+       // also backdate the stray old block
+       backdateBlocks(t, []string{strayOldBlock})
+
+       // If requested, create an extra keepserver with the given type
+       // This should be ignored during indexing and hence not change the datamanager outcome
+       var extraKeepServerUUID string
+       if createExtraKeepServerWithType != "" {
+               extraKeepServerUUID = addExtraKeepServer(t, createExtraKeepServerWithType)
+               defer deleteExtraKeepServer(extraKeepServerUUID)
+       }
+
+       // run datamanager
+       dryRun = isDryRun
+       dataManagerSingleRun(t)
+
+       if dryRun {
+               // verify that all blocks, including strayOldBlock, are still to be found
+               verifyBlocks(t, nil, expected, 2)
+       } else {
+		// verify that strayOldBlock is not to be found, but the collection's blocks are still there
+               verifyBlocks(t, []string{strayOldBlock}, oldBlocks, 2)
+       }
+}
+
+// Add one more keepstore with the given service type
+func addExtraKeepServer(t *testing.T, serviceType string) string {
+       defer switchToken(arvadostest.AdminToken)()
+
+       extraKeepService := make(arvadosclient.Dict)
+       err := arv.Create("keep_services",
+               arvadosclient.Dict{"keep_service": arvadosclient.Dict{
+                       "service_host":     "localhost",
+                       "service_port":     "21321",
+                       "service_ssl_flag": false,
+                       "service_type":     serviceType}},
+               &extraKeepService)
+       if err != nil {
+               t.Fatal(err)
+       }
+
+       return extraKeepService["uuid"].(string)
+}
+
+func deleteExtraKeepServer(uuid string) {
+       defer switchToken(arvadostest.AdminToken)()
+       arv.Delete("keep_services", uuid, nil, nil)
+}
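The switchToken helper above packs the save/swap/restore dance into one expression: it applies the new token immediately and returns the undo, so `defer switchToken(x)()` (note the trailing parentheses) both switches and schedules the switch-back. A self-contained sketch of the idiom, with a plain package variable standing in for arv.ApiToken:

```go
package main

import "fmt"

var apiToken = "active-token"

// switchToken swaps in t and returns a closure that restores the original.
func switchToken(t string) func() {
	orig := apiToken
	apiToken = t
	return func() { apiToken = orig }
}

func main() {
	func() {
		defer switchToken("admin-token")() // applied now, restored on return
		fmt.Println("inside:", apiToken)   // admin-token
	}()
	fmt.Println("after:", apiToken) // active-token
}
```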
index 86c2b089aa13088d8da8f524ab21dcc8dafc9641..206a9c43fd4878babf0d9a5340a68b787b15b71a 100644 (file)
@@ -6,12 +6,12 @@ import (
        "bufio"
        "encoding/json"
        "errors"
+       "flag"
        "fmt"
        "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
        "git.curoverse.com/arvados.git/sdk/go/blockdigest"
        "git.curoverse.com/arvados.git/sdk/go/keepclient"
        "git.curoverse.com/arvados.git/sdk/go/logger"
-       "git.curoverse.com/arvados.git/services/datamanager/loggerutil"
        "io"
        "io/ioutil"
        "log"
@@ -23,7 +23,7 @@ import (
 
 // ServerAddress struct
 type ServerAddress struct {
-       SSL         bool   `json:service_ssl_flag`
+       SSL         bool   `json:"service_ssl_flag"`
        Host        string `json:"service_host"`
        Port        int    `json:"service_port"`
        UUID        string `json:"uuid"`
@@ -51,6 +51,7 @@ type ServerContents struct {
 type ServerResponse struct {
        Address  ServerAddress
        Contents ServerContents
+       Err      error
 }
 
 // ReadServers struct
@@ -76,6 +77,15 @@ type ServiceList struct {
        KeepServers    []ServerAddress `json:"items"`
 }
 
+var serviceType string
+
+func init() {
+       flag.StringVar(&serviceType,
+               "service-type",
+               "disk",
+               "Operate only on keep_services with the specified service_type, ignoring all others.")
+}
+
 // String
 // TODO(misha): Change this to include the UUID as well.
 func (s ServerAddress) String() string {
@@ -91,8 +101,11 @@ func (s ServerAddress) URL() string {
 }
 
 // GetKeepServersAndSummarize gets keep servers from api
-func GetKeepServersAndSummarize(params GetKeepServersParams) (results ReadServers) {
-       results = GetKeepServers(params)
+func GetKeepServersAndSummarize(params GetKeepServersParams) (results ReadServers, err error) {
+       results, err = GetKeepServers(params)
+       if err != nil {
+               return
+       }
        log.Printf("Returned %d keep disks", len(results.ServerToContents))
 
        results.Summarize(params.Logger)
@@ -103,7 +116,7 @@ func GetKeepServersAndSummarize(params GetKeepServersParams) (results ReadServer
 }
 
 // GetKeepServers from api server
-func GetKeepServers(params GetKeepServersParams) (results ReadServers) {
+func GetKeepServers(params GetKeepServersParams) (results ReadServers, err error) {
        sdkParams := arvadosclient.Dict{
                "filters": [][]string{[]string{"service_type", "!=", "proxy"}},
        }
@@ -112,38 +125,42 @@ func GetKeepServers(params GetKeepServersParams) (results ReadServers) {
        }
 
        var sdkResponse ServiceList
-       err := params.Client.List("keep_services", sdkParams, &sdkResponse)
+       err = params.Client.List("keep_services", sdkParams, &sdkResponse)
 
        if err != nil {
-               loggerutil.FatalWithMessage(params.Logger,
-                       fmt.Sprintf("Error requesting keep disks from API server: %v", err))
+               return
        }
 
-       // Currently, only "disk" types are supported. Stop if any other service types are found.
+       var keepServers []ServerAddress
        for _, server := range sdkResponse.KeepServers {
-               if server.ServiceType != "disk" {
-                       loggerutil.FatalWithMessage(params.Logger,
-                               fmt.Sprintf("Unsupported service type %q found for: %v", server.ServiceType, server))
+               if server.ServiceType == serviceType {
+                       keepServers = append(keepServers, server)
+               } else {
+                       log.Printf("Skipping keep_service %q because its service_type %q does not match -service-type=%q", server, server.ServiceType, serviceType)
                }
        }
 
+       if len(keepServers) == 0 {
+		return results, fmt.Errorf("Found no keep services with service type %q", serviceType)
+       }
+
        if params.Logger != nil {
                params.Logger.Update(func(p map[string]interface{}, e map[string]interface{}) {
                        keepInfo := logger.GetOrCreateMap(p, "keep_info")
                        keepInfo["num_keep_servers_available"] = sdkResponse.ItemsAvailable
                        keepInfo["num_keep_servers_received"] = len(sdkResponse.KeepServers)
                        keepInfo["keep_servers"] = sdkResponse.KeepServers
+                       keepInfo["indexable_keep_servers"] = keepServers
                })
        }
 
        log.Printf("Received keep services list: %+v", sdkResponse)
 
        if len(sdkResponse.KeepServers) < sdkResponse.ItemsAvailable {
-               loggerutil.FatalWithMessage(params.Logger,
-                       fmt.Sprintf("Did not receive all available keep servers: %+v", sdkResponse))
+               return results, fmt.Errorf("Did not receive all available keep servers: %+v", sdkResponse)
        }
 
-       results.KeepServerIndexToAddress = sdkResponse.KeepServers
+       results.KeepServerIndexToAddress = keepServers
        results.KeepServerAddressToIndex = make(map[ServerAddress]int)
        for i, address := range results.KeepServerIndexToAddress {
                results.KeepServerAddressToIndex[address] = i
@@ -153,7 +170,7 @@ func GetKeepServers(params GetKeepServersParams) (results ReadServers) {
 
        // Send off all the index requests concurrently
        responseChan := make(chan ServerResponse)
-       for _, keepServer := range sdkResponse.KeepServers {
+       for _, keepServer := range results.KeepServerIndexToAddress {
 		// The above keepServer variable is reused for each iteration, so
                // it would be shared across all goroutines. This would result in
                // us querying one server n times instead of n different servers
@@ -171,9 +188,15 @@ func GetKeepServers(params GetKeepServersParams) (results ReadServers) {
        results.BlockToServers = make(map[blockdigest.DigestWithSize][]BlockServerInfo)
 
        // Read all the responses
-       for i := range sdkResponse.KeepServers {
+       for i := range results.KeepServerIndexToAddress {
                _ = i // Here to prevent go from complaining.
                response := <-responseChan
+
+               // Check if there were any errors during GetServerContents
+               if response.Err != nil {
+                       return results, response.Err
+               }
+
                log.Printf("Received channel response from %v containing %d files",
                        response.Address,
                        len(response.Contents.BlockDigestToInfo))
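The comment in the hunk above ("The above keepServer variable is reused for each iteration...") refers to the classic loop-variable capture bug in Go versions before 1.22: every goroutine closes over the same variable unless it is rebound per iteration. A minimal reproduction of the fix, under those older loop semantics:

```go
package main

import (
	"fmt"
	"sync"
)

func main() {
	servers := []string{"keep0", "keep1", "keep2"}
	var wg sync.WaitGroup
	for _, s := range servers {
		s := s // rebind so each goroutine gets its own copy
		wg.Add(1)
		go func() {
			defer wg.Done()
			fmt.Println("querying", s) // without the rebind, one server may be queried n times
		}()
	}
	wg.Wait()
}
```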
@@ -194,25 +217,37 @@ func GetServerContents(arvLogger *logger.Logger,
        keepServer ServerAddress,
        arv arvadosclient.ArvadosClient) (response ServerResponse) {
 
-       GetServerStatus(arvLogger, keepServer, arv)
+       err := GetServerStatus(arvLogger, keepServer, arv)
+       if err != nil {
+               response.Err = err
+               return
+       }
+
+       req, err := CreateIndexRequest(arvLogger, keepServer, arv)
+       if err != nil {
+               response.Err = err
+               return
+       }
 
-       req := CreateIndexRequest(arvLogger, keepServer, arv)
        resp, err := arv.Client.Do(req)
        if err != nil {
-               loggerutil.FatalWithMessage(arvLogger,
-                       fmt.Sprintf("Error fetching %s: %v. Response was %+v",
-                               req.URL.String(),
-                               err,
-                               resp))
+               response.Err = err
+               return
+       }
+
+       response, err = ReadServerResponse(arvLogger, keepServer, resp)
+       if err != nil {
+               response.Err = err
+               return
        }
 
-       return ReadServerResponse(arvLogger, keepServer, resp)
+       return
 }
 
 // GetServerStatus get keep server status by invoking /status.json
 func GetServerStatus(arvLogger *logger.Logger,
        keepServer ServerAddress,
-       arv arvadosclient.ArvadosClient) {
+       arv arvadosclient.ArvadosClient) error {
        url := fmt.Sprintf("http://%s:%d/status.json",
                keepServer.Host,
                keepServer.Port)
@@ -232,13 +267,11 @@ func GetServerStatus(arvLogger *logger.Logger,
 
        resp, err := arv.Client.Get(url)
        if err != nil {
-               loggerutil.FatalWithMessage(arvLogger,
-                       fmt.Sprintf("Error getting keep status from %s: %v", url, err))
+               return fmt.Errorf("Error getting keep status from %s: %v", url, err)
        } else if resp.StatusCode != 200 {
-               loggerutil.FatalWithMessage(arvLogger,
-                       fmt.Sprintf("Received error code %d in response to request "+
-                               "for %s status: %s",
-                               resp.StatusCode, url, resp.Status))
+               return fmt.Errorf("Received error code %d in response to request "+
+                       "for %s status: %s",
+                       resp.StatusCode, url, resp.Status)
        }
 
        var keepStatus map[string]interface{}
@@ -246,8 +279,7 @@ func GetServerStatus(arvLogger *logger.Logger,
        decoder.UseNumber()
        err = decoder.Decode(&keepStatus)
        if err != nil {
-               loggerutil.FatalWithMessage(arvLogger,
-                       fmt.Sprintf("Error decoding keep status from %s: %v", url, err))
+               return fmt.Errorf("Error decoding keep status from %s: %v", url, err)
        }
 
        if arvLogger != nil {
@@ -259,12 +291,14 @@ func GetServerStatus(arvLogger *logger.Logger,
                        serverInfo["status"] = keepStatus
                })
        }
+
+       return nil
 }
 
 // CreateIndexRequest to the keep server
 func CreateIndexRequest(arvLogger *logger.Logger,
        keepServer ServerAddress,
-       arv arvadosclient.ArvadosClient) (req *http.Request) {
+       arv arvadosclient.ArvadosClient) (req *http.Request, err error) {
        url := fmt.Sprintf("http://%s:%d/index", keepServer.Host, keepServer.Port)
        log.Println("About to fetch keep server contents from " + url)
 
@@ -277,26 +311,23 @@ func CreateIndexRequest(arvLogger *logger.Logger,
                })
        }
 
-       req, err := http.NewRequest("GET", url, nil)
+       req, err = http.NewRequest("GET", url, nil)
        if err != nil {
-               loggerutil.FatalWithMessage(arvLogger,
-                       fmt.Sprintf("Error building http request for %s: %v", url, err))
+               return req, fmt.Errorf("Error building http request for %s: %v", url, err)
        }
 
        req.Header.Add("Authorization", "OAuth2 "+arv.ApiToken)
-       return
+       return req, err
 }
 
+// ReadServerResponse reads the response from the keep server
 func ReadServerResponse(arvLogger *logger.Logger,
        keepServer ServerAddress,
-       resp *http.Response) (response ServerResponse) {
+       resp *http.Response) (response ServerResponse, err error) {
 
        if resp.StatusCode != 200 {
-               loggerutil.FatalWithMessage(arvLogger,
-                       fmt.Sprintf("Received error code %d in response to request "+
-                               "for %s index: %s",
-                               resp.StatusCode, keepServer.String(), resp.Status))
+               return response, fmt.Errorf("Received error code %d in response to index request for %s: %s",
+                       resp.StatusCode, keepServer.String(), resp.Status)
        }
 
        if arvLogger != nil {
@@ -317,35 +348,30 @@ func ReadServerResponse(arvLogger *logger.Logger,
                numLines++
                line, err := reader.ReadString('\n')
                if err == io.EOF {
-                       loggerutil.FatalWithMessage(arvLogger,
-                               fmt.Sprintf("Index from %s truncated at line %d",
-                                       keepServer.String(), numLines))
+                       return response, fmt.Errorf("Index from %s truncated at line %d",
+                               keepServer.String(), numLines)
                } else if err != nil {
-                       loggerutil.FatalWithMessage(arvLogger,
-                               fmt.Sprintf("Error reading index response from %s at line %d: %v",
-                                       keepServer.String(), numLines, err))
+                       return response, fmt.Errorf("Error reading index response from %s at line %d: %v",
+                               keepServer.String(), numLines, err)
                }
                if line == "\n" {
                        if _, err := reader.Peek(1); err == nil {
                                extra, _ := reader.ReadString('\n')
-                               loggerutil.FatalWithMessage(arvLogger,
-                                       fmt.Sprintf("Index from %s had trailing data at line %d after EOF marker: %s",
-                                               keepServer.String(), numLines+1, extra))
+                               return response, fmt.Errorf("Index from %s had trailing data at line %d after EOF marker: %s",
+                                       keepServer.String(), numLines+1, extra)
                        } else if err != io.EOF {
-                               loggerutil.FatalWithMessage(arvLogger,
-                                       fmt.Sprintf("Index from %s had read error after EOF marker at line %d: %v",
-                                               keepServer.String(), numLines, err))
+                               return response, fmt.Errorf("Index from %s had read error after EOF marker at line %d: %v",
+                                       keepServer.String(), numLines, err)
                        }
                        numLines--
                        break
                }
                blockInfo, err := parseBlockInfoFromIndexLine(line)
                if err != nil {
-                       loggerutil.FatalWithMessage(arvLogger,
-                               fmt.Sprintf("Error parsing BlockInfo from index line "+
-                                       "received from %s: %v",
-                                       keepServer.String(),
-                                       err))
+                       return response, fmt.Errorf("Error parsing BlockInfo from index line "+
+                               "received from %s: %v",
+                               keepServer.String(),
+                               err)
                }
 
                if storedBlock, ok := response.Contents.BlockDigestToInfo[blockInfo.Digest]; ok {
@@ -387,19 +413,19 @@ func parseBlockInfoFromIndexLine(indexLine string) (blockInfo BlockInfo, err err
        tokens := strings.Fields(indexLine)
        if len(tokens) != 2 {
                err = fmt.Errorf("Expected 2 tokens per line but received a "+
-                       "line containing %v instead.",
+                       "line containing %#q instead.",
                        tokens)
        }
 
        var locator blockdigest.BlockLocator
        if locator, err = blockdigest.ParseBlockLocator(tokens[0]); err != nil {
-               err = fmt.Errorf("%v Received error while parsing line \"%s\"",
+               err = fmt.Errorf("%v Received error while parsing line \"%#q\"",
                        err, indexLine)
                return
        }
        if len(locator.Hints) > 0 {
                err = fmt.Errorf("Block locator in index line should not contain hints "+
-                       "but it does: %v",
+                       "but it does: %#q",
                        locator)
                return
        }
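As the test data later in this diff shows, an index line is two whitespace-separated tokens: a block locator of the form "<md5>+<size>" and a Unix mtime. A self-contained sketch of that split, without the blockdigest package (the mtime interpretation is inferred from the test fixtures):

    package main

    import (
        "fmt"
        "strconv"
        "strings"
    )

    func main() {
        line := "51752ba076e461ec9ec1d27400a08548+20 1447526361"
        tokens := strings.Fields(line)
        if len(tokens) != 2 {
            fmt.Printf("Expected 2 tokens per line but received a line containing %#q instead.\n", tokens)
            return
        }
        mtime, err := strconv.ParseInt(tokens[1], 10, 64)
        if err != nil {
            fmt.Println(err)
            return
        }
        fmt.Println("locator:", tokens[0], "mtime:", mtime)
    }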
@@ -440,13 +466,29 @@ type TrashRequest struct {
 type TrashList []TrashRequest
 
 // SendTrashLists sends the given trash lists to each keep service's trash queue
-func SendTrashLists(kc *keepclient.KeepClient, spl map[string]TrashList) (errs []error) {
+func SendTrashLists(arvLogger *logger.Logger, kc *keepclient.KeepClient, spl map[string]TrashList, dryRun bool) (errs []error) {
        count := 0
        barrier := make(chan error)
 
        client := kc.Client
 
        for url, v := range spl {
+               if arvLogger != nil {
+                       // We need local copies because Update doesn't call our mutator func until
+                       // later, when the loop variables may have been reused by the next iteration.
+                       url := url
+                       trashLen := len(v)
+                       arvLogger.Update(func(p map[string]interface{}, e map[string]interface{}) {
+                               trashListInfo := logger.GetOrCreateMap(p, "trash_list_len")
+                               trashListInfo[url] = trashLen
+                       })
+               }
+
+               if dryRun {
+                       log.Printf("dry run, not sending trash list to service %s with %d blocks", url, len(v))
+                       continue
+               }
+
                count++
                log.Printf("Sending trash list to %v", url)
 
@@ -486,7 +528,6 @@ func SendTrashLists(kc *keepclient.KeepClient, spl map[string]TrashList) (errs [
                                barrier <- nil
                        }
                })(url, v)
-
        }
 
        for i := 0; i < count; i++ {
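The local-copy dance commented in SendTrashLists above (and again in WritePullLists further down) guards against a classic Go pitfall: a closure captures the variable, not its value, and before Go 1.22 the range variable is reused across iterations. A runnable demonstration:

    package main

    import "fmt"

    func main() {
        var funcs []func()
        for _, v := range []string{"a", "b", "c"} {
            v := v // per-iteration copy, as the diff does with url/trashLen
            funcs = append(funcs, func() { fmt.Println(v) })
        }
        for _, f := range funcs {
            f() // prints a, b, c; without the copy (pre-Go 1.22) each call would print c
        }
    }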
index 2ccf17d45f78419b63fc5074ab9208c7e2e2c3c0..79ff3f8f0763b1c2452e63afe9b8d553fbab84b1 100644 (file)
@@ -2,11 +2,21 @@ package keep
 
 import (
        "encoding/json"
-       "git.curoverse.com/arvados.git/sdk/go/keepclient"
-       . "gopkg.in/check.v1"
+       "fmt"
+       "net"
        "net/http"
        "net/http/httptest"
+       "net/url"
+       "strconv"
+       "strings"
        "testing"
+
+       "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
+       "git.curoverse.com/arvados.git/sdk/go/arvadostest"
+       "git.curoverse.com/arvados.git/sdk/go/blockdigest"
+       "git.curoverse.com/arvados.git/sdk/go/keepclient"
+
+       . "gopkg.in/check.v1"
 )
 
 // Gocheck boilerplate
@@ -30,17 +40,18 @@ func (ts *TestHandler) ServeHTTP(writer http.ResponseWriter, req *http.Request)
 func (s *KeepSuite) TestSendTrashLists(c *C) {
        th := TestHandler{}
        server := httptest.NewServer(&th)
+       defer server.Close()
 
        tl := map[string]TrashList{
                server.URL: TrashList{TrashRequest{"000000000000000000000000deadbeef", 99}}}
 
-       kc := keepclient.KeepClient{Client: &http.Client{}}
+       arv := arvadosclient.ArvadosClient{ApiToken: "abc123"}
+       kc := keepclient.KeepClient{Arvados: &arv, Client: &http.Client{}}
        kc.SetServiceRoots(map[string]string{"xxxx": server.URL},
                map[string]string{"xxxx": server.URL},
                map[string]string{})
 
-       err := SendTrashLists("", &kc, tl)
-       server.Close()
+       err := SendTrashLists(nil, &kc, tl, false)
 
        c.Check(err, IsNil)
 
@@ -61,21 +72,207 @@ func sendTrashListError(c *C, server *httptest.Server) {
        tl := map[string]TrashList{
                server.URL: TrashList{TrashRequest{"000000000000000000000000deadbeef", 99}}}
 
-       kc := keepclient.KeepClient{Client: &http.Client{}}
+       arv := arvadosclient.ArvadosClient{ApiToken: "abc123"}
+       kc := keepclient.KeepClient{Arvados: &arv, Client: &http.Client{}}
        kc.SetServiceRoots(map[string]string{"xxxx": server.URL},
                map[string]string{"xxxx": server.URL},
                map[string]string{})
 
-       err := SendTrashLists("", &kc, tl)
+       err := SendTrashLists(nil, &kc, tl, false)
 
        c.Check(err, NotNil)
        c.Check(err[0], NotNil)
 }
 
 func (s *KeepSuite) TestSendTrashListErrorResponse(c *C) {
-       sendTrashListError(c, httptest.NewServer(&TestHandlerError{}))
+       server := httptest.NewServer(&TestHandlerError{})
+       sendTrashListError(c, server)
+       defer server.Close()
 }
 
 func (s *KeepSuite) TestSendTrashListUnreachable(c *C) {
        sendTrashListError(c, httptest.NewUnstartedServer(&TestHandler{}))
 }
+
+// APITestData parameterizes the stubbed keep_services API response.
+type APITestData struct {
+       numServers int    // items_available reported by the API stub
+       serverType string // service_type of the single stubbed server
+       statusCode int    // HTTP status code returned by the API stub
+}
+
+func (s *KeepSuite) TestGetKeepServers_UnsupportedServiceType(c *C) {
+       testGetKeepServersFromAPI(c, APITestData{1, "notadisk", 200}, "Found no keepservices with the service type disk")
+}
+
+func (s *KeepSuite) TestGetKeepServers_ReceivedTooFewServers(c *C) {
+       testGetKeepServersFromAPI(c, APITestData{2, "disk", 200}, "Did not receive all available keep servers")
+}
+
+func (s *KeepSuite) TestGetKeepServers_ServerError(c *C) {
+       testGetKeepServersFromAPI(c, APITestData{-1, "disk", -1}, "arvados API server error")
+}
+
+func testGetKeepServersFromAPI(c *C, testData APITestData, expectedError string) {
+       keepServers := ServiceList{
+               ItemsAvailable: testData.numServers,
+               KeepServers: []ServerAddress{{
+                       SSL:         false,
+                       Host:        "example.com",
+                       Port:        12345,
+                       UUID:        "abcdefg",
+                       ServiceType: testData.serverType,
+               }},
+       }
+
+       ksJSON, _ := json.Marshal(keepServers)
+       apiStubResponses := make(map[string]arvadostest.StubResponse)
+       apiStubResponses["/arvados/v1/keep_services"] = arvadostest.StubResponse{testData.statusCode, string(ksJSON)}
+       apiStub := arvadostest.ServerStub{apiStubResponses}
+
+       api := httptest.NewServer(&apiStub)
+       defer api.Close()
+
+       arv := arvadosclient.ArvadosClient{
+               Scheme:    "http",
+               ApiServer: api.URL[7:],
+               ApiToken:  "abc123",
+               Client:    &http.Client{Transport: &http.Transport{}},
+       }
+
+       kc := keepclient.KeepClient{Arvados: &arv, Client: &http.Client{}}
+       kc.SetServiceRoots(map[string]string{"xxxx": "http://example.com:23456"},
+               map[string]string{"xxxx": "http://example.com:23456"},
+               map[string]string{})
+
+       params := GetKeepServersParams{
+               Client: arv,
+               Logger: nil,
+               Limit:  10,
+       }
+
+       _, err := GetKeepServersAndSummarize(params)
+       c.Assert(err, NotNil)
+       c.Assert(err, ErrorMatches, fmt.Sprintf(".*%s.*", expectedError))
+}
+
+type KeepServerTestData struct {
+       // handle /status.json
+       statusStatusCode int
+
+       // handle /index
+       indexStatusCode   int
+       indexResponseBody string
+
+       // expected error, if any
+       expectedError string
+}
+
+func (s *KeepSuite) TestGetKeepServers_ErrorGettingKeepServerStatus(c *C) {
+       testGetKeepServersAndSummarize(c, KeepServerTestData{500, 200, "ok",
+               ".*http://.* 500 Internal Server Error"})
+}
+
+func (s *KeepSuite) TestGetKeepServers_GettingIndex(c *C) {
+       testGetKeepServersAndSummarize(c, KeepServerTestData{200, -1, "notok",
+               ".*redirect-loop.*"})
+}
+
+func (s *KeepSuite) TestGetKeepServers_ErrorReadServerResponse(c *C) {
+       testGetKeepServersAndSummarize(c, KeepServerTestData{200, 500, "notok",
+               ".*http://.* 500 Internal Server Error"})
+}
+
+func (s *KeepSuite) TestGetKeepServers_ReadServerResponseTruncatedAtLineOne(c *C) {
+       testGetKeepServersAndSummarize(c, KeepServerTestData{200, 200,
+               "notterminatedwithnewline", "Index from http://.* truncated at line 1"})
+}
+
+func (s *KeepSuite) TestGetKeepServers_InvalidBlockLocatorPattern(c *C) {
+       testGetKeepServersAndSummarize(c, KeepServerTestData{200, 200, "testing\n",
+               "Error parsing BlockInfo from index line.*"})
+}
+
+func (s *KeepSuite) TestGetKeepServers_ReadServerResponseEmpty(c *C) {
+       testGetKeepServersAndSummarize(c, KeepServerTestData{200, 200, "\n", ""})
+}
+
+func (s *KeepSuite) TestGetKeepServers_ReadServerResponseWithTwoBlocks(c *C) {
+       testGetKeepServersAndSummarize(c, KeepServerTestData{200, 200,
+               "51752ba076e461ec9ec1d27400a08548+20 1447526361\na048cc05c02ba1ee43ad071274b9e547+52 1447526362\n\n", ""})
+}
+
+func testGetKeepServersAndSummarize(c *C, testData KeepServerTestData) {
+       ksStubResponses := make(map[string]arvadostest.StubResponse)
+       ksStubResponses["/status.json"] = arvadostest.StubResponse{testData.statusStatusCode, `{}`}
+       ksStubResponses["/index"] = arvadostest.StubResponse{testData.indexStatusCode, testData.indexResponseBody}
+       ksStub := arvadostest.ServerStub{ksStubResponses}
+       ks := httptest.NewServer(&ksStub)
+       defer ks.Close()
+
+       ksURL, err := url.Parse(ks.URL)
+       c.Check(err, IsNil)
+       ksHost, port, err := net.SplitHostPort(ksURL.Host)
+       c.Check(err, IsNil)
+       ksPort, err := strconv.Atoi(port)
+       c.Check(err, IsNil)
+
+       serversList := ServiceList{
+               ItemsAvailable: 1,
+               KeepServers: []ServerAddress{{
+                       SSL:         false,
+                       Host:        ksHost,
+                       Port:        ksPort,
+                       UUID:        "abcdefg",
+                       ServiceType: "disk",
+               }},
+       }
+       ksJSON, _ := json.Marshal(serversList)
+       apiStubResponses := make(map[string]arvadostest.StubResponse)
+       apiStubResponses["/arvados/v1/keep_services"] = arvadostest.StubResponse{200, string(ksJSON)}
+       apiStub := arvadostest.ServerStub{apiStubResponses}
+
+       api := httptest.NewServer(&apiStub)
+       defer api.Close()
+
+       arv := arvadosclient.ArvadosClient{
+               Scheme:    "http",
+               ApiServer: api.URL[7:],
+               ApiToken:  "abc123",
+               Client:    &http.Client{Transport: &http.Transport{}},
+       }
+
+       kc := keepclient.KeepClient{Arvados: &arv, Client: &http.Client{}}
+       kc.SetServiceRoots(map[string]string{"xxxx": ks.URL},
+               map[string]string{"xxxx": ks.URL},
+               map[string]string{})
+
+       params := GetKeepServersParams{
+               Client: arv,
+               Logger: nil,
+               Limit:  10,
+       }
+
+       // GetKeepServersAndSummarize
+       results, err := GetKeepServersAndSummarize(params)
+
+       if testData.expectedError == "" {
+               c.Assert(err, IsNil)
+               c.Assert(results, NotNil)
+
+               blockToServers := results.BlockToServers
+
+               blockLocators := strings.Split(testData.indexResponseBody, "\n")
+               for _, loc := range blockLocators {
+                       locator := strings.Split(loc, " ")[0]
+                       if locator != "" {
+                               blockLocator, err := blockdigest.ParseBlockLocator(locator)
+                               c.Assert(err, IsNil)
+
+                               blockDigestWithSize := blockdigest.DigestWithSize{blockLocator.Digest, uint32(blockLocator.Size)}
+                               blockServerInfo := blockToServers[blockDigestWithSize]
+                               c.Assert(blockServerInfo[0].Mtime, NotNil)
+                       }
+               }
+       } else {
+               c.Assert(err, ErrorMatches, testData.expectedError)
+       }
+}
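These tests stand up throwaway HTTP servers with net/http/httptest rather than contacting real services. The stub pattern in isolation, with a placeholder response body:

    package main

    import (
        "fmt"
        "io/ioutil"
        "net/http"
        "net/http/httptest"
    )

    func main() {
        stub := httptest.NewServer(http.HandlerFunc(
            func(w http.ResponseWriter, r *http.Request) {
                fmt.Fprint(w, `{"items_available": 1}`)
            }))
        defer stub.Close()

        resp, err := http.Get(stub.URL + "/arvados/v1/keep_services")
        if err != nil {
            panic(err)
        }
        defer resp.Body.Close()
        body, _ := ioutil.ReadAll(resp.Body)
        fmt.Println(resp.StatusCode, string(body))
    }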
index 18b3aec8190408897d2857e3e6b2a212936bc21c..6e463d7670b25fb4579292fd765a8470ada0b449 100644 (file)
@@ -9,7 +9,6 @@ import (
        "git.curoverse.com/arvados.git/sdk/go/logger"
        "git.curoverse.com/arvados.git/services/datamanager/collection"
        "git.curoverse.com/arvados.git/services/datamanager/keep"
-       "git.curoverse.com/arvados.git/services/datamanager/loggerutil"
        "log"
        "os"
 )
@@ -22,17 +21,17 @@ type serializedData struct {
 }
 
 var (
-       writeDataTo  string
+       WriteDataTo  string
        readDataFrom string
 )
 
 // DataFetcher is a function type for fetching collection and keep server data
 type DataFetcher func(arvLogger *logger.Logger,
        readCollections *collection.ReadCollections,
-       keepServerInfo *keep.ReadServers)
+       keepServerInfo *keep.ReadServers) error
 
 func init() {
-       flag.StringVar(&writeDataTo,
+       flag.StringVar(&WriteDataTo,
                "write-data-to",
                "",
                "Write summary of data received to this file. Used for development only.")
@@ -51,14 +50,13 @@ func init() {
 // working with stale data.
 func MaybeWriteData(arvLogger *logger.Logger,
        readCollections collection.ReadCollections,
-       keepServerInfo keep.ReadServers) bool {
-       if writeDataTo == "" {
-               return false
+       keepServerInfo keep.ReadServers) error {
+       if WriteDataTo == "" {
+               return nil
        }
-       summaryFile, err := os.Create(writeDataTo)
+       summaryFile, err := os.Create(WriteDataTo)
        if err != nil {
-               loggerutil.FatalWithMessage(arvLogger,
-                       fmt.Sprintf("Failed to open %s: %v", writeDataTo, err))
+               return err
        }
        defer summaryFile.Close()
 
@@ -68,11 +66,10 @@ func MaybeWriteData(arvLogger *logger.Logger,
                KeepServerInfo:  keepServerInfo}
        err = enc.Encode(data)
        if err != nil {
-               loggerutil.FatalWithMessage(arvLogger,
-                       fmt.Sprintf("Failed to write summary data: %v", err))
+               return err
        }
-       log.Printf("Wrote summary data to: %s", writeDataTo)
-       return true
+       log.Printf("Wrote summary data to: %s", WriteDataTo)
+       return nil
 }
 
 // ShouldReadData should not be used outside of development
@@ -89,33 +86,30 @@ func ShouldReadData() bool {
 // working with stale data.
 func ReadData(arvLogger *logger.Logger,
        readCollections *collection.ReadCollections,
-       keepServerInfo *keep.ReadServers) {
+       keepServerInfo *keep.ReadServers) error {
        if readDataFrom == "" {
-               loggerutil.FatalWithMessage(arvLogger,
-                       "ReadData() called with empty filename.")
-       } else {
-               summaryFile, err := os.Open(readDataFrom)
-               if err != nil {
-                       loggerutil.FatalWithMessage(arvLogger,
-                               fmt.Sprintf("Failed to open %s: %v", readDataFrom, err))
-               }
-               defer summaryFile.Close()
+               return fmt.Errorf("ReadData() called with empty filename")
+       }
+       summaryFile, err := os.Open(readDataFrom)
+       if err != nil {
+               return err
+       }
+       defer summaryFile.Close()
 
-               dec := gob.NewDecoder(summaryFile)
-               data := serializedData{}
-               err = dec.Decode(&data)
-               if err != nil {
-                       loggerutil.FatalWithMessage(arvLogger,
-                               fmt.Sprintf("Failed to read summary data: %v", err))
-               }
+       dec := gob.NewDecoder(summaryFile)
+       data := serializedData{}
+       err = dec.Decode(&data)
+       if err != nil {
+               return err
+       }
 
-               // re-summarize data, so that we can update our summarizing
-               // functions without needing to do all our network i/o
-               data.ReadCollections.Summarize(arvLogger)
-               data.KeepServerInfo.Summarize(arvLogger)
+       // re-summarize data, so that we can update our summarizing
+       // functions without needing to do all our network i/o
+       data.ReadCollections.Summarize(arvLogger)
+       data.KeepServerInfo.Summarize(arvLogger)
 
-               *readCollections = data.ReadCollections
-               *keepServerInfo = data.KeepServerInfo
-               log.Printf("Read summary data from: %s", readDataFrom)
-       }
+       *readCollections = data.ReadCollections
+       *keepServerInfo = data.KeepServerInfo
+       log.Printf("Read summary data from: %s", readDataFrom)
+       return nil
 }
index cc01249a624a7f4947cdcfc8dafd73dd7e347377..aac9aec9716a74c33f5c18a4ec23e2f9b0b8e212 100644 (file)
@@ -9,7 +9,6 @@ import (
        "git.curoverse.com/arvados.git/sdk/go/keepclient"
        "git.curoverse.com/arvados.git/sdk/go/logger"
        "git.curoverse.com/arvados.git/services/datamanager/keep"
-       "git.curoverse.com/arvados.git/services/datamanager/loggerutil"
        "log"
        "os"
        "strings"
@@ -176,23 +175,41 @@ func BuildPullLists(lps map[Locator]PullServers) (spl map[string]PullList) {
 // This is just a hack for prototyping, it is not expected to be used
 // in production.
 func WritePullLists(arvLogger *logger.Logger,
-       pullLists map[string]PullList) {
+       pullLists map[string]PullList,
+       dryRun bool) error {
        r := strings.NewReplacer(":", ".")
+
        for host, list := range pullLists {
+               if arvLogger != nil {
+                       // We need a local variable because Update doesn't call our mutator func until later,
+                       // when our list variable might have been reused by the next loop iteration.
+                       host := host
+                       listLen := len(list)
+                       arvLogger.Update(func(p map[string]interface{}, e map[string]interface{}) {
+                               pullListInfo := logger.GetOrCreateMap(p, "pull_list_len")
+                               pullListInfo[host] = listLen
+                       })
+               }
+
+               if dryRun {
+                       log.Printf("dry run, not sending pull list to service %s with %d blocks", host, len(list))
+                       continue
+               }
+
                filename := fmt.Sprintf("pull_list.%s", r.Replace(RemoveProtocolPrefix(host)))
                pullListFile, err := os.Create(filename)
                if err != nil {
-                       loggerutil.FatalWithMessage(arvLogger,
-                               fmt.Sprintf("Failed to open %s: %v", filename, err))
+                       return err
                }
                defer pullListFile.Close()
 
                enc := json.NewEncoder(pullListFile)
                err = enc.Encode(list)
                if err != nil {
-                       loggerutil.FatalWithMessage(arvLogger,
-                               fmt.Sprintf("Failed to write pull list to %s: %v", filename, err))
+                       return err
                }
                log.Printf("Wrote pull list to %s.", filename)
        }
+
+       return nil
 }
diff --git a/services/dockercleaner/MANIFEST.in b/services/dockercleaner/MANIFEST.in
new file mode 100644 (file)
index 0000000..4db8152
--- /dev/null
@@ -0,0 +1 @@
+include agpl-3.0.txt
diff --git a/services/dockercleaner/agpl-3.0.txt b/services/dockercleaner/agpl-3.0.txt
new file mode 100644 (file)
index 0000000..dba13ed
--- /dev/null
@@ -0,0 +1,661 @@
+                    GNU AFFERO GENERAL PUBLIC LICENSE
+                       Version 3, 19 November 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+                            Preamble
+
+  The GNU Affero General Public License is a free, copyleft license for
+software and other kinds of works, specifically designed to ensure
+cooperation with the community in the case of network server software.
+
+  The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works.  By contrast,
+our General Public Licenses are intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+  Developers that use our General Public Licenses protect your rights
+with two steps: (1) assert copyright on the software, and (2) offer
+you this License which gives you legal permission to copy, distribute
+and/or modify the software.
+
+  A secondary benefit of defending all users' freedom is that
+improvements made in alternate versions of the program, if they
+receive widespread use, become available for other developers to
+incorporate.  Many developers of free software are heartened and
+encouraged by the resulting cooperation.  However, in the case of
+software used on network servers, this result may fail to come about.
+The GNU General Public License permits making a modified version and
+letting the public access it on a server without ever releasing its
+source code to the public.
+
+  The GNU Affero General Public License is designed specifically to
+ensure that, in such cases, the modified source code becomes available
+to the community.  It requires the operator of a network server to
+provide the source code of the modified version running there to the
+users of that server.  Therefore, public use of a modified version, on
+a publicly accessible server, gives the public access to the source
+code of the modified version.
+
+  An older license, called the Affero General Public License and
+published by Affero, was designed to accomplish similar goals.  This is
+a different license, not a version of the Affero GPL, but Affero has
+released a new version of the Affero GPL which permits relicensing under
+this license.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+                       TERMS AND CONDITIONS
+
+  0. Definitions.
+
+  "This License" refers to version 3 of the GNU Affero General Public License.
+
+  "Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+  "The Program" refers to any copyrightable work licensed under this
+License.  Each licensee is addressed as "you".  "Licensees" and
+"recipients" may be individuals or organizations.
+
+  To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy.  The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+
+  A "covered work" means either the unmodified Program or a work based
+on the Program.
+
+  To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy.  Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+
+  To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies.  Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+
+  An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License.  If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+  1. Source Code.
+
+  The "source code" for a work means the preferred form of the work
+for making modifications to it.  "Object code" means any non-source
+form of a work.
+
+  A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+
+  The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form.  A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+
+  The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities.  However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work.  For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+  The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+
+  The Corresponding Source for a work in source code form is that
+same work.
+
+  2. Basic Permissions.
+
+  All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met.  This License explicitly affirms your unlimited
+permission to run the unmodified Program.  The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work.  This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+
+  You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force.  You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright.  Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+
+  Conveying under any other circumstances is permitted solely under
+the conditions stated below.  Sublicensing is not allowed; section 10
+makes it unnecessary.
+
+  3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+  No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+
+  When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+
+  4. Conveying Verbatim Copies.
+
+  You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+
+  You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+
+  5. Conveying Modified Source Versions.
+
+  You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+
+    a) The work must carry prominent notices stating that you modified
+    it, and giving a relevant date.
+
+    b) The work must carry prominent notices stating that it is
+    released under this License and any conditions added under section
+    7.  This requirement modifies the requirement in section 4 to
+    "keep intact all notices".
+
+    c) You must license the entire work, as a whole, under this
+    License to anyone who comes into possession of a copy.  This
+    License will therefore apply, along with any applicable section 7
+    additional terms, to the whole of the work, and all its parts,
+    regardless of how they are packaged.  This License gives no
+    permission to license the work in any other way, but it does not
+    invalidate such permission if you have separately received it.
+
+    d) If the work has interactive user interfaces, each must display
+    Appropriate Legal Notices; however, if the Program has interactive
+    interfaces that do not display Appropriate Legal Notices, your
+    work need not make them do so.
+
+  A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit.  Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+  6. Conveying Non-Source Forms.
+
+  You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+
+    a) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by the
+    Corresponding Source fixed on a durable physical medium
+    customarily used for software interchange.
+
+    b) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by a
+    written offer, valid for at least three years and valid for as
+    long as you offer spare parts or customer support for that product
+    model, to give anyone who possesses the object code either (1) a
+    copy of the Corresponding Source for all the software in the
+    product that is covered by this License, on a durable physical
+    medium customarily used for software interchange, for a price no
+    more than your reasonable cost of physically performing this
+    conveying of source, or (2) access to copy the
+    Corresponding Source from a network server at no charge.
+
+    c) Convey individual copies of the object code with a copy of the
+    written offer to provide the Corresponding Source.  This
+    alternative is allowed only occasionally and noncommercially, and
+    only if you received the object code with such an offer, in accord
+    with subsection 6b.
+
+    d) Convey the object code by offering access from a designated
+    place (gratis or for a charge), and offer equivalent access to the
+    Corresponding Source in the same way through the same place at no
+    further charge.  You need not require recipients to copy the
+    Corresponding Source along with the object code.  If the place to
+    copy the object code is a network server, the Corresponding Source
+    may be on a different server (operated by you or a third party)
+    that supports equivalent copying facilities, provided you maintain
+    clear directions next to the object code saying where to find the
+    Corresponding Source.  Regardless of what server hosts the
+    Corresponding Source, you remain obligated to ensure that it is
+    available for as long as needed to satisfy these requirements.
+
+    e) Convey the object code using peer-to-peer transmission, provided
+    you inform other peers where the object code and Corresponding
+    Source of the work are being offered to the general public at no
+    charge under subsection 6d.
+
+  A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+
+  A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling.  In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage.  For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product.  A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+  "Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source.  The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+
+  If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information.  But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+
+  The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed.  Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+
+  Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+
+  7. Additional Terms.
+
+  "Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law.  If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+  When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it.  (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.)  You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+
+  Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+
+    a) Disclaiming warranty or limiting liability differently from the
+    terms of sections 15 and 16 of this License; or
+
+    b) Requiring preservation of specified reasonable legal notices or
+    author attributions in that material or in the Appropriate Legal
+    Notices displayed by works containing it; or
+
+    c) Prohibiting misrepresentation of the origin of that material, or
+    requiring that modified versions of such material be marked in
+    reasonable ways as different from the original version; or
+
+    d) Limiting the use for publicity purposes of names of licensors or
+    authors of the material; or
+
+    e) Declining to grant rights under trademark law for use of some
+    trade names, trademarks, or service marks; or
+
+    f) Requiring indemnification of licensors and authors of that
+    material by anyone who conveys the material (or modified versions of
+    it) with contractual assumptions of liability to the recipient, for
+    any liability that these contractual assumptions directly impose on
+    those licensors and authors.
+
+  All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10.  If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term.  If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+
+  If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+
+  Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+
+  8. Termination.
+
+  You may not propagate or modify a covered work except as expressly
+provided under this License.  Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+
+  However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+  Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+  Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License.  If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+
+  9. Acceptance Not Required for Having Copies.
+
+  You are not required to accept this License in order to receive or
+run a copy of the Program.  Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance.  However,
+nothing other than this License grants you permission to propagate or
+modify any covered work.  These actions infringe copyright if you do
+not accept this License.  Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+
+  10. Automatic Licensing of Downstream Recipients.
+
+  Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License.  You are not responsible
+for enforcing compliance by third parties with this License.
+
+  An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations.  If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+
+  You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License.  For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+
+  11. Patents.
+
+  A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based.  The
+work thus licensed is called the contributor's "contributor version".
+
+  A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version.  For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+
+  Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+
+  In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement).  To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+
+  If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients.  "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+
+  If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+
+  A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License.  You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+
+  Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+
+  12. No Surrender of Others' Freedom.
+
+  If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all.  For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+
+  13. Remote Network Interaction; Use with the GNU General Public License.
+
+  Notwithstanding any other provision of this License, if you modify the
+Program, your modified version must prominently offer all users
+interacting with it remotely through a computer network (if your version
+supports such interaction) an opportunity to receive the Corresponding
+Source of your version by providing access to the Corresponding Source
+from a network server at no charge, through some standard or customary
+means of facilitating copying of software.  This Corresponding Source
+shall include the Corresponding Source for any work covered by version 3
+of the GNU General Public License that is incorporated pursuant to the
+following paragraph.
+
+  Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU General Public License into a single
+combined work, and to convey the resulting work.  The terms of this
+License will continue to apply to the part which is the covered work,
+but the work with which it is combined will remain governed by version
+3 of the GNU General Public License.
+
+  14. Revised Versions of this License.
+
+  The Free Software Foundation may publish revised and/or new versions of
+the GNU Affero General Public License from time to time.  Such new versions
+will be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+  Each version is given a distinguishing version number.  If the
+Program specifies that a certain numbered version of the GNU Affero General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation.  If the Program does not specify a version number of the
+GNU Affero General Public License, you may choose any version ever published
+by the Free Software Foundation.
+
+  If the Program specifies that a proxy can decide which future
+versions of the GNU Affero General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+
+  Later license versions may give you additional or different
+permissions.  However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+  15. Disclaimer of Warranty.
+
+  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+  16. Limitation of Liability.
+
+  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+  17. Interpretation of Sections 15 and 16.
+
+  If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+                     END OF TERMS AND CONDITIONS
+
+            How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+  If your software can interact with users remotely through a computer
+network, you should also make sure that it provides a way for users to
+get its source.  For example, if your program is a web application, its
+interface could display a "Source" link that leads users to an archive
+of the code.  There are many ways you could offer source, and different
+solutions will be better for different programs; see section 13 for the
+specific requirements.
+
+  You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU AGPL, see
+<http://www.gnu.org/licenses/>.
index 191cb55601053d1f58a284ce08847149d04d2522..2435e6de806043b4e014071e8f9e05f06b159c4e 100755 (executable)
@@ -177,9 +177,10 @@ class DockerImageUseRecorder(DockerEventListener):
 class DockerImageCleaner(DockerImageUseRecorder):
     event_handlers = DockerImageUseRecorder.event_handlers.copy()
 
-    def __init__(self, images, docker_client, events):
+    def __init__(self, images, docker_client, events, remove_containers_onexit=False):
         super().__init__(images, docker_client, events)
         self.logged_unknown = set()
+        self.remove_containers_onexit = remove_containers_onexit
 
     def new_container(self, event, container_hash):
         container_image_id = container_hash['Image']
@@ -188,6 +189,29 @@ class DockerImageCleaner(DockerImageUseRecorder):
             self.images.add_image(image_hash)
         return super().new_container(event, container_hash)
 
+    def _remove_container(self, cid):
+        try:
+            self.docker_client.remove_container(cid)
+        except docker.errors.APIError as error:
+            logger.warning("Failed to remove container %s: %s", cid, error)
+        else:
+            logger.info("Removed container %s", cid)
+
+    @event_handlers.on('die')
+    def clean_container(self, event=None):
+        if self.remove_containers_onexit:
+            self._remove_container(event['id'])
+
+    def check_stopped_containers(self, remove=False):
+        logger.info("Checking for stopped containers")
+        for c in self.docker_client.containers(filters={'status': 'exited'}):
+            logger.info("Container %s %s", c['Id'], c['Status'])
+            if c['Status'][:6] != 'Exited':
+                logger.error("Unexpected status %s for container %s",
+                             c['Status'], c['Id'])
+            elif remove:
+                self._remove_container(c['Id'])
+
     @event_handlers.on('destroy')
     def clean_images(self, event=None):
         for image_id in self.images.should_delete():
@@ -225,6 +249,12 @@ def parse_arguments(arguments):
     parser.add_argument(
         '--quota', action='store', type=human_size, required=True,
         help="space allowance for Docker images, suffixed with K/M/G/T")
+    parser.add_argument(
+        '--remove-stopped-containers', type=str, default='always',
+        choices=['never', 'onexit', 'always'],
+        help="""when to remove stopped containers (default: always, i.e., remove
+        stopped containers found at startup, and remove containers as
+        soon as they exit)""")
     parser.add_argument(
         '--verbose', '-v', action='count', default=0,
         help="log more information")
@@ -246,9 +276,13 @@ def run(args, docker_client):
         images, docker_client, docker_client.events(since=1, until=start_time))
     use_recorder.run()
     cleaner = DockerImageCleaner(
-        images, docker_client, docker_client.events(since=start_time))
-    logger.info("Starting cleanup loop")
+        images, docker_client, docker_client.events(since=start_time),
+        remove_containers_onexit=args.remove_stopped_containers != 'never')
+    cleaner.check_stopped_containers(
+        remove=args.remove_stopped_containers == 'always')
+    logger.info("Checking image quota at startup")
     cleaner.clean_images()
+    logger.info("Listening for docker events")
     cleaner.run()
 
 def main(arguments):
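
The hunks above wire the new --remove-stopped-containers option into run(): any value other than 'never' turns on removal when a container emits a 'die' event, and 'always' additionally sweeps containers that had already exited before startup. A minimal usage sketch, assuming a default docker.Client() construction (the real entry point may pin a specific API version):

    import docker
    from arvados_docker import cleaner

    # Build args as the CLI would; --quota is required and takes K/M/G/T suffixes.
    args = cleaner.parse_arguments([
        '--quota', '20G',
        '--remove-stopped-containers', 'onexit',  # or 'never' / 'always'
    ])
    docker_client = docker.Client()  # assumed construction, not from this patch
    cleaner.run(args, docker_client)
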
index 5944638092f9453a1cd5ea49d8d3e8924aba93b3..1a9d0fd62eb70d667bc82b7e5a85a6f720ef1bb5 100644 (file)
@@ -21,6 +21,9 @@ setup(name="arvados-docker-cleaner",
       download_url="https://github.com/curoverse/arvados.git",
       license="GNU Affero General Public License version 3.0",
       packages=find_packages(),
+      data_files=[
+          ('share/doc/arvados-docker-cleaner', ['agpl-3.0.txt']),
+      ],
       install_requires=[
         'docker-py',
         ],
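
The data_files entry ships the AGPL text alongside the installed package; with a default prefix the file would typically land at a path like the following (the exact location depends on the installation prefix):

    # e.g. after installing into /usr/local:
    #   /usr/local/share/doc/arvados-docker-cleaner/agpl-3.0.txt
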
index fd959de7762dd2b0a54f5b08b1107e4aedc45d5a..43abe4f636dcb1076ee65e90f7a549f6616af834 100644 (file)
@@ -223,13 +223,14 @@ class DockerImagesTestCase(unittest.TestCase):
 
 class DockerImageUseRecorderTestCase(unittest.TestCase):
     TEST_CLASS = cleaner.DockerImageUseRecorder
+    TEST_CLASS_INIT_KWARGS = {}
 
     def setUp(self):
         self.images = mock.MagicMock(name='images')
         self.docker_client = mock.MagicMock(name='docker_client')
         self.events = []
         self.recorder = self.TEST_CLASS(self.images, self.docker_client,
-                                        self.encoded_events)
+                                        self.encoded_events, **self.TEST_CLASS_INIT_KWARGS)
 
     @property
     def encoded_events(self):
@@ -310,6 +311,23 @@ class DockerImageCleanerTestCase(DockerImageUseRecorderTestCase):
         self.assertFalse(self.images.del_image.called)
 
 
+class DockerContainerCleanerTestCase(DockerImageUseRecorderTestCase):
+    TEST_CLASS = cleaner.DockerImageCleaner
+    TEST_CLASS_INIT_KWARGS = {'remove_containers_onexit': True}
+
+    @mock.patch('arvados_docker.cleaner.logger')
+    def test_failed_container_deletion_handling(self, mockLogger):
+        cid = MockDockerId()
+        self.docker_client.remove_container.side_effect = MockException(500)
+        self.events.append(MockEvent('die', docker_id=cid))
+        self.recorder.run()
+        self.docker_client.remove_container.assert_called_with(cid)
+        self.assertEqual("Failed to remove container %s: %s",
+                         mockLogger.warning.call_args[0][0])
+        self.assertEqual(cid,
+                         mockLogger.warning.call_args[0][1])
+
+
 class HumanSizeTestCase(unittest.TestCase):
     def check(self, human_str, count, exp):
         self.assertEqual(count * (1024 ** exp),
@@ -354,3 +372,57 @@ class RunTestCase(unittest.TestCase):
         self.assertLessEqual(test_start_time, event_kwargs[0]['until'])
         self.assertIn('since', event_kwargs[1])
         self.assertEqual(event_kwargs[0]['until'], event_kwargs[1]['since'])
+
+
+class ContainerRemovalTestCase(unittest.TestCase):
+    LIFECYCLE = ['create', 'attach', 'start', 'resize', 'die', 'destroy']
+
+    def setUp(self):
+        self.args = mock.MagicMock(name='args')
+        self.docker_client = mock.MagicMock(name='docker_client')
+        self.existingCID = MockDockerId()
+        self.docker_client.containers.return_value = [{
+            'Id': self.existingCID,
+            'Status': 'Exited (0) 6 weeks ago',
+        }, {
+            # If docker_client.containers() returns non-exited
+            # containers for some reason, do not remove them.
+            'Id': MockDockerId(),
+            'Status': 'Running',
+        }]
+        self.newCID = MockDockerId()
+        self.docker_client.events.return_value = [
+            MockEvent(e, docker_id=self.newCID).encoded()
+            for e in self.LIFECYCLE]
+
+    def test_remove_onexit(self):
+        self.args.remove_stopped_containers = 'onexit'
+        cleaner.run(self.args, self.docker_client)
+        self.docker_client.remove_container.assert_called_once_with(self.newCID)
+
+    def test_remove_always(self):
+        self.args.remove_stopped_containers = 'always'
+        cleaner.run(self.args, self.docker_client)
+        self.docker_client.remove_container.assert_any_call(self.existingCID)
+        self.docker_client.remove_container.assert_any_call(self.newCID)
+        self.assertEqual(2, self.docker_client.remove_container.call_count)
+
+    def test_remove_never(self):
+        self.args.remove_stopped_containers = 'never'
+        cleaner.run(self.args, self.docker_client)
+        self.assertEqual(0, self.docker_client.remove_container.call_count)
+
+    def test_container_exited_between_subscribe_events_and_check_existing(self):
+        self.args.remove_stopped_containers = 'always'
+        self.docker_client.events.return_value = [
+            MockEvent(e, docker_id=self.existingCID).encoded()
+            for e in ['die', 'destroy']]
+        cleaner.run(self.args, self.docker_client)
+        # Subscribed to events before getting the list of existing
+        # exited containers?
+        self.docker_client.assert_has_calls([
+            mock.call.events(since=mock.ANY),
+            mock.call.containers(filters={'status':'exited'})])
+        # Asked to delete the container twice?
+        self.docker_client.remove_container.assert_has_calls([mock.call(self.existingCID)] * 2)
+        self.assertEqual(2, self.docker_client.remove_container.call_count)
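
These tests reuse DockerImageUseRecorderTestCase's setUp() by letting each subclass override TEST_CLASS and TEST_CLASS_INIT_KWARGS, so the same fixtures run against differently configured classes. A stripped-down sketch of the pattern (names here are illustrative, not from the patch):

    import unittest

    class BaseCase(unittest.TestCase):
        TEST_CLASS = dict          # placeholder for the class under test
        TEST_CLASS_INIT_KWARGS = {}

        def setUp(self):
            # Each subclass gets an instance built with its own kwargs.
            self.subject = self.TEST_CLASS(**self.TEST_CLASS_INIT_KWARGS)

    class ConfiguredCase(BaseCase):
        # Every test inherited from BaseCase now runs against a
        # differently-initialized subject.
        TEST_CLASS_INIT_KWARGS = {'configured': True}
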
index 9561fb1061f6de114633c70871232a6896dcbe8a..5ac3e99631fa7ff8f79b1c678386abca065da449 100644 (file)
@@ -1 +1,2 @@
+include agpl-3.0.txt
 include README.rst
diff --git a/services/fuse/agpl-3.0.txt b/services/fuse/agpl-3.0.txt
new file mode 100644 (file)
index 0000000..dba13ed
--- /dev/null
@@ -0,0 +1,661 @@
+                    GNU AFFERO GENERAL PUBLIC LICENSE
+                       Version 3, 19 November 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+                            Preamble
+
+  The GNU Affero General Public License is a free, copyleft license for
+software and other kinds of works, specifically designed to ensure
+cooperation with the community in the case of network server software.
+
+  The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works.  By contrast,
+our General Public Licenses are intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+  Developers that use our General Public Licenses protect your rights
+with two steps: (1) assert copyright on the software, and (2) offer
+you this License which gives you legal permission to copy, distribute
+and/or modify the software.
+
+  A secondary benefit of defending all users' freedom is that
+improvements made in alternate versions of the program, if they
+receive widespread use, become available for other developers to
+incorporate.  Many developers of free software are heartened and
+encouraged by the resulting cooperation.  However, in the case of
+software used on network servers, this result may fail to come about.
+The GNU General Public License permits making a modified version and
+letting the public access it on a server without ever releasing its
+source code to the public.
+
+  The GNU Affero General Public License is designed specifically to
+ensure that, in such cases, the modified source code becomes available
+to the community.  It requires the operator of a network server to
+provide the source code of the modified version running there to the
+users of that server.  Therefore, public use of a modified version, on
+a publicly accessible server, gives the public access to the source
+code of the modified version.
+
+  An older license, called the Affero General Public License and
+published by Affero, was designed to accomplish similar goals.  This is
+a different license, not a version of the Affero GPL, but Affero has
+released a new version of the Affero GPL which permits relicensing under
+this license.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+                       TERMS AND CONDITIONS
+
+  0. Definitions.
+
+  "This License" refers to version 3 of the GNU Affero General Public License.
+
+  "Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+  "The Program" refers to any copyrightable work licensed under this
+License.  Each licensee is addressed as "you".  "Licensees" and
+"recipients" may be individuals or organizations.
+
+  To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy.  The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+
+  A "covered work" means either the unmodified Program or a work based
+on the Program.
+
+  To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy.  Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+
+  To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies.  Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+
+  An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License.  If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+  1. Source Code.
+
+  The "source code" for a work means the preferred form of the work
+for making modifications to it.  "Object code" means any non-source
+form of a work.
+
+  A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+
+  The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form.  A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+
+  The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities.  However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work.  For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+  The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+
+  The Corresponding Source for a work in source code form is that
+same work.
+
+  2. Basic Permissions.
+
+  All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met.  This License explicitly affirms your unlimited
+permission to run the unmodified Program.  The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work.  This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+
+  You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force.  You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright.  Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+
+  Conveying under any other circumstances is permitted solely under
+the conditions stated below.  Sublicensing is not allowed; section 10
+makes it unnecessary.
+
+  3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+  No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+
+  When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+
+  4. Conveying Verbatim Copies.
+
+  You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+
+  You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+
+  5. Conveying Modified Source Versions.
+
+  You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+
+    a) The work must carry prominent notices stating that you modified
+    it, and giving a relevant date.
+
+    b) The work must carry prominent notices stating that it is
+    released under this License and any conditions added under section
+    7.  This requirement modifies the requirement in section 4 to
+    "keep intact all notices".
+
+    c) You must license the entire work, as a whole, under this
+    License to anyone who comes into possession of a copy.  This
+    License will therefore apply, along with any applicable section 7
+    additional terms, to the whole of the work, and all its parts,
+    regardless of how they are packaged.  This License gives no
+    permission to license the work in any other way, but it does not
+    invalidate such permission if you have separately received it.
+
+    d) If the work has interactive user interfaces, each must display
+    Appropriate Legal Notices; however, if the Program has interactive
+    interfaces that do not display Appropriate Legal Notices, your
+    work need not make them do so.
+
+  A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit.  Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+  6. Conveying Non-Source Forms.
+
+  You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+
+    a) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by the
+    Corresponding Source fixed on a durable physical medium
+    customarily used for software interchange.
+
+    b) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by a
+    written offer, valid for at least three years and valid for as
+    long as you offer spare parts or customer support for that product
+    model, to give anyone who possesses the object code either (1) a
+    copy of the Corresponding Source for all the software in the
+    product that is covered by this License, on a durable physical
+    medium customarily used for software interchange, for a price no
+    more than your reasonable cost of physically performing this
+    conveying of source, or (2) access to copy the
+    Corresponding Source from a network server at no charge.
+
+    c) Convey individual copies of the object code with a copy of the
+    written offer to provide the Corresponding Source.  This
+    alternative is allowed only occasionally and noncommercially, and
+    only if you received the object code with such an offer, in accord
+    with subsection 6b.
+
+    d) Convey the object code by offering access from a designated
+    place (gratis or for a charge), and offer equivalent access to the
+    Corresponding Source in the same way through the same place at no
+    further charge.  You need not require recipients to copy the
+    Corresponding Source along with the object code.  If the place to
+    copy the object code is a network server, the Corresponding Source
+    may be on a different server (operated by you or a third party)
+    that supports equivalent copying facilities, provided you maintain
+    clear directions next to the object code saying where to find the
+    Corresponding Source.  Regardless of what server hosts the
+    Corresponding Source, you remain obligated to ensure that it is
+    available for as long as needed to satisfy these requirements.
+
+    e) Convey the object code using peer-to-peer transmission, provided
+    you inform other peers where the object code and Corresponding
+    Source of the work are being offered to the general public at no
+    charge under subsection 6d.
+
+  A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+
+  A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling.  In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage.  For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product.  A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+  "Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source.  The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+
+  If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information.  But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+
+  The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed.  Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+
+  Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+
+  7. Additional Terms.
+
+  "Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law.  If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+  When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it.  (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.)  You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+
+  Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+
+    a) Disclaiming warranty or limiting liability differently from the
+    terms of sections 15 and 16 of this License; or
+
+    b) Requiring preservation of specified reasonable legal notices or
+    author attributions in that material or in the Appropriate Legal
+    Notices displayed by works containing it; or
+
+    c) Prohibiting misrepresentation of the origin of that material, or
+    requiring that modified versions of such material be marked in
+    reasonable ways as different from the original version; or
+
+    d) Limiting the use for publicity purposes of names of licensors or
+    authors of the material; or
+
+    e) Declining to grant rights under trademark law for use of some
+    trade names, trademarks, or service marks; or
+
+    f) Requiring indemnification of licensors and authors of that
+    material by anyone who conveys the material (or modified versions of
+    it) with contractual assumptions of liability to the recipient, for
+    any liability that these contractual assumptions directly impose on
+    those licensors and authors.
+
+  All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10.  If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term.  If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+
+  If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+
+  Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+
+  8. Termination.
+
+  You may not propagate or modify a covered work except as expressly
+provided under this License.  Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+
+  However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+  Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+  Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License.  If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+
+  9. Acceptance Not Required for Having Copies.
+
+  You are not required to accept this License in order to receive or
+run a copy of the Program.  Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance.  However,
+nothing other than this License grants you permission to propagate or
+modify any covered work.  These actions infringe copyright if you do
+not accept this License.  Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+
+  10. Automatic Licensing of Downstream Recipients.
+
+  Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License.  You are not responsible
+for enforcing compliance by third parties with this License.
+
+  An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations.  If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+
+  You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License.  For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+
+  11. Patents.
+
+  A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based.  The
+work thus licensed is called the contributor's "contributor version".
+
+  A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version.  For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+
+  Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+
+  In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement).  To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+
+  If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients.  "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+
+  If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+
+  A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License.  You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+
+  Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+
+  12. No Surrender of Others' Freedom.
+
+  If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all.  For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+
+  13. Remote Network Interaction; Use with the GNU General Public License.
+
+  Notwithstanding any other provision of this License, if you modify the
+Program, your modified version must prominently offer all users
+interacting with it remotely through a computer network (if your version
+supports such interaction) an opportunity to receive the Corresponding
+Source of your version by providing access to the Corresponding Source
+from a network server at no charge, through some standard or customary
+means of facilitating copying of software.  This Corresponding Source
+shall include the Corresponding Source for any work covered by version 3
+of the GNU General Public License that is incorporated pursuant to the
+following paragraph.
+
+  Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU General Public License into a single
+combined work, and to convey the resulting work.  The terms of this
+License will continue to apply to the part which is the covered work,
+but the work with which it is combined will remain governed by version
+3 of the GNU General Public License.
+
+  14. Revised Versions of this License.
+
+  The Free Software Foundation may publish revised and/or new versions of
+the GNU Affero General Public License from time to time.  Such new versions
+will be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+  Each version is given a distinguishing version number.  If the
+Program specifies that a certain numbered version of the GNU Affero General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation.  If the Program does not specify a version number of the
+GNU Affero General Public License, you may choose any version ever published
+by the Free Software Foundation.
+
+  If the Program specifies that a proxy can decide which future
+versions of the GNU Affero General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+
+  Later license versions may give you additional or different
+permissions.  However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+  15. Disclaimer of Warranty.
+
+  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+  16. Limitation of Liability.
+
+  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+  17. Interpretation of Sections 15 and 16.
+
+  If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+                     END OF TERMS AND CONDITIONS
+
+            How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+  If your software can interact with users remotely through a computer
+network, you should also make sure that it provides a way for users to
+get its source.  For example, if your program is a web application, its
+interface could display a "Source" link that leads users to an archive
+of the code.  There are many ways you could offer source, and different
+solutions will be better for different programs; see section 13 for the
+specific requirements.
+
+  You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU AGPL, see
+<http://www.gnu.org/licenses/>.
index 49151318a751941742295ad427816414cfe4ad43..65f117b55a700b9aa218a143a7f80f00e982cb3a 100644 (file)
@@ -66,6 +66,7 @@ import itertools
 import ciso8601
 import collections
 import functools
+import arvados.keep
 
 import Queue
 
@@ -75,7 +76,7 @@ import Queue
 
 llfuse.capi._notify_queue = Queue.Queue()
 
-from fusedir import sanitize_filename, Directory, CollectionDirectory, MagicDirectory, TagsDirectory, ProjectDirectory, SharedDirectory, CollectionDirectoryBase
+from fusedir import sanitize_filename, Directory, CollectionDirectory, TmpCollectionDirectory, MagicDirectory, TagsDirectory, ProjectDirectory, SharedDirectory, CollectionDirectoryBase
 from fusefile import StringFile, FuseArvadosFile
 
 _logger = logging.getLogger('arvados.arvados_fuse')
@@ -191,8 +192,8 @@ class InodeCache(object):
         if obj.persisted() and obj.cache_priority in self._entries:
             self._remove(obj, True)
 
-    def find(self, uuid):
-        return self._by_uuid.get(uuid)
+    def find_by_uuid(self, uuid):
+        return self._by_uuid.get(uuid, [])
 
     def clear(self):
         self._entries.clear()
@@ -303,9 +304,11 @@ class Operations(llfuse.Operations):
 
     """
 
-    def __init__(self, uid, gid, encoding="utf-8", inode_cache=None, num_retries=4, enable_write=False):
+    def __init__(self, uid, gid, api_client, encoding="utf-8", inode_cache=None, num_retries=4, enable_write=False):
         super(Operations, self).__init__()
 
+        self._api_client = api_client
+
         if not inode_cache:
             inode_cache = InodeCache(cap=256*1024*1024)
         self.inodes = Inodes(inode_cache, encoding=encoding)
@@ -321,8 +324,19 @@ class Operations(llfuse.Operations):
         # is fully initialized should wait() on this event object.
         self.initlock = threading.Event()
 
+        # If we get overlapping shutdown events (e.g., fusermount -u
+        # -z and operations.destroy()) llfuse calls forget() on inodes
+        # that have already been deleted. To avoid this, we make
+        # forget() a no-op if called after destroy().
+        self._shutdown_started = threading.Event()
+
         self.num_retries = num_retries
 
+        self.read_counter = arvados.keep.Counter()
+        self.write_counter = arvados.keep.Counter()
+        self.read_ops_counter = arvados.keep.Counter()
+        self.write_ops_counter = arvados.keep.Counter()
+
         self.events = None
 
     def init(self):
@@ -332,50 +346,55 @@ class Operations(llfuse.Operations):
 
     @catch_exceptions
     def destroy(self):
-        if self.events:
-            self.events.close()
-            self.events = None
+        with llfuse.lock:
+            self._shutdown_started.set()
+            if self.events:
+                self.events.close()
+                self.events = None
 
-        self.inodes.clear()
+            self.inodes.clear()
 
     def access(self, inode, mode, ctx):
         return True
 
-    def listen_for_events(self, api_client):
-        self.events = arvados.events.subscribe(api_client,
+    def listen_for_events(self):
+        self.events = arvados.events.subscribe(self._api_client,
                                  [["event_type", "in", ["create", "update", "delete"]]],
                                  self.on_event)
 
     @catch_exceptions
     def on_event(self, ev):
-        if 'event_type' in ev:
-            with llfuse.lock:
-                items = self.inodes.inode_cache.find(ev["object_uuid"])
-                if items is not None:
-                    for item in items:
-                        item.invalidate()
-                        if ev["object_kind"] == "arvados#collection":
-                            new_attr = ev.get("properties") and ev["properties"].get("new_attributes") and ev["properties"]["new_attributes"]
-
-                            # new_attributes.modified_at currently lacks subsecond precision (see #6347) so use event_at which
-                            # should always be the same.
-                            #record_version = (new_attr["modified_at"], new_attr["portable_data_hash"]) if new_attr else None
-                            record_version = (ev["event_at"], new_attr["portable_data_hash"]) if new_attr else None
-
-                            item.update(to_record_version=record_version)
-                        else:
-                            item.update()
-
-                oldowner = ev.get("properties") and ev["properties"].get("old_attributes") and ev["properties"]["old_attributes"].get("owner_uuid")
-                olditemparent = self.inodes.inode_cache.find(oldowner)
-                if olditemparent is not None:
-                    olditemparent.invalidate()
-                    olditemparent.update()
-
-                itemparent = self.inodes.inode_cache.find(ev["object_owner_uuid"])
-                if itemparent is not None:
-                    itemparent.invalidate()
-                    itemparent.update()
+        if 'event_type' not in ev:
+            return
+        with llfuse.lock:
+            for item in self.inodes.inode_cache.find_by_uuid(ev["object_uuid"]):
+                item.invalidate()
+                if ev["object_kind"] == "arvados#collection":
+                    new_attr = (ev.get("properties") and
+                                ev["properties"].get("new_attributes") and
+                                ev["properties"]["new_attributes"])
+
+                    # new_attributes.modified_at currently lacks
+                    # subsecond precision (see #6347) so use event_at
+                    # which should always be the same.
+                    record_version = (
+                        (ev["event_at"], new_attr["portable_data_hash"])
+                        if new_attr else None)
+
+                    item.update(to_record_version=record_version)
+                else:
+                    item.update()
+
+            oldowner = (
+                ev.get("properties") and
+                ev["properties"].get("old_attributes") and
+                ev["properties"]["old_attributes"].get("owner_uuid"))
+            newowner = ev["object_owner_uuid"]
+            for parent in (
+                    self.inodes.inode_cache.find_by_uuid(oldowner) +
+                    self.inodes.inode_cache.find_by_uuid(newowner)):
+                parent.invalidate()
+                parent.update()
 
 
     @catch_exceptions
@@ -388,8 +407,8 @@ class Operations(llfuse.Operations):
         entry = llfuse.EntryAttributes()
         entry.st_ino = inode
         entry.generation = 0
-        entry.entry_timeout = 60
-        entry.attr_timeout = 60
+        entry.entry_timeout = 60 if e.allow_dirent_cache else 0
+        entry.attr_timeout = 60 if e.allow_attr_cache else 0
 
         entry.st_mode = stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH
         if isinstance(e, Directory):
@@ -457,6 +476,8 @@ class Operations(llfuse.Operations):
 
     @catch_exceptions
     def forget(self, inodes):
+        if self._shutdown_started.is_set():
+            return
         for inode, nlookup in inodes:
             ent = self.inodes[inode]
             _logger.debug("arv-mount forget: inode %i nlookup %i ref_count %i", inode, nlookup, ent.ref_count)
@@ -484,6 +505,8 @@ class Operations(llfuse.Operations):
     @catch_exceptions
     def read(self, fh, off, size):
         _logger.debug("arv-mount read %i %i %i", fh, off, size)
+        self.read_ops_counter.add(1)
+
         if fh in self._filehandles:
             handle = self._filehandles[fh]
         else:
@@ -491,11 +514,16 @@ class Operations(llfuse.Operations):
 
         self.inodes.touch(handle.obj)
 
-        return handle.obj.readfrom(off, size, self.num_retries)
+        r = handle.obj.readfrom(off, size, self.num_retries)
+        if r:
+            self.read_counter.add(len(r))
+        return r
 
     @catch_exceptions
     def write(self, fh, off, buf):
         _logger.debug("arv-mount write %i %i %i", fh, off, len(buf))
+        self.write_ops_counter.add(1)
+
         if fh in self._filehandles:
             handle = self._filehandles[fh]
         else:
@@ -506,7 +534,10 @@ class Operations(llfuse.Operations):
 
         self.inodes.touch(handle.obj)
 
-        return handle.obj.writeto(off, buf, self.num_retries)
+        w = handle.obj.writeto(off, buf, self.num_retries)
+        if w:
+            self.write_counter.add(w)
+        return w
 
     @catch_exceptions
     def release(self, fh):
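
The read() and write() paths above now record both operation counts and byte totals through arvados.keep.Counter objects, so an external reporter (such as the crunchstat support referenced in the new command module below) can poll cumulative I/O. A minimal stand-in sketch of such a counter, assuming only an add()/get() interface (the real arvados.keep.Counter may differ):

    import threading

    class Counter(object):
        """Thread-safe, monotonically increasing tally."""
        def __init__(self, v=0):
            self._value = v
            self._lock = threading.Lock()

        def add(self, v):
            with self._lock:
                self._value += v

        def get(self):
            with self._lock:
                return self._value
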
diff --git a/services/fuse/arvados_fuse/command.py b/services/fuse/arvados_fuse/command.py
new file mode 100644 (file)
index 0000000..93bcd20
--- /dev/null
@@ -0,0 +1,338 @@
+import argparse
+import arvados
+import daemon
+import llfuse
+import logging
+import os
+import signal
+import subprocess
+import sys
+import time
+
+import arvados.commands._util as arv_cmd
+from arvados_fuse import crunchstat
+from arvados_fuse import *
+
+class ArgumentParser(argparse.ArgumentParser):
+    def __init__(self):
+        super(ArgumentParser, self).__init__(
+            parents=[arv_cmd.retry_opt],
+            description='''Mount Keep data under the local filesystem.  Default mode is --home''',
+            epilog="""
+    Note: When using the --exec feature, you must either specify the
+    mountpoint before --exec, or mark the end of your --exec arguments
+    with "--".
+            """)
+        self.add_argument('mountpoint', type=str, help="""Mount point.""")
+        self.add_argument('--allow-other', action='store_true',
+                            help="""Let other users read the mount""")
+
+        mode = self.add_mutually_exclusive_group()
+
+        mode.add_argument('--all', action='store_const', const='all', dest='mode',
+                                help="""Mount a subdirectory for each mode: home, shared, by_tag, by_id (default if no --mount-* arguments are given).""")
+        mode.add_argument('--custom', action='store_const', const=None, dest='mode',
+                                help="""Mount a top level meta-directory with subdirectories as specified by additional --mount-* arguments (default if any --mount-* arguments are given).""")
+        mode.add_argument('--home', action='store_const', const='home', dest='mode',
+                                help="""Mount only the user's home project.""")
+        mode.add_argument('--shared', action='store_const', const='shared', dest='mode',
+                                help="""Mount only list of projects shared with the user.""")
+        mode.add_argument('--by-tag', action='store_const', const='by_tag', dest='mode',
+                                help="""Mount subdirectories listed by tag.""")
+        mode.add_argument('--by-id', action='store_const', const='by_id', dest='mode',
+                                help="""Mount subdirectories listed by portable data hash or uuid.""")
+        mode.add_argument('--by-pdh', action='store_const', const='by_pdh', dest='mode',
+                                help="""Mount subdirectories listed by portable data hash.""")
+        mode.add_argument('--project', type=str, metavar='UUID',
+                                help="""Mount the specified project.""")
+        mode.add_argument('--collection', type=str, metavar='UUID_or_PDH',
+                                help="""Mount only the specified collection.""")
+
+        mounts = self.add_argument_group('Custom mount options')
+        mounts.add_argument('--mount-by-pdh',
+                            type=str, metavar='PATH', action='append', default=[],
+                            help="Mount each readable collection at mountpoint/PATH/P where P is the collection's portable data hash.")
+        mounts.add_argument('--mount-by-id',
+                            type=str, metavar='PATH', action='append', default=[],
+                            help="Mount each readable collection at mountpoint/PATH/UUID and mountpoint/PATH/PDH where PDH is the collection's portable data hash and UUID is its UUID.")
+        mounts.add_argument('--mount-by-tag',
+                            type=str, metavar='PATH', action='append', default=[],
+                            help="Mount all collections with tag TAG at mountpoint/PATH/TAG/UUID.")
+        mounts.add_argument('--mount-home',
+                            type=str, metavar='PATH', action='append', default=[],
+                            help="Mount the current user's home project at mountpoint/PATH.")
+        mounts.add_argument('--mount-shared',
+                            type=str, metavar='PATH', action='append', default=[],
+                            help="Mount projects shared with the current user at mountpoint/PATH.")
+        mounts.add_argument('--mount-tmp',
+                            type=str, metavar='PATH', action='append', default=[],
+                            help="Create a new collection, mount it in read/write mode at mountpoint/PATH, and delete it when unmounting.")
+
+        self.add_argument('--debug', action='store_true', help="""Debug mode""")
+        self.add_argument('--logfile', help="""Write debug logs and errors to the specified file (default stderr).""")
+        self.add_argument('--foreground', action='store_true', help="""Run in foreground (default is to daemonize unless --exec specified)""", default=False)
+        self.add_argument('--encoding', type=str, help="Character encoding to use for filesystem, default is utf-8 (see Python codec registry for a list of available encodings)", default="utf-8")
+
+        self.add_argument('--file-cache', type=int, help="File data cache size, in bytes (default 256MiB)", default=256*1024*1024)
+        self.add_argument('--directory-cache', type=int, help="Directory data cache size, in bytes (default 128MiB)", default=128*1024*1024)
+
+        self.add_argument('--read-only', action='store_false', help="Mount will be read only (default)", dest="enable_write", default=False)
+        self.add_argument('--read-write', action='store_true', help="Mount will be read-write", dest="enable_write", default=False)
+
+        self.add_argument('--crunchstat-interval', type=float, help="Write stats to stderr every N seconds (default disabled)", default=0)
+
+        self.add_argument('--exec', type=str, nargs=argparse.REMAINDER,
+                            dest="exec_args", metavar=('command', 'args', '...', '--'),
+                            help="""Mount, run a command, then unmount and exit""")
+
+
+class Mount(object):
+    def __init__(self, args, logger=logging.getLogger('arvados.arv-mount')):
+        self.logger = logger
+        self.args = args
+
+        self.args.mountpoint = os.path.realpath(self.args.mountpoint)
+        if self.args.logfile:
+            self.args.logfile = os.path.realpath(self.args.logfile)
+
+        # Daemonize as early as possible, so we don't accidentally close
+        # file descriptors we're using.
+        self.daemon_ctx = None
+        if not (self.args.exec_args or self.args.foreground):
+            os.chdir(self.args.mountpoint)
+            self.daemon_ctx = daemon.DaemonContext(working_directory='.')
+            self.daemon_ctx.open()
+
+        try:
+            self._setup_logging()
+            self._setup_api()
+            self._setup_mount()
+        except Exception as e:
+            self.logger.exception("arv-mount: exception during setup: %s", e)
+            exit(1)
+
+    def __enter__(self):
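+        # Initialize the FUSE session, run the llfuse dispatch loop in
+        # a background thread, and block until the driver is ready.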
+        llfuse.init(self.operations, self.args.mountpoint, self._fuse_options())
+        if self.args.mode != 'by_pdh':
+            self.operations.listen_for_events()
+        t = threading.Thread(None, lambda: llfuse.main())
+        t.start()
+        self.operations.initlock.wait()
+
+    def __exit__(self, exc_type, exc_value, traceback):
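+        # Lazy-unmount (fusermount -u -z) so the mount point is
+        # released even if files are still open, then shut down the
+        # request handler.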
+        subprocess.call(["fusermount", "-u", "-z", self.args.mountpoint])
+        self.operations.destroy()
+
+    def run(self):
+        if self.args.exec_args:
+            self._run_exec()
+        else:
+            self._run_standalone()
+
+    def _fuse_options(self):
+        """FUSE mount options; see mount.fuse(8)"""
+        opts = [optname for optname in ['allow_other', 'debug']
+                if getattr(self.args, optname)]
+        # Increase default read/write size from 4KiB to 128KiB
+        opts += ["big_writes", "max_read=131072"]
+        return opts
+
+    def _setup_logging(self):
+        # Configure a log handler based on command-line switches.
+        if self.args.logfile:
+            log_handler = logging.FileHandler(self.args.logfile)
+        elif self.daemon_ctx:
+            log_handler = logging.NullHandler()
+        else:
+            log_handler = None
+
+        if log_handler is not None:
+            arvados.logger.removeHandler(arvados.log_handler)
+            arvados.logger.addHandler(log_handler)
+
+        if self.args.debug:
+            arvados.logger.setLevel(logging.DEBUG)
+            self.logger.debug("arv-mount debugging enabled")
+
+        self.logger.info("enable write is %s", self.args.enable_write)
+
+    def _setup_api(self):
+        self.api = arvados.safeapi.ThreadSafeApiCache(
+            apiconfig=arvados.config.settings(),
+            keep_params={
+                "block_cache": arvados.keep.KeepBlockCache(self.args.file_cache)
+            })
+
+    def _setup_mount(self):
+        self.operations = Operations(
+            os.getuid(),
+            os.getgid(),
+            api_client=self.api,
+            encoding=self.args.encoding,
+            inode_cache=InodeCache(cap=self.args.directory_cache),
+            enable_write=self.args.enable_write)
+
+        if self.args.crunchstat_interval:
+            statsthread = threading.Thread(
+                target=crunchstat.statlogger,
+                args=(self.args.crunchstat_interval,
+                      self.api.keep,
+                      self.operations))
+            statsthread.daemon = True
+            statsthread.start()
+
+        usr = self.api.users().current().execute(num_retries=self.args.retries)
+        now = time.time()
+        dir_class = None
+        dir_args = [llfuse.ROOT_INODE, self.operations.inodes, self.api, self.args.retries]
+        mount_readme = False
+
+        if self.args.collection is not None:
+            # Set up the request handler with the collection at the root
+            self.args.mode = 'collection'
+            dir_class = CollectionDirectory
+            dir_args.append(self.args.collection)
+        elif self.args.project is not None:
+            self.args.mode = 'project'
+            dir_class = ProjectDirectory
+            dir_args.append(
+                self.api.groups().get(uuid=self.args.project).execute(
+                    num_retries=self.args.retries))
+
+        if (self.args.mount_by_id or
+            self.args.mount_by_pdh or
+            self.args.mount_by_tag or
+            self.args.mount_home or
+            self.args.mount_shared or
+            self.args.mount_tmp):
+            if self.args.mode is not None:
+                sys.exit(
+                    "Cannot combine '{}' mode with custom --mount-* options.".
+                    format(self.args.mode))
+        elif self.args.mode is None:
+            # With no --mount-* arguments and no other mode selected, --all is the default
+            self.args.mode = 'all'
+
+        if self.args.mode in ['by_id', 'by_pdh']:
+            # Set up the request handler with the 'magic directory' at the root
+            dir_class = MagicDirectory
+            dir_args.append(self.args.mode == 'by_pdh')
+        elif self.args.mode == 'by_tag':
+            dir_class = TagsDirectory
+        elif self.args.mode == 'shared':
+            dir_class = SharedDirectory
+            dir_args.append(usr)
+        elif self.args.mode == 'home':
+            dir_class = ProjectDirectory
+            dir_args.append(usr)
+            dir_args.append(True)
+        elif self.args.mode == 'all':
+            self.args.mount_by_id = ['by_id']
+            self.args.mount_by_tag = ['by_tag']
+            self.args.mount_home = ['home']
+            self.args.mount_shared = ['shared']
+            mount_readme = True
+
+        if dir_class is not None:
+            self.operations.inodes.add_entry(dir_class(*dir_args))
+            return
+
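+        # No single root directory class: create an empty root and
+        # graft each requested --mount-* directory into it by name.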
+        e = self.operations.inodes.add_entry(Directory(
+            llfuse.ROOT_INODE, self.operations.inodes))
+        dir_args[0] = e.inode
+
+        for name in self.args.mount_by_id:
+            self._add_mount(e, name, MagicDirectory(*dir_args, pdh_only=False))
+        for name in self.args.mount_by_pdh:
+            self._add_mount(e, name, MagicDirectory(*dir_args, pdh_only=True))
+        for name in self.args.mount_by_tag:
+            self._add_mount(e, name, TagsDirectory(*dir_args))
+        for name in self.args.mount_home:
+            self._add_mount(e, name, ProjectDirectory(*dir_args, project_object=usr, poll=True))
+        for name in self.args.mount_shared:
+            self._add_mount(e, name, SharedDirectory(*dir_args, exclude=usr, poll=True))
+        for name in self.args.mount_tmp:
+            self._add_mount(e, name, TmpCollectionDirectory(*dir_args))
+
+        if mount_readme:
+            text = self._readme_text(
+                arvados.config.get('ARVADOS_API_HOST'),
+                usr['email'])
+            self._add_mount(e, 'README', StringFile(e.inode, text, now))
+
+    def _add_mount(self, tld, name, ent):
+        if name in ['', '.', '..'] or '/' in name:
+            sys.exit("Mount point '{}' is not supported.".format(name))
+        tld._entries[name] = self.operations.inodes.add_entry(ent)
+
+    def _readme_text(self, api_host, user_email):
+        return '''
+Welcome to Arvados!  This directory provides file system access to
+files and objects available on the Arvados installation located at
+'{}' using credentials for user '{}'.
+
+From here, the following directories are available:
+
+  by_id/     Access to Keep collections by uuid or portable data hash (see by_id/README for details).
+  by_tag/    Access to Keep collections organized by tag.
+  home/      The contents of your home project.
+  shared/    Projects shared with you.
+
+'''.format(api_host, user_email)
+
+    def _run_exec(self):
+        # Initialize the fuse connection
+        llfuse.init(self.operations, self.args.mountpoint, self._fuse_options())
+
+        # Subscribe to change events from API server
+        if self.args.mode != 'by_pdh':
+            self.operations.listen_for_events()
+
+        t = threading.Thread(None, lambda: llfuse.main())
+        t.start()
+
+        # wait until the driver is finished initializing
+        self.operations.initlock.wait()
+
+        rc = 255
+        try:
+            sp = subprocess.Popen(self.args.exec_args, shell=False)
+
+            # forward signals to the process.
+            signal.signal(signal.SIGINT, lambda signum, frame: sp.send_signal(signum))
+            signal.signal(signal.SIGTERM, lambda signum, frame: sp.send_signal(signum))
+            signal.signal(signal.SIGQUIT, lambda signum, frame: sp.send_signal(signum))
+
+            # wait for process to complete.
+            rc = sp.wait()
+
+            # restore default signal handlers.
+            signal.signal(signal.SIGINT, signal.SIG_DFL)
+            signal.signal(signal.SIGTERM, signal.SIG_DFL)
+            signal.signal(signal.SIGQUIT, signal.SIG_DFL)
+        except Exception as e:
+            self.logger.exception(
+                'arv-mount: exception during exec %s', self.args.exec_args)
+            try:
+                rc = e.errno
+            except AttributeError:
+                pass
+        finally:
+            subprocess.call(["fusermount", "-u", "-z", self.args.mountpoint])
+            self.operations.destroy()
+        exit(rc)
+
+    def _run_standalone(self):
+        try:
+            llfuse.init(self.operations, self.args.mountpoint, self._fuse_options())
+
+            # Subscribe to change events from API server
+            self.operations.listen_for_events()
+
+            llfuse.main()
+        except Exception as e:
+            self.logger.exception('arv-mount: exception during mount: %s', e)
+            exit(getattr(e, 'errno', 1))
+        finally:
+            self.operations.destroy()
+        exit(0)
diff --git a/services/fuse/arvados_fuse/crunchstat.py b/services/fuse/arvados_fuse/crunchstat.py
new file mode 100644 (file)
index 0000000..67d2ccc
--- /dev/null
@@ -0,0 +1,63 @@
+import sys
+import time
+
+class Stat(object):
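+    """Track a pair of cumulative counters and log totals plus deltas.
+
+    Each update() writes one crunchstat-format line to stderr showing
+    the current egress/ingress totals and the change since the
+    previous update.
+    """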
+    def __init__(self, prefix, interval,
+                 egr_name, ing_name,
+                 egr_func, ing_func):
+        self.prefix = prefix
+        self.interval = interval
+        self.egr_name = egr_name
+        self.ing_name = ing_name
+        self.egress = egr_func
+        self.ingress = ing_func
+        self.egr_prev = self.egress()
+        self.ing_prev = self.ingress()
+
+    def update(self):
+        egr = self.egress()
+        ing = self.ingress()
+
+        delta = " -- interval %.4f seconds %d %s %d %s" % (self.interval,
+                                                           egr - self.egr_prev,
+                                                           self.egr_name,
+                                                           ing - self.ing_prev,
+                                                           self.ing_name)
+
+        sys.stderr.write("crunchstat: %s %d %s %d %s%s\n" % (self.prefix,
+                                                             egr,
+                                                             self.egr_name,
+                                                             ing,
+                                                             self.ing_name,
+                                                             delta))
+
+        self.egr_prev = egr
+        self.ing_prev = ing
+
+
+def statlogger(interval, keep, ops):
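+    # Build a Stat tracker for each pair of KeepClient and Operations
+    # counters, then log one line per tracker every interval seconds
+    # until the process exits.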
+    calls = Stat("keepcalls", interval, "put", "get",
+                 keep.put_counter.get,
+                 keep.get_counter.get)
+    net = Stat("net:keep0", interval, "tx", "rx",
+               keep.upload_counter.get,
+               keep.download_counter.get)
+    cache = Stat("keepcache", interval, "hit", "miss",
+                 keep.hits_counter.get,
+                 keep.misses_counter.get)
+    fuseops = Stat("fuseops", interval, "write", "read",
+                   ops.write_ops_counter.get,
+                   ops.read_ops_counter.get)
+    blk = Stat("blkio:0:0", interval, "write", "read",
+               ops.write_counter.get,
+               ops.read_counter.get)
+
+    while True:
+        time.sleep(interval)
+        calls.update()
+        net.update()
+        cache.update()
+        fuseops.update()
+        blk.update()
+
+
diff --git a/services/fuse/arvados_fuse/fresh.py b/services/fuse/arvados_fuse/fresh.py
index ec2d47a269791786ad38b7cc9b1e0424869b9042..2075741dbd64b923b37a8ca6da84808bbbb83cc2 100644 (file)
@@ -67,6 +67,8 @@ class FreshBase(object):
         self.cache_priority = None
         self.cache_size = 0
         self.cache_uuid = None
+        self.allow_attr_cache = True
+        self.allow_dirent_cache = True
 
     # Mark the value as stale
     def invalidate(self):
diff --git a/services/fuse/arvados_fuse/fusedir.py b/services/fuse/arvados_fuse/fusedir.py
index 8ffca49601d795fc0622326e04eb219dd3dd0d8d..a2135b3defc299e29b3f5a284f61653d24037fe7 100644 (file)
@@ -8,8 +8,9 @@ import functools
 import threading
 from apiclient import errors as apiclient_errors
 import errno
+import time
 
-from fusefile import StringFile, ObjectFile, FuseArvadosFile
+from fusefile import StringFile, ObjectFile, FuncToJSONFile, FuseArvadosFile
 from fresh import FreshBase, convertTime, use_counter, check_update
 
 import arvados.collection
@@ -474,6 +475,75 @@ class CollectionDirectory(CollectionDirectoryBase):
             self.collection.stop_threads()
 
 
+class TmpCollectionDirectory(CollectionDirectoryBase):
+    """A directory backed by an Arvados collection that never gets saved.
+
+    This supports using Keep as scratch space. A userspace program can
+    read the .arvados#collection file to get a current manifest in
+    order to save a snapshot of the scratch data or use it as a crunch
+    job output.
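+
+    For example (a sketch, where mnt stands for the path at which this
+    directory is mounted), a snapshot of the current manifest can be
+    read with:
+
+        with open(os.path.join(mnt, '.arvados#collection')) as f:
+            manifest = json.load(f)['manifest_text']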
+    """
+
+    class UnsaveableCollection(arvados.collection.Collection):
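+        # save() and save_new() are overridden as no-ops so the
+        # collection record is never written to the API server.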
+        def save(self):
+            pass
+        def save_new(self):
+            pass
+
+    def __init__(self, parent_inode, inodes, api_client, num_retries):
+        collection = self.UnsaveableCollection(
+            api_client=api_client,
+            keep_client=api_client.keep)
+        super(TmpCollectionDirectory, self).__init__(
+            parent_inode, inodes, collection)
+        self.collection_record_file = None
+        self.populate(self.mtime())
+
+    def on_event(self, *args, **kwargs):
+        super(TmpCollectionDirectory, self).on_event(*args, **kwargs)
+        if self.collection_record_file:
+            with llfuse.lock:
+                self.collection_record_file.invalidate()
+            self.inodes.invalidate_inode(self.collection_record_file.inode)
+            _logger.debug("%s invalidated collection record", self)
+
+    def collection_record(self):
+        with llfuse.lock_released:
+            return {
+                "uuid": None,
+                "manifest_text": self.collection.manifest_text(),
+                "portable_data_hash": self.collection.portable_data_hash(),
+            }
+
+    def __contains__(self, k):
+        return (k == '.arvados#collection' or
+                super(TmpCollectionDirectory, self).__contains__(k))
+
+    @use_counter
+    def __getitem__(self, item):
+        if item == '.arvados#collection':
+            if self.collection_record_file is None:
+                self.collection_record_file = FuncToJSONFile(
+                    self.inode, self.collection_record)
+                self.inodes.add_entry(self.collection_record_file)
+            return self.collection_record_file
+        return super(TmpCollectionDirectory, self).__getitem__(item)
+
+    def persisted(self):
+        return False
+
+    def writable(self):
+        return True
+
+    def finalize(self):
+        self.collection.stop_threads()
+
+    def invalidate(self):
+        if self.collection_record_file:
+            self.collection_record_file.invalidate()
+        super(TmpCollectionDirectory, self).invalidate()
+
+
 class MagicDirectory(Directory):
     """A special directory that logically contains the set of all extant keep locators.
 
@@ -488,18 +558,20 @@ class MagicDirectory(Directory):
     README_TEXT = """
 This directory provides access to Arvados collections as subdirectories listed
 by uuid (in the form 'zzzzz-4zz18-1234567890abcde') or portable data hash (in
-the form '1234567890abcdefghijklmnopqrstuv+123').
+the form '1234567890abcdef0123456789abcdef+123').
 
 Note that this directory will appear empty until you attempt to access a
 specific collection subdirectory (such as trying to 'cd' into it), at which
 point the collection will actually be looked up on the server and the directory
 will appear if it exists.
+
 """.lstrip()
 
-    def __init__(self, parent_inode, inodes, api, num_retries):
+    def __init__(self, parent_inode, inodes, api, num_retries, pdh_only=False):
         super(MagicDirectory, self).__init__(parent_inode, inodes)
         self.api = api
         self.num_retries = num_retries
+        self.pdh_only = pdh_only
 
     def __setattr__(self, name, value):
         super(MagicDirectory, self).__setattr__(name, value)
@@ -511,13 +583,13 @@ will appear if it exists.
             # If we're the root directory, add an identical by_id subdirectory.
             if self.inode == llfuse.ROOT_INODE:
                 self._entries['by_id'] = self.inodes.add_entry(MagicDirectory(
-                        self.inode, self.inodes, self.api, self.num_retries))
+                        self.inode, self.inodes, self.api, self.num_retries, self.pdh_only))
 
     def __contains__(self, k):
         if k in self._entries:
             return True
 
-        if not portable_data_hash_pattern.match(k) and not uuid_pattern.match(k):
+        if not portable_data_hash_pattern.match(k) and (self.pdh_only or not uuid_pattern.match(k)):
             return False
 
         try:
diff --git a/services/fuse/arvados_fuse/fusefile.py b/services/fuse/arvados_fuse/fusefile.py
index 4d472cff1cca38380d80afa63b9783027ad1db30..e731327dec5524432d0eb12c7d9dfc2b900fafd4 100644 (file)
@@ -1,7 +1,8 @@
-import logging
-import re
 import json
 import llfuse
+import logging
+import re
+import time
 
 from fresh import FreshBase, convertTime
 
@@ -99,3 +100,37 @@ class ObjectFile(StringFile):
 
     def persisted(self):
         return True
+
+
+class FuncToJSONFile(StringFile):
+    """File content is the return value of a given function, encoded as JSON.
+
+    The function is called at the time the file is read. The result is
+    cached until invalidate() is called.
+    """
+    def __init__(self, parent_inode, func):
+        super(FuncToJSONFile, self).__init__(parent_inode, "", 0)
+        self.func = func
+
+        # invalidate_inode() and invalidate_entry() are asynchronous
+        # with no callback to wait for. In order to guarantee
+        # userspace programs don't get stale data that was generated
+        # before the last invalidate(), we must disallow dirent
+        # caching entirely.
+        self.allow_dirent_cache = False
+
+    def size(self):
+        self._update()
+        return super(FuncToJSONFile, self).size()
+
+    def readfrom(self, *args, **kwargs):
+        self._update()
+        return super(FuncToJSONFile, self).readfrom(*args, **kwargs)
+
+    def _update(self):
+        if not self.stale():
+            return
+        self._mtime = time.time()
+        obj = self.func()
+        self.contents = json.dumps(obj, indent=4, sort_keys=True) + "\n"
+        self.fresh()
diff --git a/services/fuse/bin/arv-mount b/services/fuse/bin/arv-mount
index 7f9c916c456f49e76744f78ba06b18af2eb1ed1b..671d6a95e93f4d2988196aedf4b1eac485d1abd2 100755 (executable)
@@ -1,221 +1,7 @@
 #!/usr/bin/env python
 
-import argparse
-import arvados
-import daemon
-import logging
-import os
-import signal
-import subprocess
-import time
-
-import arvados.commands._util as arv_cmd
-from arvados_fuse import *
-from arvados.safeapi import ThreadSafeApiCache
-import arvados.keep
-
-logger = logging.getLogger('arvados.arv-mount')
+import arvados_fuse.command
 
 if __name__ == '__main__':
-    # Handle command line parameters
-    parser = argparse.ArgumentParser(
-        parents=[arv_cmd.retry_opt],
-        description='''Mount Keep data under the local filesystem.  Default mode is --home''',
-        epilog="""
-Note: When using the --exec feature, you must either specify the
-mountpoint before --exec, or mark the end of your --exec arguments
-with "--".
-""")
-    parser.add_argument('mountpoint', type=str, help="""Mount point.""")
-    parser.add_argument('--allow-other', action='store_true',
-                        help="""Let other users read the mount""")
-
-    mount_mode = parser.add_mutually_exclusive_group()
-
-    mount_mode.add_argument('--all', action='store_true', help="""Mount a subdirectory for each mode: home, shared, by_tag, by_id (default).""")
-    mount_mode.add_argument('--home', action='store_true', help="""Mount only the user's home project.""")
-    mount_mode.add_argument('--shared', action='store_true', help="""Mount only list of projects shared with the user.""")
-    mount_mode.add_argument('--by-tag', action='store_true',
-                            help="""Mount subdirectories listed by tag.""")
-    mount_mode.add_argument('--by-id', action='store_true',
-                            help="""Mount subdirectories listed by portable data hash or uuid.""")
-    mount_mode.add_argument('--project', type=str, help="""Mount a specific project.""")
-    mount_mode.add_argument('--collection', type=str, help="""Mount only the specified collection.""")
-
-    parser.add_argument('--debug', action='store_true', help="""Debug mode""")
-    parser.add_argument('--logfile', help="""Write debug logs and errors to the specified file (default stderr).""")
-    parser.add_argument('--foreground', action='store_true', help="""Run in foreground (default is to daemonize unless --exec specified)""", default=False)
-    parser.add_argument('--encoding', type=str, help="Character encoding to use for filesystem, default is utf-8 (see Python codec registry for list of available encodings)", default="utf-8")
-
-    parser.add_argument('--file-cache', type=int, help="File data cache size, in bytes (default 256MiB)", default=256*1024*1024)
-    parser.add_argument('--directory-cache', type=int, help="Directory data cache size, in bytes (default 128MiB)", default=128*1024*1024)
-
-    parser.add_argument('--read-only', action='store_false', help="Mount will be read only (default)", dest="enable_write", default=False)
-    parser.add_argument('--read-write', action='store_true', help="Mount will be read-write", dest="enable_write", default=False)
-
-    parser.add_argument('--exec', type=str, nargs=argparse.REMAINDER,
-                        dest="exec_args", metavar=('command', 'args', '...', '--'),
-                        help="""Mount, run a command, then unmount and exit""")
-
-    args = parser.parse_args()
-    args.mountpoint = os.path.realpath(args.mountpoint)
-    if args.logfile:
-        args.logfile = os.path.realpath(args.logfile)
-
-    # Daemonize as early as possible, so we don't accidentally close
-    # file descriptors we're using.
-    if not (args.exec_args or args.foreground):
-        os.chdir(args.mountpoint)
-        daemon_ctx = daemon.DaemonContext(working_directory='.')
-        daemon_ctx.open()
-    else:
-        daemon_ctx = None
-
-    # Configure a log handler based on command-line switches.
-    if args.logfile:
-        log_handler = logging.FileHandler(args.logfile)
-    elif daemon_ctx:
-        log_handler = logging.NullHandler()
-    else:
-        log_handler = None
-
-    if log_handler is not None:
-        arvados.logger.removeHandler(arvados.log_handler)
-        arvados.logger.addHandler(log_handler)
-
-    if args.debug:
-        arvados.logger.setLevel(logging.DEBUG)
-        logger.debug("arv-mount debugging enabled")
-
-    logger.info("enable write is %s", args.enable_write)
-
-    try:
-        # Create the request handler
-        operations = Operations(os.getuid(),
-                                os.getgid(),
-                                encoding=args.encoding,
-                                inode_cache=InodeCache(cap=args.directory_cache),
-                                enable_write=args.enable_write)
-        api = ThreadSafeApiCache(apiconfig=arvados.config.settings(),
-                                 keep_params={"block_cache": arvados.keep.KeepBlockCache(args.file_cache)})
-
-        usr = api.users().current().execute(num_retries=args.retries)
-        now = time.time()
-        dir_class = None
-        dir_args = [llfuse.ROOT_INODE, operations.inodes, api, args.retries]
-        if args.by_id:
-            # Set up the request handler with the 'magic directory' at the root
-            dir_class = MagicDirectory
-        elif args.by_tag:
-            dir_class = TagsDirectory
-        elif args.shared:
-            dir_class = SharedDirectory
-            dir_args.append(usr)
-        elif args.home:
-            dir_class = ProjectDirectory
-            dir_args.append(usr)
-            dir_args.append(True)
-        elif args.collection is not None:
-            # Set up the request handler with the collection at the root
-            dir_class = CollectionDirectory
-            dir_args.append(args.collection)
-        elif args.project is not None:
-            dir_class = ProjectDirectory
-            dir_args.append(api.groups().get(uuid=args.project).execute(
-                    num_retries=args.retries))
-
-        if dir_class is not None:
-            operations.inodes.add_entry(dir_class(*dir_args))
-        else:
-            e = operations.inodes.add_entry(Directory(llfuse.ROOT_INODE, operations.inodes))
-            dir_args[0] = e.inode
-
-            e._entries['by_id'] = operations.inodes.add_entry(MagicDirectory(*dir_args))
-            e._entries['by_tag'] = operations.inodes.add_entry(TagsDirectory(*dir_args))
-
-            dir_args.append(usr)
-            dir_args.append(True)
-            e._entries['home'] = operations.inodes.add_entry(ProjectDirectory(*dir_args))
-            e._entries['shared'] = operations.inodes.add_entry(SharedDirectory(*dir_args))
-
-            text = '''
-Welcome to Arvados!  This directory provides file system access to files and objects
-available on the Arvados installation located at '{}'
-using credentials for user '{}'.
-
-From here, the following directories are available:
-
-  by_id/     Access to Keep collections by uuid or portable data hash (see by_id/README for details).
-  by_tag/    Access to Keep collections organized by tag.
-  home/      The contents of your home project.
-  shared/    Projects shared with you.
-'''.format(arvados.config.get('ARVADOS_API_HOST'), usr['email'])
-
-            e._entries["README"] = operations.inodes.add_entry(StringFile(e.inode, text, now))
-
-
-    except Exception:
-        logger.exception("arv-mount: exception during API setup")
-        exit(1)
-
-    # FUSE options, see mount.fuse(8)
-    opts = [optname for optname in ['allow_other', 'debug']
-            if getattr(args, optname)]
-
-    # Increase default read/write size from 4KiB to 128KiB
-    opts += ["big_writes", "max_read=131072"]
-
-    if args.exec_args:
-        # Initialize the fuse connection
-        llfuse.init(operations, args.mountpoint, opts)
-
-        # Subscribe to change events from API server
-        operations.listen_for_events(api)
-
-        t = threading.Thread(None, lambda: llfuse.main())
-        t.start()
-
-        # wait until the driver is finished initializing
-        operations.initlock.wait()
-
-        rc = 255
-        try:
-            sp = subprocess.Popen(args.exec_args, shell=False)
-
-            # forward signals to the process.
-            signal.signal(signal.SIGINT, lambda signum, frame: sp.send_signal(signum))
-            signal.signal(signal.SIGTERM, lambda signum, frame: sp.send_signal(signum))
-            signal.signal(signal.SIGQUIT, lambda signum, frame: sp.send_signal(signum))
-
-            # wait for process to complete.
-            rc = sp.wait()
-
-            # restore default signal handlers.
-            signal.signal(signal.SIGINT, signal.SIG_DFL)
-            signal.signal(signal.SIGTERM, signal.SIG_DFL)
-            signal.signal(signal.SIGQUIT, signal.SIG_DFL)
-        except Exception as e:
-            logger.exception('arv-mount: exception during exec %s',
-                             args.exec_args)
-            try:
-                rc = e.errno
-            except AttributeError:
-                pass
-        finally:
-            subprocess.call(["fusermount", "-u", "-z", args.mountpoint])
-            operations.destroy()
-
-        exit(rc)
-    else:
-        try:
-            llfuse.init(operations, args.mountpoint, opts)
-
-            # Subscribe to change events from API server
-            operations.listen_for_events(api)
-
-            llfuse.main()
-        except Exception as e:
-            logger.exception('arv-mount: exception during mount')
-            exit(getattr(e, 'errno', 1))
-        finally:
-            operations.destroy()
+    args = arvados_fuse.command.ArgumentParser().parse_args()
+    arvados_fuse.command.Mount(args).run()
diff --git a/services/fuse/setup.py b/services/fuse/setup.py
index d7ac773f35198b724fbc91db877026035bb832eb..1cedd6665001b86a5baf4e1d5cba2c2a32e9df6f 100644 (file)
@@ -28,8 +28,11 @@ setup(name='arvados_fuse',
       scripts=[
         'bin/arv-mount'
         ],
+      data_files=[
+          ('share/doc/arvados_fuse', ['agpl-3.0.txt', 'README.rst']),
+      ],
       install_requires=[
-        'arvados-python-client >= 0.1.20150625175218',
+        'arvados-python-client >= 0.1.20151118035730',
         'llfuse>=0.40',
         'python-daemon',
         'ciso8601'
diff --git a/services/fuse/tests/integration_test.py b/services/fuse/tests/integration_test.py
new file mode 100644 (file)
index 0000000..faa4a55
--- /dev/null
@@ -0,0 +1,68 @@
+import arvados
+import arvados_fuse
+import arvados_fuse.command
+import functools
+import inspect
+import multiprocessing
+import os
+import sys
+import tempfile
+import unittest
+import run_test_server
+
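+# multiprocessing can only pass picklable objects to the child
+# process, so the target class and method are looked up by name there
+# and run inside a throwaway TestCase.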
+def wrap_static_test_method(modName, clsName, funcName, args, kwargs):
+    class Test(unittest.TestCase):
+        def runTest(self, *args, **kwargs):
+            getattr(getattr(sys.modules[modName], clsName), funcName)(self, *args, **kwargs)
+    Test().runTest(*args, **kwargs)
+
+
+class IntegrationTest(unittest.TestCase):
+    def pool_test(self, *args, **kwargs):
+        """Run a static method as a unit test, in a different process.
+
+        If called by method 'foobar', the static method '_foobar' of
+        the same class will be called in the other process.
+        """
+        modName = inspect.getmodule(self).__name__
+        clsName = self.__class__.__name__
+        funcName = inspect.currentframe().f_back.f_code.co_name
+        pool = multiprocessing.Pool(1)
+        try:
+            pool.apply(
+                wrap_static_test_method,
+                (modName, clsName, '_'+funcName, args, kwargs))
+        finally:
+            pool.terminate()
+            pool.join()
+
+    @classmethod
+    def setUpClass(cls):
+        run_test_server.run()
+        run_test_server.run_keep(enforce_permissions=True, num_servers=2)
+
+    @classmethod
+    def tearDownClass(cls):
+        run_test_server.stop_keep(num_servers=2)
+
+    def setUp(self):
+        self.mnt = tempfile.mkdtemp()
+        run_test_server.authorize_with('active')
+        self.api = arvados.safeapi.ThreadSafeApiCache(arvados.config.settings())
+
+    def tearDown(self):
+        os.rmdir(self.mnt)
+        run_test_server.reset()
+
+    @staticmethod
+    def mount(argv):
+        """Decorator. Sets up a FUSE mount at self.mnt with the given args."""
+        def decorator(func):
+            @functools.wraps(func)
+            def wrapper(self, *args, **kwargs):
+                with arvados_fuse.command.Mount(
+                        arvados_fuse.command.ArgumentParser().parse_args(
+                            argv + ['--foreground', self.mnt])):
+                    return func(self, *args, **kwargs)
+            return wrapper
+        return decorator
diff --git a/services/fuse/tests/mount_test_base.py b/services/fuse/tests/mount_test_base.py
index 3b7cbaaadb0a8ab9c490c1a73a1648b54d89728c..f3c020c9a4aafeaa00bedcae62dd18bb787541c3 100644 (file)
@@ -35,7 +35,10 @@ class MountTestBase(unittest.TestCase):
         self.api = api if api else arvados.safeapi.ThreadSafeApiCache(arvados.config.settings())
 
     def make_mount(self, root_class, **root_kwargs):
-        self.operations = fuse.Operations(os.getuid(), os.getgid(), enable_write=True)
+        self.operations = fuse.Operations(
+            os.getuid(), os.getgid(),
+            api_client=self.api,
+            enable_write=True)
         self.operations.inodes.add_entry(root_class(
             llfuse.ROOT_INODE, self.operations.inodes, self.api, 0, **root_kwargs))
         llfuse.init(self.operations, self.mounttmp, [])
@@ -63,6 +66,7 @@ class MountTestBase(unittest.TestCase):
 
         os.rmdir(self.mounttmp)
         shutil.rmtree(self.keeptmp)
+        os.environ.pop('KEEP_LOCAL_STORE')
         run_test_server.reset()
 
     def assertDirContents(self, subdir, expect_content):
diff --git a/services/fuse/tests/test_command_args.py b/services/fuse/tests/test_command_args.py
new file mode 100644 (file)
index 0000000..19d56a9
--- /dev/null
@@ -0,0 +1,189 @@
+import arvados
+import arvados_fuse
+import arvados_fuse.command
+import contextlib
+import functools
+import json
+import llfuse
+import logging
+import os
+import run_test_server
+import sys
+import tempfile
+import unittest
+
+def noexit(func):
+    """If argparse or arvados_fuse tries to exit, fail the test instead"""
+    class SystemExitCaught(StandardError):
+        pass
+    @functools.wraps(func)
+    def wrapper(*args, **kwargs):
+        try:
+            return func(*args, **kwargs)
+        except SystemExit:
+            raise SystemExitCaught
+    return wrapper
+
+@contextlib.contextmanager
+def nostderr():
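+    """Temporarily redirect stderr to /dev/null."""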
+    orig, sys.stderr = sys.stderr, open(os.devnull, 'w')
+    try:
+        yield
+    finally:
+        sys.stderr = orig
+
+
+class MountArgsTest(unittest.TestCase):
+    def setUp(self):
+        self.mntdir = tempfile.mkdtemp()
+        run_test_server.authorize_with('active')
+
+    def tearDown(self):
+        os.rmdir(self.mntdir)
+
+    def lookup(self, mnt, *path):
+        ent = mnt.operations.inodes[llfuse.ROOT_INODE]
+        for p in path:
+            ent = ent[p]
+        return ent
+
+    def check_ent_type(self, cls, *path):
+        ent = self.lookup(self.mnt, *path)
+        self.assertEqual(ent.__class__, cls)
+        return ent
+
+    @noexit
+    def test_default_all(self):
+        args = arvados_fuse.command.ArgumentParser().parse_args([
+            '--foreground', self.mntdir])
+        self.assertEqual(args.mode, None)
+        self.mnt = arvados_fuse.command.Mount(args)
+        e = self.check_ent_type(arvados_fuse.ProjectDirectory, 'home')
+        self.assertEqual(e.project_object['uuid'],
+                         run_test_server.fixture('users')['active']['uuid'])
+        e = self.check_ent_type(arvados_fuse.MagicDirectory, 'by_id')
+
+        e = self.check_ent_type(arvados_fuse.StringFile, 'README')
+        readme = e.readfrom(0, -1)
+        self.assertRegexpMatches(readme, r'active-user@arvados\.local')
+        self.assertRegexpMatches(readme, r'\n$')
+
+        e = self.check_ent_type(arvados_fuse.StringFile, 'by_id', 'README')
+        txt = e.readfrom(0, -1)
+        self.assertRegexpMatches(txt, r'portable data hash')
+        self.assertRegexpMatches(txt, r'\n$')
+
+    @noexit
+    def test_by_id(self):
+        args = arvados_fuse.command.ArgumentParser().parse_args([
+            '--by-id',
+            '--foreground', self.mntdir])
+        self.assertEqual(args.mode, 'by_id')
+        self.mnt = arvados_fuse.command.Mount(args)
+        e = self.check_ent_type(arvados_fuse.MagicDirectory)
+        self.assertEqual(e.pdh_only, False)
+
+    @noexit
+    def test_by_pdh(self):
+        args = arvados_fuse.command.ArgumentParser().parse_args([
+            '--by-pdh',
+            '--foreground', self.mntdir])
+        self.assertEqual(args.mode, 'by_pdh')
+        self.mnt = arvados_fuse.command.Mount(args)
+        e = self.check_ent_type(arvados_fuse.MagicDirectory)
+        self.assertEqual(e.pdh_only, True)
+
+    @noexit
+    def test_by_tag(self):
+        args = arvados_fuse.command.ArgumentParser().parse_args([
+            '--by-tag',
+            '--foreground', self.mntdir])
+        self.assertEqual(args.mode, 'by_tag')
+        self.mnt = arvados_fuse.command.Mount(args)
+        e = self.check_ent_type(arvados_fuse.TagsDirectory)
+
+    @noexit
+    def test_collection(self, id_type='uuid'):
+        c = run_test_server.fixture('collections')['public_text_file']
+        cid = c[id_type]
+        args = arvados_fuse.command.ArgumentParser().parse_args([
+            '--collection', cid,
+            '--foreground', self.mntdir])
+        self.mnt = arvados_fuse.command.Mount(args)
+        e = self.check_ent_type(arvados_fuse.CollectionDirectory)
+        self.assertEqual(e.collection_locator, cid)
+
+    def test_collection_pdh(self):
+        self.test_collection('portable_data_hash')
+
+    @noexit
+    def test_home(self):
+        args = arvados_fuse.command.ArgumentParser().parse_args([
+            '--home',
+            '--foreground', self.mntdir])
+        self.assertEqual(args.mode, 'home')
+        self.mnt = arvados_fuse.command.Mount(args)
+        e = self.check_ent_type(arvados_fuse.ProjectDirectory)
+        self.assertEqual(e.project_object['uuid'],
+                         run_test_server.fixture('users')['active']['uuid'])
+
+    def test_mutually_exclusive_args(self):
+        cid = run_test_server.fixture('collections')['public_text_file']['uuid']
+        gid = run_test_server.fixture('groups')['aproject']['uuid']
+        for badargs in [
+                ['--mount-tmp', 'foo', '--collection', cid],
+                ['--mount-tmp', 'foo', '--project', gid],
+                ['--collection', cid, '--project', gid],
+                ['--by-id', '--project', gid],
+                ['--mount-tmp', 'foo', '--by-id'],
+        ]:
+            with nostderr():
+                with self.assertRaises(SystemExit):
+                    args = arvados_fuse.command.ArgumentParser().parse_args(
+                        badargs + ['--foreground', self.mntdir])
+                    arvados_fuse.command.Mount(args)
+
+    @noexit
+    def test_project(self):
+        uuid = run_test_server.fixture('groups')['aproject']['uuid']
+        args = arvados_fuse.command.ArgumentParser().parse_args([
+            '--project', uuid,
+            '--foreground', self.mntdir])
+        self.mnt = arvados_fuse.command.Mount(args)
+        e = self.check_ent_type(arvados_fuse.ProjectDirectory)
+        self.assertEqual(e.project_object['uuid'], uuid)
+
+    @noexit
+    def test_shared(self):
+        args = arvados_fuse.command.ArgumentParser().parse_args([
+            '--shared',
+            '--foreground', self.mntdir])
+        self.assertEqual(args.mode, 'shared')
+        self.mnt = arvados_fuse.command.Mount(args)
+        e = self.check_ent_type(arvados_fuse.SharedDirectory)
+        self.assertEqual(e.current_user['uuid'],
+                         run_test_server.fixture('users')['active']['uuid'])
+
+    @noexit
+    def test_custom(self):
+        args = arvados_fuse.command.ArgumentParser().parse_args([
+            '--mount-tmp', 'foo',
+            '--mount-tmp', 'bar',
+            '--mount-home', 'my_home',
+            '--foreground', self.mntdir])
+        self.assertEqual(args.mode, None)
+        self.mnt = arvados_fuse.command.Mount(args)
+        self.check_ent_type(arvados_fuse.Directory)
+        self.check_ent_type(arvados_fuse.TmpCollectionDirectory, 'foo')
+        self.check_ent_type(arvados_fuse.TmpCollectionDirectory, 'bar')
+        e = self.check_ent_type(arvados_fuse.ProjectDirectory, 'my_home')
+        self.assertEqual(e.project_object['uuid'],
+                         run_test_server.fixture('users')['active']['uuid'])
+
+    def test_custom_unsupported_layouts(self):
+        for name in ['.', '..', '', 'foo/bar', '/foo']:
+            with nostderr():
+                with self.assertRaises(SystemExit):
+                    args = arvados_fuse.command.ArgumentParser().parse_args([
+                        '--mount-tmp', name,
+                        '--foreground', self.mntdir])
+                    arvados_fuse.command.Mount(args)
diff --git a/services/fuse/tests/test_mount.py b/services/fuse/tests/test_mount.py
index ff8883714512c8a8f6b6a0196a19cb128204d317..05c8685607f14a0e7938379ab8f4fab7f3001e63 100644 (file)
@@ -427,12 +427,16 @@ class FuseWriteFileTest(MountTestBase):
 
         self.assertNotIn("file1.txt", collection)
 
+        self.assertEqual(0, self.operations.write_counter.get())
         self.pool.apply(fuseWriteFileTestHelperWriteFile, (self.mounttmp,))
+        self.assertEqual(12, self.operations.write_counter.get())
 
         with collection.open("file1.txt") as f:
             self.assertEqual(f.read(), "Hello world!")
 
+        self.assertEqual(0, self.operations.read_counter.get())
         self.pool.apply(fuseWriteFileTestHelperReadFile, (self.mounttmp,))
+        self.assertEqual(12, self.operations.read_counter.get())
 
         collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()
         self.assertRegexpMatches(collection2["manifest_text"],
@@ -699,7 +703,7 @@ class FuseUpdateFromEventTest(MountTestBase):
         with llfuse.lock:
             m.new_collection(collection.api_response(), collection)
 
-        self.operations.listen_for_events(self.api)
+        self.operations.listen_for_events()
 
         d1 = llfuse.listdir(os.path.join(self.mounttmp))
         self.assertEqual([], sorted(d1))
@@ -1060,3 +1064,66 @@ class FuseUnitTest(unittest.TestCase):
         self.assertEqual("_", fuse.sanitize_filename(""))
         self.assertEqual("_", fuse.sanitize_filename("."))
         self.assertEqual("__", fuse.sanitize_filename(".."))
+
+
+class FuseMagicTestPDHOnly(MountTestBase):
+    def setUp(self, api=None):
+        super(FuseMagicTestPDHOnly, self).setUp(api=api)
+
+        cw = arvados.CollectionWriter()
+
+        cw.start_new_file('thing1.txt')
+        cw.write("data 1")
+
+        self.testcollection = cw.finish()
+        self.test_manifest = cw.manifest_text()
+        created = self.api.collections().create(body={"manifest_text":self.test_manifest}).execute()
+        self.testcollectionuuid = str(created['uuid'])
+
+    def verify_pdh_only(self, pdh_only=False, skip_pdh_only=False):
+        if skip_pdh_only is True:
+            self.make_mount(fuse.MagicDirectory)    # in this case, the default (pdh_only=False) applies
+        else:
+            self.make_mount(fuse.MagicDirectory, pdh_only=pdh_only)
+
+        mount_ls = llfuse.listdir(self.mounttmp)
+        self.assertIn('README', mount_ls)
+        self.assertFalse(any(arvados.util.keep_locator_pattern.match(fn) or
+                             arvados.util.uuid_pattern.match(fn)
+                             for fn in mount_ls),
+                         "new FUSE MagicDirectory lists Collection")
+
+        # look up using pdh should succeed in all cases
+        self.assertDirContents(self.testcollection, ['thing1.txt'])
+        self.assertDirContents(os.path.join('by_id', self.testcollection),
+                               ['thing1.txt'])
+        mount_ls = llfuse.listdir(self.mounttmp)
+        self.assertIn('README', mount_ls)
+        self.assertIn(self.testcollection, mount_ls)
+        self.assertIn(self.testcollection,
+                      llfuse.listdir(os.path.join(self.mounttmp, 'by_id')))
+
+        files = {}
+        files[os.path.join(self.mounttmp, self.testcollection, 'thing1.txt')] = 'data 1'
+
+        for k, v in files.items():
+            with open(os.path.join(self.mounttmp, k)) as f:
+                self.assertEqual(v, f.read())
+
+        # look up using uuid should fail when pdh_only is set
+        if pdh_only is True:
+            with self.assertRaises(OSError):
+                self.assertDirContents(
+                    os.path.join('by_id', self.testcollectionuuid),
+                    ['thing1.txt'])
+        else:
+            self.assertDirContents(
+                os.path.join('by_id', self.testcollectionuuid),
+                ['thing1.txt'])
+
+    def test_with_pdh_only_true(self):
+        self.verify_pdh_only(pdh_only=True)
+
+    def test_with_pdh_only_false(self):
+        self.verify_pdh_only(pdh_only=False)
+
+    def test_with_default_by_id(self):
+        self.verify_pdh_only(skip_pdh_only=True)
diff --git a/services/fuse/tests/test_tmp_collection.py b/services/fuse/tests/test_tmp_collection.py
new file mode 100644 (file)
index 0000000..60eba1b
--- /dev/null
@@ -0,0 +1,124 @@
+import arvados
+import arvados_fuse
+import arvados_fuse.command
+import json
+import logging
+import os
+import tempfile
+import unittest
+
+from .integration_test import IntegrationTest
+from .mount_test_base import MountTestBase
+
+logger = logging.getLogger('arvados.arv-mount')
+
+
+class TmpCollectionArgsTest(unittest.TestCase):
+    def setUp(self):
+        self.tmpdir = tempfile.mkdtemp()
+
+    def tearDown(self):
+        os.rmdir(self.tmpdir)
+
+    def test_tmp_only(self):
+        args = arvados_fuse.command.ArgumentParser().parse_args([
+            '--mount-tmp', 'tmp1',
+            '--mount-tmp', 'tmp2',
+            self.tmpdir,
+        ])
+        self.assertIn(args.mode, [None, 'custom'])
+        self.assertEqual(['tmp1', 'tmp2'], args.mount_tmp)
+        for mtype in ['home', 'shared', 'by_id', 'by_pdh', 'by_tag']:
+            self.assertEqual([], getattr(args, 'mount_'+mtype))
+
+    def test_tmp_and_home(self):
+        args = arvados_fuse.command.ArgumentParser().parse_args([
+            '--mount-tmp', 'test_tmp',
+            '--mount-home', 'test_home',
+            self.tmpdir,
+        ])
+        self.assertIn(args.mode, [None, 'custom'])
+        self.assertEqual(['test_tmp'], args.mount_tmp)
+        self.assertEqual(['test_home'], args.mount_home)
+
+    def test_no_tmp(self):
+        args = arvados_fuse.command.ArgumentParser().parse_args([
+            self.tmpdir,
+        ])
+        self.assertEqual([], args.mount_tmp)
+
+
+def current_manifest(tmpdir):
+    return json.load(open(
+        os.path.join(tmpdir, '.arvados#collection')
+    ))['manifest_text']
+
+
+class TmpCollectionTest(IntegrationTest):
+    mnt_args = [
+        '--read-write',
+        '--mount-tmp', 'zzz',
+    ]
+
+    @IntegrationTest.mount(argv=mnt_args+['--mount-tmp', 'yyy'])
+    def test_two_tmp(self):
+        self.pool_test(os.path.join(self.mnt, 'zzz'),
+                       os.path.join(self.mnt, 'yyy'))
+    @staticmethod
+    def _test_two_tmp(self, zzz, yyy):
+        self.assertEqual(current_manifest(zzz), "")
+        self.assertEqual(current_manifest(yyy), "")
+        with open(os.path.join(zzz, 'foo'), 'w') as f:
+            f.write('foo')
+        self.assertNotEqual(current_manifest(zzz), "")
+        self.assertEqual(current_manifest(yyy), "")
+        os.unlink(os.path.join(zzz, 'foo'))
+        with open(os.path.join(yyy, 'bar'), 'w') as f:
+            f.write('bar')
+        self.assertEqual(current_manifest(zzz), "")
+        self.assertNotEqual(current_manifest(yyy), "")
+
+    @IntegrationTest.mount(argv=mnt_args)
+    def test_tmp_empty(self):
+        self.pool_test(os.path.join(self.mnt, 'zzz'))
+    @staticmethod
+    def _test_tmp_empty(self, tmpdir):
+        self.assertEqual(current_manifest(tmpdir), "")
+
+    @IntegrationTest.mount(argv=mnt_args)
+    def test_tmp_onefile(self):
+        self.pool_test(os.path.join(self.mnt, 'zzz'))
+    @staticmethod
+    def _test_tmp_onefile(self, tmpdir):
+        with open(os.path.join(tmpdir, 'foo'), 'w') as f:
+            f.write('foo')
+        self.assertRegexpMatches(
+            current_manifest(tmpdir),
+            r'^\. acbd18db4cc2f85cedef654fccc4a4d8\+3(\+\S+)? 0:3:foo\n$')
+
+    @IntegrationTest.mount(argv=mnt_args)
+    def test_tmp_snapshots(self):
+        self.pool_test(os.path.join(self.mnt, 'zzz'))
+    @staticmethod
+    def _test_tmp_snapshots(self, tmpdir):
+        ops = [
+            ('foo', 'bar',
+             r'^\. 37b51d194a7513e45b56f6524f2d51f2\+3(\+\S+)? 0:3:foo\n$'),
+            ('foo', 'foo',
+             r'^\. acbd18db4cc2f85cedef654fccc4a4d8\+3(\+\S+)? 0:3:foo\n$'),
+            ('bar', 'bar',
+             r'^\. 37b51d194a7513e45b56f6524f2d51f2\+3(\+\S+)? acbd18db4cc2f85cedef654fccc4a4d8\+3(\+\S+)? 0:3:bar 3:3:foo\n$'),
+            ('foo', None,
+             r'^\. 37b51d194a7513e45b56f6524f2d51f2\+3(\+\S+)? 0:3:bar\n$'),
+            ('bar', None,
+             r'^$'),
+        ]
+        for _ in range(10):
+            for fn, content, expect in ops:
+                path = os.path.join(tmpdir, fn)
+                if content is None:
+                    os.unlink(path)
+                else:
+                    with open(path, 'w') as f:
+                        f.write(content)
+                self.assertRegexpMatches(current_manifest(tmpdir), expect)
diff --git a/services/keep-web/.gitignore b/services/keep-web/.gitignore
new file mode 100644 (file)
index 0000000..53997c2
--- /dev/null
@@ -0,0 +1 @@
+keep-web
diff --git a/services/keep-web/anonymous.go b/services/keep-web/anonymous.go
new file mode 100644 (file)
index 0000000..15a98c2
--- /dev/null
@@ -0,0 +1,35 @@
+package main
+
+import (
+       "flag"
+       "fmt"
+       "os"
+       "strconv"
+)
+
+var anonymousTokens tokenSet
+
+type tokenSet []string
+
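+// Set implements flag.Value as a boolean flag: a true value installs
+// the token from the ARVADOS_API_TOKEN environment variable as the
+// anonymous token, and a false value clears the token list.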
+func (ts *tokenSet) Set(s string) error {
+       v, err := strconv.ParseBool(s)
+       if v && len(*ts) == 0 {
+               *ts = append(*ts, os.Getenv("ARVADOS_API_TOKEN"))
+       } else if !v {
+               *ts = (*ts)[:0]
+       }
+       return err
+}
+
+func (ts *tokenSet) String() string {
+       return fmt.Sprintf("%v", len(*ts) > 0)
+}
+
+func (ts *tokenSet) IsBoolFlag() bool {
+       return true
+}
+
+func init() {
+       flag.Var(&anonymousTokens, "allow-anonymous",
+               "Serve public data to anonymous clients. Try the token supplied in the ARVADOS_API_TOKEN environment variable when none of the tokens provided in an HTTP request succeed in reading the desired collection.")
+}
diff --git a/services/keep-web/doc.go b/services/keep-web/doc.go
new file mode 100644 (file)
index 0000000..4207d7b
--- /dev/null
@@ -0,0 +1,235 @@
+// Keep-web provides read-only HTTP access to files stored in Keep. It
+// serves public data to anonymous and unauthenticated clients, and
+// serves private data to clients that supply Arvados API tokens. It
+// can be installed anywhere with access to Keep services, typically
+// behind a web proxy that supports TLS.
+//
+// See http://doc.arvados.org/install/install-keep-web.html.
+//
+// Run "keep-web -help" to show all supported options.
+//
+// Starting the server
+//
+// Serve HTTP requests at port 1234 on all interfaces:
+//
+//   keep-web -listen=:1234
+//
+// Serve HTTP requests at port 1234 on the interface with IP address 1.2.3.4:
+//
+//   keep-web -listen=1.2.3.4:1234
+//
+// Proxy configuration
+//
+// Keep-web does not support SSL natively. Typically, it is installed
+// behind a proxy like nginx.
+//
+// Here is an example nginx configuration.
+//
+//     http {
+//       upstream keep-web {
+//         server localhost:1234;
+//       }
+//       server {
+//         listen *:443 ssl;
+//         server_name collections.example.com *.collections.example.com ~.*--collections.example.com;
+//         ssl_certificate /root/wildcard.example.com.crt;
+//         ssl_certificate_key /root/wildcard.example.com.key;
+//         location  / {
+//           proxy_pass http://keep-web;
+//           proxy_set_header Host $host;
+//           proxy_set_header X-Forwarded-For $remote_addr;
+//         }
+//       }
+//     }
+//
+// It is not necessary to run keep-web on the same host as the nginx
+// proxy. However, TLS is not used between nginx and keep-web, so
+// intervening networks must be secured by other means.
+//
+// Anonymous downloads
+//
+// Use the -allow-anonymous flag with an ARVADOS_API_TOKEN environment
+// variable to specify a token to use when clients try to retrieve
+// files without providing their own Arvados API token.
+//
+//   export ARVADOS_API_TOKEN=zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
+//   keep-web [...] -allow-anonymous
+//
+// See http://doc.arvados.org/install/install-keep-web.html for examples.
+//
+// Download URLs
+//
+// The following "same origin" URL patterns are supported for public
+// collections and collections shared anonymously via secret links
+// (i.e., collections which can be served by keep-web without making
+// use of any implicit credentials like cookies). See "Same-origin
+// URLs" below.
+//
+//   http://collections.example.com/c=uuid_or_pdh/path/file.txt
+//   http://collections.example.com/c=uuid_or_pdh/t=TOKEN/path/file.txt
+//
+// The following "multiple origin" URL patterns are supported for all
+// collections:
+//
+//   http://uuid_or_pdh--collections.example.com/path/file.txt
+//   http://uuid_or_pdh--collections.example.com/t=TOKEN/path/file.txt
+//
+// In the "multiple origin" form, the string "--" can be replaced with
+// "." with identical results (assuming the downstream proxy is
+// configured accordingly). These two are equivalent:
+//
+//   http://uuid_or_pdh--collections.example.com/path/file.txt
+//   http://uuid_or_pdh.collections.example.com/path/file.txt
+//
+// The first form (with "--" instead of ".") avoids the cost and
+// effort of deploying a wildcard TLS certificate for
+// *.collections.example.com at sites that already have a wildcard
+// certificate for *.example.com. The second form is likely to be
+// easier to configure, and more efficient to run, on a downstream
+// proxy.
+//
+// In all of the above forms, the "collections.example.com" part can
+// be anything at all: keep-web itself ignores everything after the
+// first "." or "--". (Of course, in order for clients to connect at
+// all, DNS and any relevant proxies must be configured accordingly.)
+//
+// In all of the above forms, the "uuid_or_pdh" part can be either a
+// collection UUID or a portable data hash with the "+" character
+// optionally replaced by "-". (When "uuid_or_pdh" appears in the
+// domain name, replacing "+" with "-" is mandatory, because "+" is
+// not a valid character in a domain name.)
+//
+// In all of the above forms, a top level directory called "_" is
+// skipped. In cases where the "path/file.txt" part might start with
+// "t=" or "c=" or "_/", links should be constructed with a leading
+// "_/" to ensure the top level directory is not interpreted as a
+// token or collection ID.
+//
+// Assuming there is a collection with UUID
+// zzzzz-4zz18-znfnqtbbv4spc3w and portable data hash
+// 1f4b0bc7583c2a7f9102c395f4ffc5e3+45, the following URLs are
+// interchangeable:
+//
+//   http://zzzzz-4zz18-znfnqtbbv4spc3w.collections.example.com/foo/bar.txt
+//   http://zzzzz-4zz18-znfnqtbbv4spc3w.collections.example.com/_/foo/bar.txt
+//   http://zzzzz-4zz18-znfnqtbbv4spc3w--collections.example.com/_/foo/bar.txt
+//   http://1f4b0bc7583c2a7f9102c395f4ffc5e3-45--foo.example.com/foo/bar.txt
+//   http://1f4b0bc7583c2a7f9102c395f4ffc5e3-45--.invalid/foo/bar.txt
+//
+// An additional form is supported specifically to make it more
+// convenient to maintain support for existing Workbench download
+// links:
+//
+//   http://collections.example.com/collections/download/uuid_or_pdh/TOKEN/foo/bar.txt
+//
+// A regular Workbench "download" link is also accepted, but
+// credentials passed via cookie, header, etc. are ignored. Only
+// public data can be served this way:
+//
+//   http://collections.example.com/collections/uuid_or_pdh/foo/bar.txt
+//
+// Authorization mechanisms
+//
+// A token can be provided in an Authorization header:
+//
+//   Authorization: OAuth2 o07j4px7RlJK4CuMYp7C0LDT4CzR1J1qBE5Avo7eCcUjOTikxK
+//
+// A base64-encoded token can be provided in a cookie named
+// "arvados_api_token":
+//
+//   Cookie: arvados_api_token=bzA3ajRweDdSbEpLNEN1TVlwN0MwTERUNEN6UjFKMXFCRTVBdm83ZUNjVWpPVGlreEs=
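+//
+// The cookie value is simply the base64 encoding of the token
+// itself; the value above can be reproduced with the standard
+// base64 tool:
+//
+//   echo -n o07j4px7RlJK4CuMYp7C0LDT4CzR1J1qBE5Avo7eCcUjOTikxK | base64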
+//
+// A token can be provided in a URL-encoded query string:
+//
+//   GET /foo/bar.txt?api_token=o07j4px7RlJK4CuMYp7C0LDT4CzR1J1qBE5Avo7eCcUjOTikxK
+//
+// A suitably encoded token can be provided in a POST body if the
+// request has a content type of application/x-www-form-urlencoded or
+// multipart/form-data:
+//
+//   POST /foo/bar.txt
+//   Content-Type: application/x-www-form-urlencoded
+//   [...]
+//   api_token=o07j4px7RlJK4CuMYp7C0LDT4CzR1J1qBE5Avo7eCcUjOTikxK
+//
+// If a token is provided in a query string or in a POST request, the
+// response is an HTTP 303 redirect to an equivalent GET request, with
+// the token stripped from the query string and added to a cookie
+// instead.
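+//
+// A sketch of the resulting exchange (note the scheme-relative
+// Location header and the base64-encoded cookie value):
+//
+//   GET /foo/bar.txt?api_token=o07j4px7... HTTP/1.1
+//   Host: uuid_or_pdh--collections.example.com
+//
+//   HTTP/1.1 303 See Other
+//   Location: //uuid_or_pdh--collections.example.com/foo/bar.txt
+//   Set-Cookie: arvados_api_token=bzA3ajRweDdS...; Path=/; HttpOnly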
+//
+// Indexes
+//
+// Currently, keep-web does not generate HTML index listings, nor does
+// it serve a default file like "index.html" when a directory is
+// requested. These features are likely to be added in future
+// versions. Until then, keep-web responds with 404 if a directory
+// name (or any path ending with "/") is requested.
+//
+// Compatibility
+//
+// Client-provided authorization tokens are ignored if the client does
+// not provide a Host header.
+//
+// In order to use the query string or POST form authorization
+// mechanisms, the client must follow 303 redirects; the client must
+// accept cookies with a 303 response and send those cookies when
+// performing the redirect; and either the client or an intervening
+// proxy must resolve a relative URL ("//host/path") if given in a
+// response Location header.
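+//
+// For example, curl meets these requirements when told to follow
+// redirects and use a cookie jar (a sketch; equivalent options in
+// other clients work too):
+//
+//   curl -L -c cookies.txt \
+//        "https://collections.example.com/c=uuid_or_pdh/foo/bar.txt?api_token=$ARVADOS_API_TOKEN"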
+//
+// Intranet mode
+//
+// Normally, Keep-web accepts requests for multiple collections using
+// the same host name, provided the client's credentials are not being
+// used. This provides insufficient XSS protection in an installation
+// where the "anonymously accessible" data is not truly public, but
+// merely protected by network topology.
+//
+// In such cases -- for example, a site which is not reachable from
+// the internet, where some data is world-readable from Arvados's
+// perspective but is intended to be available only to users within
+// the local network -- the downstream proxy should be configured to
+// return 401 for all paths beginning with "/c=".
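+//
+// With nginx as the downstream proxy, a location block along these
+// lines (a sketch) achieves that:
+//
+//     location /c= {
+//       return 401;
+//     }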
+//
+// Same-origin URLs
+//
+// Without the same-origin protection outlined above, a web page
+// stored in collection X could execute JavaScript code that uses the
+// current viewer's credentials to download additional data from
+// collection Y -- data which is accessible to the current viewer, but
+// not to the author of collection X -- from the same origin
+// ("https://collections.example.com/") and upload it to some other
+// site chosen by the author of collection X.
+//
+// Attachment-Only host
+//
+// It is possible to serve untrusted content and accept user
+// credentials at the same origin as long as the content is only
+// downloaded, never executed by browsers. A single origin (hostname
+// and port) can be designated as an "attachment-only" origin: cookies
+// will be accepted and all responses will have a
+// "Content-Disposition: attachment" header. This behavior is invoked
+// only when the designated origin exactly matches the Host header
+// provided by the client or downstream proxy.
+//
+//   keep-web -listen :9999 -attachment-only-host domain.example:9999
+//
+// Trust All Content mode
+//
+// In "trust all content" mode, Keep-web will accept credentials (API
+// tokens) and serve any collection X at
+// "https://collections.example.com/collections/X/path/file.ext".
+// This is UNSAFE except in the special case where everyone who is
+// able to write ANY data to Keep, and every JavaScript and HTML file
+// written to Keep, is also trusted to read ALL of the data in Keep.
+//
+// In such cases you can enable trust-all-content mode.
+//
+//   keep-web -listen :9999 -trust-all-content
+//
+// When using trust-all-content mode, the only effect of the
+// -attachment-only-host option is to add a "Content-Disposition:
+// attachment" header.
+//
+//   keep-web -listen :9999 -attachment-only-host domain.example:9999 -trust-all-content
+//
+package main
diff --git a/services/keep-web/handler.go b/services/keep-web/handler.go
new file mode 100644 (file)
index 0000000..847329f
--- /dev/null
@@ -0,0 +1,370 @@
+package main
+
+import (
+       "flag"
+       "fmt"
+       "html"
+       "io"
+       "mime"
+       "net/http"
+       "net/url"
+       "os"
+       "regexp"
+       "strconv"
+       "strings"
+
+       "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
+       "git.curoverse.com/arvados.git/sdk/go/auth"
+       "git.curoverse.com/arvados.git/sdk/go/httpserver"
+       "git.curoverse.com/arvados.git/sdk/go/keepclient"
+)
+
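+// handler implements http.Handler for all keep-web requests.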
+type handler struct{}
+
+var (
+       clientPool         = arvadosclient.MakeClientPool()
+       trustAllContent    = false
+       attachmentOnlyHost = ""
+)
+
+func init() {
+       flag.StringVar(&attachmentOnlyHost, "attachment-only-host", "",
+               "Accept credentials, and add \"Content-Disposition: attachment\" response headers, for requests at this hostname:port. Prohibiting inline display makes it possible to serve untrusted and non-public content from a single origin, i.e., without wildcard DNS or SSL.")
+       flag.BoolVar(&trustAllContent, "trust-all-content", false,
+               "Serve non-public content from a single origin. Dangerous: read docs before using!")
+}
+
+// return a UUID or PDH if s begins with a UUID or a PDH in its
+// DNS-safe form (with "+" replaced by "-"); otherwise return "".
+func parseCollectionIDFromDNSName(s string) string {
+       // Strip domain.
+       if i := strings.IndexRune(s, '.'); i >= 0 {
+               s = s[:i]
+       }
+       // Names like {uuid}--collections.example.com serve the same
+       // purpose as {uuid}.collections.example.com but can reduce
+       // cost/effort of using [additional] wildcard certificates.
+       if i := strings.Index(s, "--"); i >= 0 {
+               s = s[:i]
+       }
+       if arvadosclient.UUIDMatch(s) {
+               return s
+       }
+       if pdh := strings.Replace(s, "-", "+", 1); arvadosclient.PDHMatch(pdh) {
+               return pdh
+       }
+       return ""
+}
+
+var urlPDHDecoder = strings.NewReplacer(" ", "+", "-", "+")
+
+// return a UUID or PDH if s is a UUID or a PDH (even if it is a PDH
+// with "+" replaced by " " or "-"); otherwise return "".
+func parseCollectionIDFromURL(s string) string {
+       if arvadosclient.UUIDMatch(s) {
+               return s
+       }
+       if pdh := urlPDHDecoder.Replace(s); arvadosclient.PDHMatch(pdh) {
+               return pdh
+       }
+       return ""
+}
+
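+// ServeHTTP maps the request's host name and path to a collection
+// and file, fetches the collection record using whichever of the
+// available tokens works, and streams the file content from Keep.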
+func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
+       var statusCode = 0
+       var statusText string
+
+       remoteAddr := r.RemoteAddr
+       if xff := r.Header.Get("X-Forwarded-For"); xff != "" {
+               remoteAddr = xff + "," + remoteAddr
+       }
+
+       w := httpserver.WrapResponseWriter(wOrig)
+       defer func() {
+               if statusCode == 0 {
+                       statusCode = w.WroteStatus()
+               } else if w.WroteStatus() == 0 {
+                       w.WriteHeader(statusCode)
+               } else if w.WroteStatus() != statusCode {
+                       httpserver.Log(r.RemoteAddr, "WARNING",
+                               fmt.Sprintf("Our status changed from %d to %d after we sent headers", w.WroteStatus(), statusCode))
+               }
+               if statusText == "" {
+                       statusText = http.StatusText(statusCode)
+               }
+               httpserver.Log(remoteAddr, statusCode, statusText, w.WroteBodyBytes(), r.Method, r.Host, r.URL.Path, r.URL.RawQuery)
+       }()
+
+       if r.Method != "GET" && r.Method != "POST" {
+               statusCode, statusText = http.StatusMethodNotAllowed, r.Method
+               return
+       }
+
+       arv := clientPool.Get()
+       if arv == nil {
+               statusCode, statusText = http.StatusInternalServerError, "Pool failed: "+clientPool.Err().Error()
+               return
+       }
+       defer clientPool.Put(arv)
+
+       pathParts := strings.Split(r.URL.Path[1:], "/")
+
+       var targetID string
+       var targetPath []string
+       var tokens []string
+       var reqTokens []string
+       var pathToken bool
+       var attachment bool
+       credentialsOK := trustAllContent
+
+       if r.Host != "" && r.Host == attachmentOnlyHost {
+               credentialsOK = true
+               attachment = true
+       } else if r.FormValue("disposition") == "attachment" {
+               attachment = true
+       }
+
+       if targetID = parseCollectionIDFromDNSName(r.Host); targetID != "" {
+               // http://ID.collections.example/PATH...
+               credentialsOK = true
+               targetPath = pathParts
+       } else if len(pathParts) >= 2 && strings.HasPrefix(pathParts[0], "c=") {
+               // /c=ID/PATH...
+               targetID = parseCollectionIDFromURL(pathParts[0][2:])
+               targetPath = pathParts[1:]
+       } else if len(pathParts) >= 3 && pathParts[0] == "collections" {
+               if len(pathParts) >= 5 && pathParts[1] == "download" {
+                       // /collections/download/ID/TOKEN/PATH...
+                       targetID = pathParts[2]
+                       tokens = []string{pathParts[3]}
+                       targetPath = pathParts[4:]
+                       pathToken = true
+               } else {
+                       // /collections/ID/PATH...
+                       targetID = pathParts[1]
+                       tokens = anonymousTokens
+                       targetPath = pathParts[2:]
+               }
+       } else {
+               statusCode = http.StatusNotFound
+               return
+       }
+       if t := r.FormValue("api_token"); t != "" {
+               // The client provided an explicit token in the query
+               // string, or a form in POST body. We must put the
+               // token in an HttpOnly cookie, and redirect to the
+               // same URL with the query param redacted and method =
+               // GET.
+
+               if !credentialsOK {
+                       // It is not safe to copy the provided token
+                       // into a cookie unless the current vhost
+                       // (origin) serves only a single collection or
+                       // we are in trustAllContent mode.
+                       statusCode = http.StatusBadRequest
+                       return
+               }
+
+               // The HttpOnly flag is necessary to prevent
+               // JavaScript code (included in, or loaded by, a page
+               // in the collection being served) from employing the
+               // user's token beyond reading other files in the same
+               // domain, i.e., same collection.
+               //
+               // The 303 redirect is necessary in the case of a GET
+               // request to avoid exposing the token in the Location
+               // bar, and in the case of a POST request to avoid
+               // raising warnings when the user refreshes the
+               // resulting page.
+
+               http.SetCookie(w, &http.Cookie{
+                       Name:     "arvados_api_token",
+                       Value:    auth.EncodeTokenCookie([]byte(t)),
+                       Path:     "/",
+                       HttpOnly: true,
+               })
+
+               // Propagate query parameters (except api_token) from
+               // the original request.
+               redirQuery := r.URL.Query()
+               redirQuery.Del("api_token")
+
+               redir := (&url.URL{
+                       Host:     r.Host,
+                       Path:     r.URL.Path,
+                       RawQuery: redirQuery.Encode(),
+               }).String()
+
+               w.Header().Add("Location", redir)
+               statusCode, statusText = http.StatusSeeOther, redir
+               w.WriteHeader(statusCode)
+               io.WriteString(w, `<A href="`)
+               io.WriteString(w, html.EscapeString(redir))
+               io.WriteString(w, `">Continue</A>`)
+               return
+       }
+
+       if tokens == nil && strings.HasPrefix(targetPath[0], "t=") {
+               // http://ID.example/t=TOKEN/PATH...
+               // /c=ID/t=TOKEN/PATH...
+               //
+               // This form must only be used to pass scoped tokens
+               // that give permission for a single collection. See
+               // FormValue case above.
+               tokens = []string{targetPath[0][2:]}
+               pathToken = true
+               targetPath = targetPath[1:]
+       }
+
+       if tokens == nil {
+               if credentialsOK {
+                       reqTokens = auth.NewCredentialsFromHTTPRequest(r).Tokens
+               }
+               tokens = append(reqTokens, anonymousTokens...)
+       }
+
+       if len(targetPath) > 0 && targetPath[0] == "_" {
+               // If a collection has a directory called "t=foo" or
+               // "_", it can be served at
+               // //collections.example/_/t=foo/ or
+               // //collections.example/_/_/ respectively:
+               // //collections.example/t=foo/ won't work because
+               // t=foo will be interpreted as a token "foo".
+               targetPath = targetPath[1:]
+       }
+
+       tokenResult := make(map[string]int)
+       collection := make(map[string]interface{})
+       found := false
+       for _, arv.ApiToken = range tokens {
+               err := arv.Get("collections", targetID, nil, &collection)
+               if err == nil {
+                       // Success
+                       found = true
+                       break
+               }
+               if srvErr, ok := err.(arvadosclient.APIServerError); ok {
+                       switch srvErr.HttpStatusCode {
+                       case 404, 401:
+                               // Token broken or insufficient to
+                               // retrieve collection
+                               tokenResult[arv.ApiToken] = srvErr.HttpStatusCode
+                               continue
+                       }
+               }
+               // Something more serious is wrong
+               statusCode, statusText = http.StatusInternalServerError, err.Error()
+               return
+       }
+       if !found {
+               if pathToken || !credentialsOK {
+                       // Either the URL is a "secret sharing link"
+                       // that didn't work out (and asking the client
+                       // for additional credentials would just be
+                       // confusing), or we don't even accept
+                       // credentials at this path.
+                       statusCode = http.StatusNotFound
+                       return
+               }
+               for _, t := range reqTokens {
+                       if tokenResult[t] == 404 {
+                               // The client provided valid token(s), but the
+                               // collection was not found.
+                               statusCode = http.StatusNotFound
+                               return
+                       }
+               }
+               // The client's token was invalid (e.g., expired), or
+               // the client didn't even provide one.  Propagate the
+               // 401 to encourage the client to use a [different]
+               // token.
+               //
+               // TODO(TC): This response would be confusing to
+               // someone trying (anonymously) to download public
+               // data that has been deleted.  Allow a referrer to
+               // provide this context somehow?
+               w.Header().Add("WWW-Authenticate", "Basic realm=\"collections\"")
+               statusCode = http.StatusUnauthorized
+               return
+       }
+
+       filename := strings.Join(targetPath, "/")
+       kc, err := keepclient.MakeKeepClient(arv)
+       if err != nil {
+               statusCode, statusText = http.StatusInternalServerError, err.Error()
+               return
+       }
+       rdr, err := kc.CollectionFileReader(collection, filename)
+       if os.IsNotExist(err) {
+               statusCode = http.StatusNotFound
+               return
+       } else if err != nil {
+               statusCode, statusText = http.StatusBadGateway, err.Error()
+               return
+       }
+       defer rdr.Close()
+
+       basenamePos := strings.LastIndex(filename, "/")
+       if basenamePos < 0 {
+               basenamePos = 0
+       }
+       extPos := strings.LastIndex(filename, ".")
+       if extPos > basenamePos {
+               // Now extPos is safely >= 0.
+               if t := mime.TypeByExtension(filename[extPos:]); t != "" {
+                       w.Header().Set("Content-Type", t)
+               }
+       }
+       if rdr, ok := rdr.(keepclient.ReadCloserWithLen); ok {
+               w.Header().Set("Content-Length", fmt.Sprintf("%d", rdr.Len()))
+       }
+
+       applyContentDispositionHdr(w, r, filename[basenamePos:], attachment)
+       rangeRdr, statusCode := applyRangeHdr(w, r, rdr)
+
+       w.WriteHeader(statusCode)
+       _, err = io.Copy(w, rangeRdr)
+       if err != nil {
+               statusCode, statusText = http.StatusBadGateway, err.Error()
+       }
+}
+
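+// rangeRe matches the only Range header form supported here: a
+// single byte range starting at 0, e.g. "bytes=0-1234". Any other
+// Range header is ignored and the entire content is served with a
+// 200 response.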
+var rangeRe = regexp.MustCompile(`^bytes=0-([0-9]*)$`)
+
+func applyRangeHdr(w http.ResponseWriter, r *http.Request, rdr keepclient.ReadCloserWithLen) (io.Reader, int) {
+       w.Header().Set("Accept-Ranges", "bytes")
+       hdr := r.Header.Get("Range")
+       fields := rangeRe.FindStringSubmatch(hdr)
+       if fields == nil {
+               return rdr, http.StatusOK
+       }
+       rangeEnd, err := strconv.ParseInt(fields[1], 10, 64)
+       if err != nil {
+               // Empty or too big for int64 == send entire content
+               return rdr, http.StatusOK
+       }
+       if uint64(rangeEnd) >= rdr.Len() {
+               return rdr, http.StatusOK
+       }
+       w.Header().Set("Content-Length", fmt.Sprintf("%d", rangeEnd+1))
+       w.Header().Set("Content-Range", fmt.Sprintf("bytes %d-%d/%d", 0, rangeEnd, rdr.Len()))
+       return &io.LimitedReader{R: rdr, N: rangeEnd + 1}, http.StatusPartialContent
+}
+
+func applyContentDispositionHdr(w http.ResponseWriter, r *http.Request, filename string, isAttachment bool) {
+       disposition := "inline"
+       if isAttachment {
+               disposition = "attachment"
+       }
+       if strings.ContainsRune(r.RequestURI, '?') {
+               // Help the UA realize that the filename is just
+               // "filename.txt", not
+               // "filename.txt?disposition=attachment".
+               //
+               // TODO(TC): Follow advice at RFC 6266 appendix D
+               disposition += "; filename=" + strconv.QuoteToASCII(filename)
+       }
+       if disposition != "inline" {
+               w.Header().Set("Content-Disposition", disposition)
+       }
+}
diff --git a/services/keep-web/handler_test.go b/services/keep-web/handler_test.go
new file mode 100644 (file)
index 0000000..94a1cf7
--- /dev/null
@@ -0,0 +1,419 @@
+package main
+
+import (
+       "html"
+       "io/ioutil"
+       "net/http"
+       "net/http/httptest"
+       "net/url"
+       "regexp"
+       "strings"
+
+       "git.curoverse.com/arvados.git/sdk/go/arvadostest"
+       "git.curoverse.com/arvados.git/sdk/go/auth"
+       check "gopkg.in/check.v1"
+)
+
+var _ = check.Suite(&UnitSuite{})
+
+type UnitSuite struct{}
+
+func mustParseURL(s string) *url.URL {
+       r, err := url.Parse(s)
+       if err != nil {
+               panic("parse URL: " + s)
+       }
+       return r
+}
+
+func (s *IntegrationSuite) TestVhost404(c *check.C) {
+       for _, testURL := range []string{
+               arvadostest.NonexistentCollection + ".example.com/theperthcountyconspiracy",
+               arvadostest.NonexistentCollection + ".example.com/t=" + arvadostest.ActiveToken + "/theperthcountyconspiracy",
+       } {
+               resp := httptest.NewRecorder()
+               u := mustParseURL(testURL)
+               req := &http.Request{
+                       Method:     "GET",
+                       URL:        u,
+                       RequestURI: u.RequestURI(),
+               }
+               (&handler{}).ServeHTTP(resp, req)
+               c.Check(resp.Code, check.Equals, http.StatusNotFound)
+               c.Check(resp.Body.String(), check.Equals, "")
+       }
+}
+
+// An authorizer modifies an HTTP request to make use of the given
+// token -- by adding it to a header, cookie, query param, or whatever
+// -- and returns the HTTP status code we should expect from keep-web if
+// the token is invalid.
+type authorizer func(*http.Request, string) int
+
+func (s *IntegrationSuite) TestVhostViaAuthzHeader(c *check.C) {
+       doVhostRequests(c, authzViaAuthzHeader)
+}
+func authzViaAuthzHeader(r *http.Request, tok string) int {
+       r.Header.Add("Authorization", "OAuth2 "+tok)
+       return http.StatusUnauthorized
+}
+
+func (s *IntegrationSuite) TestVhostViaCookieValue(c *check.C) {
+       doVhostRequests(c, authzViaCookieValue)
+}
+func authzViaCookieValue(r *http.Request, tok string) int {
+       r.AddCookie(&http.Cookie{
+               Name:  "arvados_api_token",
+               Value: auth.EncodeTokenCookie([]byte(tok)),
+       })
+       return http.StatusUnauthorized
+}
+
+func (s *IntegrationSuite) TestVhostViaPath(c *check.C) {
+       doVhostRequests(c, authzViaPath)
+}
+func authzViaPath(r *http.Request, tok string) int {
+       r.URL.Path = "/t=" + tok + r.URL.Path
+       return http.StatusNotFound
+}
+
+func (s *IntegrationSuite) TestVhostViaQueryString(c *check.C) {
+       doVhostRequests(c, authzViaQueryString)
+}
+func authzViaQueryString(r *http.Request, tok string) int {
+       r.URL.RawQuery = "api_token=" + tok
+       return http.StatusUnauthorized
+}
+
+func (s *IntegrationSuite) TestVhostViaPOST(c *check.C) {
+       doVhostRequests(c, authzViaPOST)
+}
+func authzViaPOST(r *http.Request, tok string) int {
+       r.Method = "POST"
+       r.Header.Add("Content-Type", "application/x-www-form-urlencoded")
+       r.Body = ioutil.NopCloser(strings.NewReader(
+               url.Values{"api_token": {tok}}.Encode()))
+       return http.StatusUnauthorized
+}
+
+// Try some combinations of {url, token} using the given authorization
+// mechanism, and verify the result is correct.
+func doVhostRequests(c *check.C, authz authorizer) {
+       for _, hostPath := range []string{
+               arvadostest.FooCollection + ".example.com/foo",
+               arvadostest.FooCollection + "--collections.example.com/foo",
+               arvadostest.FooCollection + "--collections.example.com/_/foo",
+               arvadostest.FooPdh + ".example.com/foo",
+               strings.Replace(arvadostest.FooPdh, "+", "-", -1) + "--collections.example.com/foo",
+               arvadostest.FooBarDirCollection + ".example.com/dir1/foo",
+       } {
+               c.Log("doRequests: ", hostPath)
+               doVhostRequestsWithHostPath(c, authz, hostPath)
+       }
+}
+
+func doVhostRequestsWithHostPath(c *check.C, authz authorizer, hostPath string) {
+       for _, tok := range []string{
+               arvadostest.ActiveToken,
+               arvadostest.ActiveToken[:15],
+               arvadostest.SpectatorToken,
+               "bogus",
+               "",
+       } {
+               u := mustParseURL("http://" + hostPath)
+               req := &http.Request{
+                       Method:     "GET",
+                       Host:       u.Host,
+                       URL:        u,
+                       RequestURI: u.RequestURI(),
+                       Header:     http.Header{},
+               }
+               failCode := authz(req, tok)
+               resp := doReq(req)
+               code, body := resp.Code, resp.Body.String()
+               if tok == arvadostest.ActiveToken {
+                       c.Check(code, check.Equals, http.StatusOK)
+                       c.Check(body, check.Equals, "foo")
+               } else {
+                       c.Check(code >= 400, check.Equals, true)
+                       c.Check(code < 500, check.Equals, true)
+                       if tok == arvadostest.SpectatorToken {
+                               // Valid token never offers to retry
+                               // with different credentials.
+                               c.Check(code, check.Equals, http.StatusNotFound)
+                       } else {
+                               // Invalid token can ask to retry
+                               // depending on the authz method.
+                               c.Check(code, check.Equals, failCode)
+                       }
+                       c.Check(body, check.Equals, "")
+               }
+       }
+}
+
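+// doReq serves req with a fresh handler and, if the response is a
+// 303 redirect, replays any cookies it set while following the
+// redirect, returning the final response.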
+func doReq(req *http.Request) *httptest.ResponseRecorder {
+       resp := httptest.NewRecorder()
+       (&handler{}).ServeHTTP(resp, req)
+       if resp.Code != http.StatusSeeOther {
+               return resp
+       }
+       cookies := (&http.Response{Header: resp.Header()}).Cookies()
+       u, _ := req.URL.Parse(resp.Header().Get("Location"))
+       req = &http.Request{
+               Method:     "GET",
+               Host:       u.Host,
+               URL:        u,
+               RequestURI: u.RequestURI(),
+               Header:     http.Header{},
+       }
+       for _, c := range cookies {
+               req.AddCookie(c)
+       }
+       return doReq(req)
+}
+
+func (s *IntegrationSuite) TestVhostRedirectQueryTokenToCookie(c *check.C) {
+       s.testVhostRedirectTokenToCookie(c, "GET",
+               arvadostest.FooCollection+".example.com/foo",
+               "?api_token="+arvadostest.ActiveToken,
+               "",
+               "",
+               http.StatusOK,
+               "foo",
+       )
+}
+
+func (s *IntegrationSuite) TestSingleOriginSecretLink(c *check.C) {
+       s.testVhostRedirectTokenToCookie(c, "GET",
+               "example.com/c="+arvadostest.FooCollection+"/t="+arvadostest.ActiveToken+"/foo",
+               "",
+               "",
+               "",
+               http.StatusOK,
+               "foo",
+       )
+}
+
+// Bad token in URL is 404 Not Found because it doesn't make sense to
+// retry the same URL with different authorization.
+func (s *IntegrationSuite) TestSingleOriginSecretLinkBadToken(c *check.C) {
+       s.testVhostRedirectTokenToCookie(c, "GET",
+               "example.com/c="+arvadostest.FooCollection+"/t=bogus/foo",
+               "",
+               "",
+               "",
+               http.StatusNotFound,
+               "",
+       )
+}
+
+// Bad token in a cookie (even if it got there via our own
+// query-string-to-cookie redirect) is, in principle, retryable at the
+// same URL so it's 401 Unauthorized.
+func (s *IntegrationSuite) TestVhostRedirectQueryTokenToBogusCookie(c *check.C) {
+       s.testVhostRedirectTokenToCookie(c, "GET",
+               arvadostest.FooCollection+".example.com/foo",
+               "?api_token=thisisabogustoken",
+               "",
+               "",
+               http.StatusUnauthorized,
+               "",
+       )
+}
+
+func (s *IntegrationSuite) TestVhostRedirectQueryTokenSingleOriginError(c *check.C) {
+       s.testVhostRedirectTokenToCookie(c, "GET",
+               "example.com/c="+arvadostest.FooCollection+"/foo",
+               "?api_token="+arvadostest.ActiveToken,
+               "",
+               "",
+               http.StatusBadRequest,
+               "",
+       )
+}
+
+// If client requests an attachment by putting ?disposition=attachment
+// in the query string, and gets redirected, the redirect target
+// should respond with an attachment.
+func (s *IntegrationSuite) TestVhostRedirectQueryTokenRequestAttachment(c *check.C) {
+       resp := s.testVhostRedirectTokenToCookie(c, "GET",
+               arvadostest.FooCollection+".example.com/foo",
+               "?disposition=attachment&api_token="+arvadostest.ActiveToken,
+               "",
+               "",
+               http.StatusOK,
+               "foo",
+       )
+       c.Check(strings.Split(resp.Header().Get("Content-Disposition"), ";")[0], check.Equals, "attachment")
+}
+
+func (s *IntegrationSuite) TestVhostRedirectQueryTokenTrustAllContent(c *check.C) {
+       defer func(orig bool) {
+               trustAllContent = orig
+       }(trustAllContent)
+       trustAllContent = true
+       s.testVhostRedirectTokenToCookie(c, "GET",
+               "example.com/c="+arvadostest.FooCollection+"/foo",
+               "?api_token="+arvadostest.ActiveToken,
+               "",
+               "",
+               http.StatusOK,
+               "foo",
+       )
+}
+
+func (s *IntegrationSuite) TestVhostRedirectQueryTokenAttachmentOnlyHost(c *check.C) {
+       defer func(orig string) {
+               attachmentOnlyHost = orig
+       }(attachmentOnlyHost)
+       attachmentOnlyHost = "example.com:1234"
+
+       s.testVhostRedirectTokenToCookie(c, "GET",
+               "example.com/c="+arvadostest.FooCollection+"/foo",
+               "?api_token="+arvadostest.ActiveToken,
+               "",
+               "",
+               http.StatusBadRequest,
+               "",
+       )
+
+       resp := s.testVhostRedirectTokenToCookie(c, "GET",
+               "example.com:1234/c="+arvadostest.FooCollection+"/foo",
+               "?api_token="+arvadostest.ActiveToken,
+               "",
+               "",
+               http.StatusOK,
+               "foo",
+       )
+       c.Check(resp.Header().Get("Content-Disposition"), check.Equals, "attachment")
+}
+
+func (s *IntegrationSuite) TestVhostRedirectPOSTFormTokenToCookie(c *check.C) {
+       s.testVhostRedirectTokenToCookie(c, "POST",
+               arvadostest.FooCollection+".example.com/foo",
+               "",
+               "application/x-www-form-urlencoded",
+               url.Values{"api_token": {arvadostest.ActiveToken}}.Encode(),
+               http.StatusOK,
+               "foo",
+       )
+}
+
+func (s *IntegrationSuite) TestVhostRedirectPOSTFormTokenToCookie404(c *check.C) {
+       s.testVhostRedirectTokenToCookie(c, "POST",
+               arvadostest.FooCollection+".example.com/foo",
+               "",
+               "application/x-www-form-urlencoded",
+               url.Values{"api_token": {arvadostest.SpectatorToken}}.Encode(),
+               http.StatusNotFound,
+               "",
+       )
+}
+
+func (s *IntegrationSuite) TestAnonymousTokenOK(c *check.C) {
+       anonymousTokens = []string{arvadostest.AnonymousToken}
+       s.testVhostRedirectTokenToCookie(c, "GET",
+               "example.com/c="+arvadostest.HelloWorldCollection+"/Hello%20world.txt",
+               "",
+               "",
+               "",
+               http.StatusOK,
+               "Hello world\n",
+       )
+}
+
+func (s *IntegrationSuite) TestAnonymousTokenError(c *check.C) {
+       anonymousTokens = []string{"anonymousTokenConfiguredButInvalid"}
+       s.testVhostRedirectTokenToCookie(c, "GET",
+               "example.com/c="+arvadostest.HelloWorldCollection+"/Hello%20world.txt",
+               "",
+               "",
+               "",
+               http.StatusNotFound,
+               "",
+       )
+}
+
+func (s *IntegrationSuite) TestRange(c *check.C) {
+       u, _ := url.Parse("http://example.com/c=" + arvadostest.HelloWorldCollection + "/Hello%20world.txt")
+       req := &http.Request{
+               Method:     "GET",
+               Host:       u.Host,
+               URL:        u,
+               RequestURI: u.RequestURI(),
+               Header:     http.Header{"Range": {"bytes=0-4"}},
+       }
+       resp := httptest.NewRecorder()
+       (&handler{}).ServeHTTP(resp, req)
+       c.Check(resp.Code, check.Equals, http.StatusPartialContent)
+       c.Check(resp.Body.String(), check.Equals, "Hello")
+       c.Check(resp.Header().Get("Content-Length"), check.Equals, "5")
+       c.Check(resp.Header().Get("Content-Range"), check.Equals, "bytes 0-4/12")
+
+       req.Header.Set("Range", "bytes=0-")
+       resp = httptest.NewRecorder()
+       (&handler{}).ServeHTTP(resp, req)
+       // 200 and 206 would both be correct here; the current implementation sends 200:
+       c.Check(resp.Code, check.Equals, http.StatusOK)
+       c.Check(resp.Body.String(), check.Equals, "Hello world\n")
+       c.Check(resp.Header().Get("Content-Length"), check.Equals, "12")
+
+       // Unsupported ranges are ignored
+       for _, hdr := range []string{
+               "bytes=5-5",  // non-zero start byte
+               "bytes=-5",   // last 5 bytes
+               "cubits=0-5", // unsupported unit
+               "bytes=0-340282366920938463463374607431768211456", // 2^128
+       } {
+               req.Header.Set("Range", hdr)
+               resp = httptest.NewRecorder()
+               (&handler{}).ServeHTTP(resp, req)
+               c.Check(resp.Code, check.Equals, http.StatusOK)
+               c.Check(resp.Body.String(), check.Equals, "Hello world\n")
+               c.Check(resp.Header().Get("Content-Length"), check.Equals, "12")
+               c.Check(resp.Header().Get("Content-Range"), check.Equals, "")
+               c.Check(resp.Header().Get("Accept-Ranges"), check.Equals, "bytes")
+       }
+}
+
+func (s *IntegrationSuite) testVhostRedirectTokenToCookie(c *check.C, method, hostPath, queryString, contentType, reqBody string, expectStatus int, expectRespBody string) *httptest.ResponseRecorder {
+       u, _ := url.Parse(`http://` + hostPath + queryString)
+       req := &http.Request{
+               Method:     method,
+               Host:       u.Host,
+               URL:        u,
+               RequestURI: u.RequestURI(),
+               Header:     http.Header{"Content-Type": {contentType}},
+               Body:       ioutil.NopCloser(strings.NewReader(reqBody)),
+       }
+
+       resp := httptest.NewRecorder()
+       defer func() {
+               c.Check(resp.Code, check.Equals, expectStatus)
+               c.Check(resp.Body.String(), check.Equals, expectRespBody)
+       }()
+
+       (&handler{}).ServeHTTP(resp, req)
+       if resp.Code != http.StatusSeeOther {
+               return resp
+       }
+       c.Check(resp.Body.String(), check.Matches, `.*href="//`+regexp.QuoteMeta(html.EscapeString(hostPath))+`(\?[^"]*)?".*`)
+       cookies := (&http.Response{Header: resp.Header()}).Cookies()
+
+       u, _ = u.Parse(resp.Header().Get("Location"))
+       req = &http.Request{
+               Method:     "GET",
+               Host:       u.Host,
+               URL:        u,
+               RequestURI: u.RequestURI(),
+               Header:     http.Header{},
+       }
+       for _, c := range cookies {
+               req.AddCookie(c)
+       }
+
+       resp = httptest.NewRecorder()
+       (&handler{}).ServeHTTP(resp, req)
+       c.Check(resp.Header().Get("Location"), check.Equals, "")
+       return resp
+}
diff --git a/services/keep-web/main.go b/services/keep-web/main.go
new file mode 100644 (file)
index 0000000..135f01b
--- /dev/null
@@ -0,0 +1,33 @@
+package main
+
+import (
+       "flag"
+       "log"
+       "os"
+)
+
+func init() {
+       // MakeArvadosClient returns an error if this env var isn't
+       // available as a default token (even if we explicitly set a
+       // different token before doing anything with the client). We
+       // set this dummy value during init so it doesn't clobber the
+       // one used by "run test servers".
+       if os.Getenv("ARVADOS_API_TOKEN") == "" {
+               os.Setenv("ARVADOS_API_TOKEN", "xxx")
+       }
+}
+
+func main() {
+       flag.Parse()
+       if os.Getenv("ARVADOS_API_HOST") == "" {
+               log.Fatal("ARVADOS_API_HOST environment variable must be set.")
+       }
+       srv := &server{}
+       if err := srv.Start(); err != nil {
+               log.Fatal(err)
+       }
+       log.Println("Listening at", srv.Addr)
+       if err := srv.Wait(); err != nil {
+               log.Fatal(err)
+       }
+}
diff --git a/services/keep-web/server.go b/services/keep-web/server.go
new file mode 100644 (file)
index 0000000..1009008
--- /dev/null
@@ -0,0 +1,27 @@
+package main
+
+import (
+       "flag"
+       "net/http"
+
+       "git.curoverse.com/arvados.git/sdk/go/httpserver"
+)
+
+var address string
+
+func init() {
+       flag.StringVar(&address, "listen", ":80",
+               "Address to listen on: \"host:port\", or \":port\" to listen on all interfaces.")
+}
+
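+// server wraps httpserver.Server, installing the keep-web handler at
+// the root of a new ServeMux.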
+type server struct {
+       httpserver.Server
+}
+
+func (srv *server) Start() error {
+       mux := http.NewServeMux()
+       mux.Handle("/", &handler{})
+       srv.Handler = mux
+       srv.Addr = address
+       return srv.Server.Start()
+}
diff --git a/services/keep-web/server_test.go b/services/keep-web/server_test.go
new file mode 100644 (file)
index 0000000..cda8b17
--- /dev/null
@@ -0,0 +1,322 @@
+package main
+
+import (
+       "crypto/md5"
+       "fmt"
+       "io"
+       "io/ioutil"
+       "net"
+       "os/exec"
+       "strings"
+       "testing"
+
+       "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
+       "git.curoverse.com/arvados.git/sdk/go/arvadostest"
+       "git.curoverse.com/arvados.git/sdk/go/keepclient"
+       check "gopkg.in/check.v1"
+)
+
+var _ = check.Suite(&IntegrationSuite{})
+
+// IntegrationSuite tests need an API server and a keep-web server
+type IntegrationSuite struct {
+       testServer *server
+}
+
+func (s *IntegrationSuite) TestNoToken(c *check.C) {
+       for _, token := range []string{
+               "",
+               "bogustoken",
+       } {
+               hdr, body, _ := s.runCurl(c, token, "collections.example.com", "/collections/"+arvadostest.FooCollection+"/foo")
+               c.Check(hdr, check.Matches, `(?s)HTTP/1.1 404 Not Found\r\n.*`)
+               c.Check(body, check.Equals, "")
+
+               if token != "" {
+                       hdr, body, _ = s.runCurl(c, token, "collections.example.com", "/collections/download/"+arvadostest.FooCollection+"/"+token+"/foo")
+                       c.Check(hdr, check.Matches, `(?s)HTTP/1.1 404 Not Found\r\n.*`)
+                       c.Check(body, check.Equals, "")
+               }
+
+               hdr, body, _ = s.runCurl(c, token, "collections.example.com", "/bad-route")
+               c.Check(hdr, check.Matches, `(?s)HTTP/1.1 404 Not Found\r\n.*`)
+               c.Check(body, check.Equals, "")
+       }
+}
+
+// TODO: Move most cases to functional tests -- at least use Go's own
+// http client instead of forking curl. Just leave enough of an
+// integration test to assure that the documented way of invoking curl
+// really works against the server.
+func (s *IntegrationSuite) Test404(c *check.C) {
+       for _, uri := range []string{
+               // Routing errors (always 404 regardless of what's stored in Keep)
+               "/",
+               "/foo",
+               "/download",
+               "/collections",
+               "/collections/",
+               // Implicit/generated index is not implemented yet;
+               // until then, return 404.
+               "/collections/" + arvadostest.FooCollection,
+               "/collections/" + arvadostest.FooCollection + "/",
+               "/collections/" + arvadostest.FooBarDirCollection + "/dir1",
+               "/collections/" + arvadostest.FooBarDirCollection + "/dir1/",
+               // Non-existent file in collection
+               "/collections/" + arvadostest.FooCollection + "/theperthcountyconspiracy",
+               "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/theperthcountyconspiracy",
+               // Non-existent collection
+               "/collections/" + arvadostest.NonexistentCollection,
+               "/collections/" + arvadostest.NonexistentCollection + "/",
+               "/collections/" + arvadostest.NonexistentCollection + "/theperthcountyconspiracy",
+               "/collections/download/" + arvadostest.NonexistentCollection + "/" + arvadostest.ActiveToken + "/theperthcountyconspiracy",
+       } {
+               hdr, body, _ := s.runCurl(c, arvadostest.ActiveToken, "collections.example.com", uri)
+               c.Check(hdr, check.Matches, "(?s)HTTP/1.1 404 Not Found\r\n.*")
+               c.Check(body, check.Equals, "")
+       }
+}
+
+func (s *IntegrationSuite) Test1GBFile(c *check.C) {
+       if testing.Short() {
+               c.Skip("skipping 1GB integration test in short mode")
+       }
+       s.test100BlockFile(c, 10000000)
+}
+
+func (s *IntegrationSuite) Test300MBFile(c *check.C) {
+       s.test100BlockFile(c, 3000000)
+}
+
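+// test100BlockFile stores a single block of the given size, creates
+// a collection whose manifest references that block 100 times, and
+// verifies that keep-web serves the resulting file with the expected
+// size and content.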
+func (s *IntegrationSuite) test100BlockFile(c *check.C, blocksize int) {
+       testdata := make([]byte, blocksize)
+       for i := 0; i < blocksize; i++ {
+               testdata[i] = byte(' ')
+       }
+       arv, err := arvadosclient.MakeArvadosClient()
+       c.Assert(err, check.Equals, nil)
+       arv.ApiToken = arvadostest.ActiveToken
+       kc, err := keepclient.MakeKeepClient(&arv)
+       c.Assert(err, check.Equals, nil)
+       loc, _, err := kc.PutB(testdata[:])
+       c.Assert(err, check.Equals, nil)
+       mtext := "."
+       for i := 0; i < 100; i++ {
+               mtext = mtext + " " + loc
+       }
+       mtext = mtext + fmt.Sprintf(" 0:%d00:testdata.bin\n", blocksize)
+       coll := map[string]interface{}{}
+       err = arv.Create("collections",
+               map[string]interface{}{
+                       "collection": map[string]interface{}{
+                               "name":          fmt.Sprintf("testdata blocksize=%d", blocksize),
+                               "manifest_text": mtext,
+                       },
+               }, &coll)
+       c.Assert(err, check.Equals, nil)
+       uuid := coll["uuid"].(string)
+
+       hdr, body, size := s.runCurl(c, arv.ApiToken, uuid+".collections.example.com", "/testdata.bin")
+       c.Check(hdr, check.Matches, `(?s)HTTP/1.1 200 OK\r\n.*`)
+       c.Check(hdr, check.Matches, `(?si).*Content-length: `+fmt.Sprintf("%d00", blocksize)+`\r\n.*`)
+       c.Check([]byte(body)[:1234], check.DeepEquals, testdata[:1234])
+       c.Check(size, check.Equals, int64(blocksize)*100)
+}
+
+type curlCase struct {
+       auth    string
+       host    string
+       path    string
+       dataMD5 string
+}
+
+func (s *IntegrationSuite) Test200(c *check.C) {
+       anonymousTokens = []string{arvadostest.AnonymousToken}
+       for _, spec := range []curlCase{
+               // My collection
+               {
+                       auth:    arvadostest.ActiveToken,
+                       host:    arvadostest.FooCollection + "--collections.example.com",
+                       path:    "/foo",
+                       dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+               },
+               {
+                       auth:    arvadostest.ActiveToken,
+                       host:    arvadostest.FooCollection + ".collections.example.com",
+                       path:    "/foo",
+                       dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+               },
+               {
+                       host:    strings.Replace(arvadostest.FooPdh, "+", "-", 1) + ".collections.example.com",
+                       path:    "/t=" + arvadostest.ActiveToken + "/foo",
+                       dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+               },
+               {
+                       path:    "/c=" + arvadostest.FooPdh + "/t=" + arvadostest.ActiveToken + "/foo",
+                       dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+               },
+               {
+                       path:    "/c=" + strings.Replace(arvadostest.FooPdh, "+", "-", 1) + "/t=" + arvadostest.ActiveToken + "/_/foo",
+                       dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+               },
+               {
+                       path:    "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
+                       dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+               },
+               {
+                       auth:    "tokensobogus",
+                       path:    "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
+                       dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+               },
+               {
+                       auth:    arvadostest.ActiveToken,
+                       path:    "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
+                       dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+               },
+               {
+                       auth:    arvadostest.AnonymousToken,
+                       path:    "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
+                       dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+               },
+
+               // Anonymously accessible data
+               {
+                       path:    "/c=" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
+                       dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+               },
+               {
+                       host:    arvadostest.HelloWorldCollection + ".collections.example.com",
+                       path:    "/Hello%20world.txt",
+                       dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+               },
+               {
+                       host:    arvadostest.HelloWorldCollection + ".collections.example.com",
+                       path:    "/_/Hello%20world.txt",
+                       dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+               },
+               {
+                       path:    "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
+                       dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+               },
+               {
+                       auth:    arvadostest.ActiveToken,
+                       path:    "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
+                       dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+               },
+               {
+                       auth:    arvadostest.SpectatorToken,
+                       path:    "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
+                       dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+               },
+               {
+                       auth:    arvadostest.SpectatorToken,
+                       host:    arvadostest.HelloWorldCollection + "--collections.example.com",
+                       path:    "/Hello%20world.txt",
+                       dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+               },
+               {
+                       auth:    arvadostest.SpectatorToken,
+                       path:    "/collections/download/" + arvadostest.HelloWorldCollection + "/" + arvadostest.SpectatorToken + "/Hello%20world.txt",
+                       dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+               },
+       } {
+               host := spec.host
+               if host == "" {
+                       host = "collections.example.com"
+               }
+               hdr, body, _ := s.runCurl(c, spec.auth, host, spec.path)
+               c.Check(hdr, check.Matches, `(?s)HTTP/1.1 200 OK\r\n.*`)
+               if strings.HasSuffix(spec.path, ".txt") {
+                       c.Check(hdr, check.Matches, `(?s).*\r\nContent-Type: text/plain.*`)
+                       // TODO: Check some types that aren't
+                       // automatically detected by Go's http server
+                       // by sniffing the content.
+               }
+               c.Check(fmt.Sprintf("%x", md5.Sum([]byte(body))), check.Equals, spec.dataMD5)
+       }
+}
+
+// Return header block and body.
+func (s *IntegrationSuite) runCurl(c *check.C, token, host, uri string, args ...string) (hdr, bodyPart string, bodySize int64) {
+       curlArgs := []string{"--silent", "--show-error", "--include"}
+       testHost, testPort, _ := net.SplitHostPort(s.testServer.Addr)
+       curlArgs = append(curlArgs, "--resolve", host+":"+testPort+":"+testHost)
+       if token != "" {
+               curlArgs = append(curlArgs, "-H", "Authorization: OAuth2 "+token)
+       }
+       curlArgs = append(curlArgs, args...)
+       curlArgs = append(curlArgs, "http://"+host+":"+testPort+uri)
+       c.Log(fmt.Sprintf("curlArgs == %#v", curlArgs))
+       cmd := exec.Command("curl", curlArgs...)
+       stdout, err := cmd.StdoutPipe()
+       c.Assert(err, check.Equals, nil)
+       cmd.Stderr = cmd.Stdout
+       c.Assert(cmd.Start(), check.Equals, nil)
+       buf := make([]byte, 2<<27)
+       n, err := io.ReadFull(stdout, buf)
+       // Discard (but measure the size of) anything past the 256 MiB buffer.
+       var discarded int64
+       if err == io.ErrUnexpectedEOF {
+               err = nil
+               buf = buf[:n]
+       } else {
+               c.Assert(err, check.Equals, nil)
+               discarded, err = io.Copy(ioutil.Discard, stdout)
+               c.Assert(err, check.Equals, nil)
+       }
+       err = cmd.Wait()
+       // Without "-f", curl exits 0 as long as it gets a valid HTTP
+       // response from the server, even if the response status
+       // indicates that the request failed. In our test suite, we
+       // always expect a valid HTTP response, and we parse the
+       // headers ourselves. If curl exits non-zero, our testing
+       // environment is broken.
+       c.Assert(err, check.Equals, nil)
+       hdrsAndBody := strings.SplitN(string(buf), "\r\n\r\n", 2)
+       c.Assert(len(hdrsAndBody), check.Equals, 2)
+       hdr = hdrsAndBody[0]
+       bodyPart = hdrsAndBody[1]
+       bodySize = int64(len(bodyPart)) + discarded
+       return
+}
+
+func (s *IntegrationSuite) SetUpSuite(c *check.C) {
+       arvadostest.StartAPI()
+       arvadostest.StartKeep(2, true)
+
+       arv, err := arvadosclient.MakeArvadosClient()
+       c.Assert(err, check.Equals, nil)
+       arv.ApiToken = arvadostest.ActiveToken
+       kc, err := keepclient.MakeKeepClient(&arv)
+       c.Assert(err, check.Equals, nil)
+       kc.PutB([]byte("Hello world\n"))
+       kc.PutB([]byte("foo"))
+       kc.PutB([]byte("foobar"))
+}
+
+func (s *IntegrationSuite) TearDownSuite(c *check.C) {
+       arvadostest.StopKeep(2)
+       arvadostest.StopAPI()
+}
+
+func (s *IntegrationSuite) SetUpTest(c *check.C) {
+       arvadostest.ResetEnv()
+       s.testServer = &server{}
+       var err error
+       address = "127.0.0.1:0"
+       err = s.testServer.Start()
+       c.Assert(err, check.Equals, nil)
+}
+
+func (s *IntegrationSuite) TearDownTest(c *check.C) {
+       var err error
+       if s.testServer != nil {
+               err = s.testServer.Close()
+       }
+       c.Check(err, check.Equals, nil)
+}
+
+// Gocheck boilerplate
+func Test(t *testing.T) {
+       check.TestingT(t)
+}
index d0af4a58ea5e7746d8243fb6272820e8c4801307..7b5cd2befb8f69bd25fa62674d01590214aec5ad 100644 (file)
@@ -14,7 +14,6 @@ import (
        "net/http"
        "os"
        "os/signal"
-       "reflect"
        "regexp"
        "sync"
        "syscall"
@@ -22,8 +21,8 @@ import (
 )
 
 // Default TCP address on which to listen for requests.
-// Initialized by the -listen flag.
-const DEFAULT_ADDR = ":25107"
+// Override with -listen.
+const DefaultAddr = ":25107"
 
 var listener net.Listener
 
@@ -37,12 +36,12 @@ func main() {
                pidfile          string
        )
 
-       flagset := flag.NewFlagSet("default", flag.ExitOnError)
+       flagset := flag.NewFlagSet("keepproxy", flag.ExitOnError)
 
        flagset.StringVar(
                &listen,
                "listen",
-               DEFAULT_ADDR,
+               DefaultAddr,
                "Interface on which to listen for requests, in the format "+
                        "ipaddr:port. e.g. -listen=10.0.1.24:8000. Use -listen=:port "+
                        "to listen on all network interfaces.")
@@ -84,6 +83,9 @@ func main() {
                log.Fatalf("Error setting up arvados client %s", err.Error())
        }
 
+       if os.Getenv("ARVADOS_DEBUG") != "" {
+               keepclient.DebugPrintf = log.Printf
+       }
        kc, err := keepclient.MakeKeepClient(&arv)
        if err != nil {
                log.Fatalf("Error setting up keep client %s", err.Error())
@@ -100,15 +102,14 @@ func main() {
        }
 
        kc.Want_replicas = default_replicas
-
        kc.Client.Timeout = time.Duration(timeout) * time.Second
+       go kc.RefreshServices(5*time.Minute, 3*time.Second)
 
        listener, err = net.Listen("tcp", listen)
        if err != nil {
                log.Fatalf("Could not listen on %v", listen)
        }
-
-       go RefreshServicesList(kc)
+       log.Printf("Arvados Keep proxy started listening on %v", listener.Addr())
 
        // Shut down the server gracefully (by closing the listener)
        // if SIGTERM is received.
@@ -121,9 +122,7 @@ func main() {
        signal.Notify(term, syscall.SIGTERM)
        signal.Notify(term, syscall.SIGINT)
 
-       log.Printf("Arvados Keep proxy started listening on %v", listener.Addr())
-
-       // Start listening for requests.
+       // Start serving requests.
        http.Serve(listener, MakeRESTRouter(!no_get, !no_put, kc))
 
        log.Println("shutting down")
@@ -135,30 +134,6 @@ type ApiTokenCache struct {
        expireTime int64
 }
 
-// Refresh the keep service list every five minutes.
-func RefreshServicesList(kc *keepclient.KeepClient) {
-       var previousRoots = []map[string]string{}
-       var delay time.Duration = 0
-       for {
-               time.Sleep(delay * time.Second)
-               delay = 300
-               if err := kc.DiscoverKeepServers(); err != nil {
-                       log.Println("Error retrieving services list:", err)
-                       delay = 3
-                       continue
-               }
-               newRoots := []map[string]string{kc.LocalRoots(), kc.GatewayRoots()}
-               if !reflect.DeepEqual(previousRoots, newRoots) {
-                       log.Printf("Updated services list: locals %v gateways %v", newRoots[0], newRoots[1])
-               }
-               if len(newRoots[0]) == 0 {
-                       log.Print("WARNING: No local services. Retrying in 3 seconds.")
-                       delay = 3
-               }
-               previousRoots = newRoots
-       }
-}
-
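
The periodic refresh removed here now lives in the keepclient SDK, invoked above as `go kc.RefreshServices(5*time.Minute, 3*time.Second)`. A minimal sketch of the same polling pattern, using only the DiscoverKeepServers call visible in the deleted code (not the SDK's actual implementation):

func refreshLoop(kc *keepclient.KeepClient, interval, errInterval time.Duration) {
	// Re-discover keep services forever; back off to errInterval
	// after a failed attempt, as the deleted loop did.
	for {
		if err := kc.DiscoverKeepServers(); err != nil {
			log.Println("Error retrieving services list:", err)
			time.Sleep(errInterval)
			continue
		}
		time.Sleep(interval)
	}
}
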
 // Cache the token and set an expire time.  If we already have an expire time
 // on the token, it is not updated.
 func (this *ApiTokenCache) RememberToken(token string) {
@@ -191,17 +166,13 @@ func (this *ApiTokenCache) RecallToken(token string) bool {
 }
 
 func GetRemoteAddress(req *http.Request) string {
-       if realip := req.Header.Get("X-Real-IP"); realip != "" {
-               if forwarded := req.Header.Get("X-Forwarded-For"); forwarded != realip {
-                       return fmt.Sprintf("%s (X-Forwarded-For %s)", realip, forwarded)
-               } else {
-                       return realip
-               }
+       if xff := req.Header.Get("X-Forwarded-For"); xff != "" {
+               return xff + "," + req.RemoteAddr
        }
        return req.RemoteAddr
 }
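
With the simplified header handling above, a proxied request reports both the forwarded-for chain and the immediate peer. A quick illustration with hypothetical addresses:

req, _ := http.NewRequest("GET", "http://keep.example/", nil) // hypothetical URL
req.RemoteAddr = "10.0.0.5:34567"                             // hypothetical peer
req.Header.Set("X-Forwarded-For", "203.0.113.7")              // hypothetical client
fmt.Println(GetRemoteAddress(req)) // prints "203.0.113.7,10.0.0.5:34567"
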
 
-func CheckAuthorizationHeader(kc keepclient.KeepClient, cache *ApiTokenCache, req *http.Request) (pass bool, tok string) {
+func CheckAuthorizationHeader(kc *keepclient.KeepClient, cache *ApiTokenCache, req *http.Request) (pass bool, tok string) {
        var auth string
        if auth = req.Header.Get("Authorization"); auth == "" {
                return false, ""
@@ -241,6 +212,11 @@ type PutBlockHandler struct {
        *ApiTokenCache
 }
 
+type IndexHandler struct {
+       *keepclient.KeepClient
+       *ApiTokenCache
+}
+
 type InvalidPathHandler struct{}
 
 type OptionsHandler struct{}
@@ -262,6 +238,12 @@ func MakeRESTRouter(
                rest.Handle(`/{locator:[0-9a-f]{32}\+.*}`,
                        GetBlockHandler{kc, t}).Methods("GET", "HEAD")
                rest.Handle(`/{locator:[0-9a-f]{32}}`, GetBlockHandler{kc, t}).Methods("GET", "HEAD")
+
+               // List all blocks
+               rest.Handle(`/index`, IndexHandler{kc, t}).Methods("GET")
+
+               // List blocks whose hash has the given prefix
+               rest.Handle(`/index/{prefix:[0-9a-f]{0,32}}`, IndexHandler{kc, t}).Methods("GET")
        }
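
The new index routes can be exercised directly over HTTP. A sketch of a client call, assuming a placeholder proxy address and the same `OAuth2` Authorization scheme the tests below use:

req, _ := http.NewRequest("GET", "http://keepproxy.example:25107/index/abc", nil)
req.Header.Add("Authorization", "OAuth2 "+os.Getenv("ARVADOS_API_TOKEN"))
resp, err := http.DefaultClient.Do(req)
if err != nil {
	log.Fatal(err)
}
defer resp.Body.Close()
// One locator per line for every block whose hash starts with "abc",
// terminated by a blank line.
io.Copy(os.Stdout, resp.Body)
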
 
        if enable_put {
@@ -320,7 +302,7 @@ func (this GetBlockHandler) ServeHTTP(resp http.ResponseWriter, req *http.Reques
 
        var pass bool
        var tok string
-       if pass, tok = CheckAuthorizationHeader(kc, this.ApiTokenCache, req); !pass {
+       if pass, tok = CheckAuthorizationHeader(&kc, this.ApiTokenCache, req); !pass {
                status, err = http.StatusForbidden, BadAuthorizationHeader
                return
        }
@@ -351,7 +333,7 @@ func (this GetBlockHandler) ServeHTTP(resp http.ResponseWriter, req *http.Reques
                log.Println("Warning:", GetRemoteAddress(req), req.Method, proxiedURI, "Content-Length not provided")
        }
 
-       switch err {
+       switch respErr := err.(type) {
        case nil:
                status = http.StatusOK
                resp.Header().Set("Content-Length", fmt.Sprint(expectLength))
@@ -364,10 +346,16 @@ func (this GetBlockHandler) ServeHTTP(resp http.ResponseWriter, req *http.Reques
                                err = ContentLengthMismatch
                        }
                }
-       case keepclient.BlockNotFound:
-               status = http.StatusNotFound
+       case keepclient.Error:
+               if respErr == keepclient.BlockNotFound {
+                       status = http.StatusNotFound
+               } else if respErr.Temporary() {
+                       status = http.StatusBadGateway
+               } else {
+			status = http.StatusUnprocessableEntity
+               }
        default:
-               status = http.StatusBadGateway
+               status = http.StatusInternalServerError
        }
 }
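
In other words: a nil error maps to 200, BlockNotFound to 404, temporary keepclient errors to 502, other keepclient errors to 422, and anything unrecognized to 500. The same mapping as a standalone helper, for illustration only (hypothetical, not part of this commit):

func statusFor(err error) int {
	switch respErr := err.(type) {
	case nil:
		return http.StatusOK
	case keepclient.Error:
		if respErr == keepclient.BlockNotFound {
			return http.StatusNotFound
		} else if respErr.Temporary() {
			return http.StatusBadGateway
		}
		return http.StatusUnprocessableEntity
	default:
		return http.StatusInternalServerError
	}
}
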
 
@@ -421,7 +409,7 @@ func (this PutBlockHandler) ServeHTTP(resp http.ResponseWriter, req *http.Reques
 
        var pass bool
        var tok string
-       if pass, tok = CheckAuthorizationHeader(kc, this.ApiTokenCache, req); !pass {
+       if pass, tok = CheckAuthorizationHeader(&kc, this.ApiTokenCache, req); !pass {
                err = BadAuthorizationHeader
                status = http.StatusForbidden
                return
@@ -481,3 +469,63 @@ func (this PutBlockHandler) ServeHTTP(resp http.ResponseWriter, req *http.Reques
                status = http.StatusBadGateway
        }
 }
+
+// ServeHTTP implementation for IndexHandler.
+// Supports only GET requests for /index and /index/{prefix:[0-9a-f]{0,32}}.
+// For each keep server found in LocalRoots:
+//   Invokes GetIndex using keepclient
+//   Expects a "complete" response (terminating with a blank line)
+//   Aborts on any error
+// Concatenates the responses from all keep servers and returns them.
+func (handler IndexHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
+       SetCorsHeaders(resp)
+
+       prefix := mux.Vars(req)["prefix"]
+       var err error
+       var status int
+
+       defer func() {
+               if status != http.StatusOK {
+                       http.Error(resp, err.Error(), status)
+               }
+       }()
+
+       kc := *handler.KeepClient
+
+       ok, token := CheckAuthorizationHeader(&kc, handler.ApiTokenCache, req)
+       if !ok {
+               status, err = http.StatusForbidden, BadAuthorizationHeader
+               return
+       }
+
+       // Copy ArvadosClient struct and use the client's API token
+       arvclient := *kc.Arvados
+       arvclient.ApiToken = token
+       kc.Arvados = &arvclient
+
+       // Only GET method is supported
+       if req.Method != "GET" {
+               status, err = http.StatusNotImplemented, MethodNotSupported
+               return
+       }
+
+       // Get index from all LocalRoots and write to resp
+       var reader io.Reader
+       for uuid := range kc.LocalRoots() {
+               reader, err = kc.GetIndex(uuid, prefix)
+               if err != nil {
+                       status = http.StatusBadGateway
+                       return
+               }
+
+               _, err = io.Copy(resp, reader)
+               if err != nil {
+                       status = http.StatusBadGateway
+                       return
+               }
+       }
+
+       // Got index from all the keep servers and wrote to resp
+       status = http.StatusOK
+       resp.Write([]byte("\n"))
+}
index 22cc72ea15ea4b23b308e355a0292cec808557d2..56c9bff91c63d30306ed7b08f3fa08b215e7479f 100644 (file)
@@ -2,21 +2,19 @@ package main
 
 import (
        "crypto/md5"
-       "crypto/tls"
        "fmt"
        "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
        "git.curoverse.com/arvados.git/sdk/go/arvadostest"
        "git.curoverse.com/arvados.git/sdk/go/keepclient"
-       . "gopkg.in/check.v1"
-       "io"
        "io/ioutil"
        "log"
        "net/http"
-       "net/url"
        "os"
        "strings"
        "testing"
        "time"
+
+       . "gopkg.in/check.v1"
 )
 
 // Gocheck boilerplate
@@ -30,6 +28,14 @@ var _ = Suite(&ServerRequiredSuite{})
 // Tests that require the Keep server running
 type ServerRequiredSuite struct{}
 
+// Gocheck boilerplate
+var _ = Suite(&NoKeepServerSuite{})
+
+// Test with no keepserver to simulate errors
+type NoKeepServerSuite struct{}
+
+var TestProxyUUID = "zzzzz-bi6l4-lrixqc4fxofbmzz"
+
 // Wait (up to 1 second) for keepproxy to listen on a port. This
 // avoids a race condition where we hit a "connection refused" error
 // because we start testing the proxy too soon.
@@ -53,7 +59,7 @@ func closeListener() {
 
 func (s *ServerRequiredSuite) SetUpSuite(c *C) {
        arvadostest.StartAPI()
-       arvadostest.StartKeep()
+       arvadostest.StartKeep(2, false)
 }
 
 func (s *ServerRequiredSuite) SetUpTest(c *C) {
@@ -61,110 +67,50 @@ func (s *ServerRequiredSuite) SetUpTest(c *C) {
 }
 
 func (s *ServerRequiredSuite) TearDownSuite(c *C) {
-       arvadostest.StopKeep()
+       arvadostest.StopKeep(2)
        arvadostest.StopAPI()
 }
 
-func setupProxyService() {
-
-       client := &http.Client{Transport: &http.Transport{
-               TLSClientConfig: &tls.Config{InsecureSkipVerify: true}}}
-
-       var req *http.Request
-       var err error
-       if req, err = http.NewRequest("POST", fmt.Sprintf("https://%s/arvados/v1/keep_services", os.Getenv("ARVADOS_API_HOST")), nil); err != nil {
-               panic(err.Error())
-       }
-       req.Header.Add("Authorization", fmt.Sprintf("OAuth2 %s", os.Getenv("ARVADOS_API_TOKEN")))
-
-       reader, writer := io.Pipe()
-
-       req.Body = reader
-
-       go func() {
-               data := url.Values{}
-               data.Set("keep_service", `{
-  "service_host": "localhost",
-  "service_port": 29950,
-  "service_ssl_flag": false,
-  "service_type": "proxy"
-}`)
+func (s *NoKeepServerSuite) SetUpSuite(c *C) {
+       arvadostest.StartAPI()
+       // We need API to have some keep services listed, but the
+       // services themselves should be unresponsive.
+       arvadostest.StartKeep(2, false)
+       arvadostest.StopKeep(2)
+}
 
-               writer.Write([]byte(data.Encode()))
-               writer.Close()
-       }()
+func (s *NoKeepServerSuite) SetUpTest(c *C) {
+       arvadostest.ResetEnv()
+}
 
-       var resp *http.Response
-       if resp, err = client.Do(req); err != nil {
-               panic(err.Error())
-       }
-       if resp.StatusCode != 200 {
-               panic(resp.Status)
-       }
+func (s *NoKeepServerSuite) TearDownSuite(c *C) {
+       arvadostest.StopAPI()
 }
 
-func runProxy(c *C, args []string, port int, bogusClientToken bool) keepclient.KeepClient {
-       if bogusClientToken {
-               os.Setenv("ARVADOS_API_TOKEN", "bogus-token")
-       }
+func runProxy(c *C, args []string, bogusClientToken bool) *keepclient.KeepClient {
+       args = append([]string{"keepproxy"}, args...)
+       os.Args = append(args, "-listen=:0")
+       listener = nil
+       go main()
+       waitForListener()
+
        arv, err := arvadosclient.MakeArvadosClient()
        c.Assert(err, Equals, nil)
-       kc := keepclient.KeepClient{
-               Arvados:       &arv,
-               Want_replicas: 2,
-               Using_proxy:   true,
-               Client:        &http.Client{},
-       }
-       locals := map[string]string{
-               "proxy": fmt.Sprintf("http://localhost:%v", port),
-       }
-       writableLocals := map[string]string{
-               "proxy": fmt.Sprintf("http://localhost:%v", port),
-       }
-       kc.SetServiceRoots(locals, writableLocals, nil)
-       c.Check(kc.Using_proxy, Equals, true)
-       c.Check(len(kc.LocalRoots()), Equals, 1)
-       for _, root := range kc.LocalRoots() {
-               c.Check(root, Equals, fmt.Sprintf("http://localhost:%v", port))
-       }
-       log.Print("keepclient created")
        if bogusClientToken {
-               arvadostest.ResetEnv()
+               arv.ApiToken = "bogus-token"
        }
-
-       {
-               os.Args = append(args, fmt.Sprintf("-listen=:%v", port))
-               listener = nil
-               go main()
+       kc := keepclient.New(&arv)
+       sr := map[string]string{
+               TestProxyUUID: "http://" + listener.Addr().String(),
        }
+       kc.SetServiceRoots(sr, sr, sr)
+       kc.Arvados.External = true
 
        return kc
 }
 
 func (s *ServerRequiredSuite) TestPutAskGet(c *C) {
-       log.Print("TestPutAndGet start")
-
-       os.Args = []string{"keepproxy", "-listen=:29950"}
-       listener = nil
-       go main()
-       time.Sleep(100 * time.Millisecond)
-
-       setupProxyService()
-
-       os.Setenv("ARVADOS_EXTERNAL_CLIENT", "true")
-       arv, err := arvadosclient.MakeArvadosClient()
-       c.Assert(err, Equals, nil)
-       kc, err := keepclient.MakeKeepClient(&arv)
-       c.Assert(err, Equals, nil)
-       c.Check(kc.Arvados.External, Equals, true)
-       c.Check(kc.Using_proxy, Equals, true)
-       c.Check(len(kc.LocalRoots()), Equals, 1)
-       for _, root := range kc.LocalRoots() {
-               c.Check(root, Equals, "http://localhost:29950")
-       }
-       os.Setenv("ARVADOS_EXTERNAL_CLIENT", "")
-
-       waitForListener()
+       kc := runProxy(c, nil, false)
        defer closeListener()
 
        hash := fmt.Sprintf("%x", md5.Sum([]byte("foo")))
@@ -236,22 +182,19 @@ func (s *ServerRequiredSuite) TestPutAskGet(c *C) {
                c.Check(blocklen, Equals, int64(0))
                log.Print("Finished Get zero block")
        }
-
-       log.Print("TestPutAndGet done")
 }
 
 func (s *ServerRequiredSuite) TestPutAskGetForbidden(c *C) {
-       log.Print("TestPutAskGetForbidden start")
-
-       kc := runProxy(c, []string{"keepproxy"}, 29951, true)
-       waitForListener()
+       kc := runProxy(c, nil, true)
        defer closeListener()
 
        hash := fmt.Sprintf("%x", md5.Sum([]byte("bar")))
 
        {
                _, _, err := kc.Ask(hash)
-               c.Check(err, Equals, keepclient.BlockNotFound)
+               errNotFound, _ := err.(keepclient.ErrNotFound)
+               c.Check(errNotFound, NotNil)
+               c.Assert(strings.Contains(err.Error(), "HTTP 403"), Equals, true)
                log.Print("Ask 1")
        }
 
@@ -265,33 +208,34 @@ func (s *ServerRequiredSuite) TestPutAskGetForbidden(c *C) {
 
        {
                blocklen, _, err := kc.Ask(hash)
-               c.Assert(err, Equals, keepclient.BlockNotFound)
+               errNotFound, _ := err.(keepclient.ErrNotFound)
+               c.Check(errNotFound, NotNil)
+               c.Assert(strings.Contains(err.Error(), "HTTP 403"), Equals, true)
                c.Check(blocklen, Equals, int64(0))
                log.Print("Ask 2")
        }
 
        {
                _, blocklen, _, err := kc.Get(hash)
-               c.Assert(err, Equals, keepclient.BlockNotFound)
+               errNotFound, _ := err.(keepclient.ErrNotFound)
+               c.Check(errNotFound, NotNil)
+               c.Assert(strings.Contains(err.Error(), "HTTP 403"), Equals, true)
                c.Check(blocklen, Equals, int64(0))
                log.Print("Get")
        }
-
-       log.Print("TestPutAskGetForbidden done")
 }
 
 func (s *ServerRequiredSuite) TestGetDisabled(c *C) {
-       log.Print("TestGetDisabled start")
-
-       kc := runProxy(c, []string{"keepproxy", "-no-get"}, 29952, false)
-       waitForListener()
+       kc := runProxy(c, []string{"-no-get"}, false)
        defer closeListener()
 
        hash := fmt.Sprintf("%x", md5.Sum([]byte("baz")))
 
        {
                _, _, err := kc.Ask(hash)
-               c.Check(err, Equals, keepclient.BlockNotFound)
+               errNotFound, _ := err.(keepclient.ErrNotFound)
+               c.Check(errNotFound, NotNil)
+               c.Assert(strings.Contains(err.Error(), "HTTP 400"), Equals, true)
                log.Print("Ask 1")
        }
 
@@ -305,49 +249,41 @@ func (s *ServerRequiredSuite) TestGetDisabled(c *C) {
 
        {
                blocklen, _, err := kc.Ask(hash)
-               c.Assert(err, Equals, keepclient.BlockNotFound)
+               errNotFound, _ := err.(keepclient.ErrNotFound)
+               c.Check(errNotFound, NotNil)
+               c.Assert(strings.Contains(err.Error(), "HTTP 400"), Equals, true)
                c.Check(blocklen, Equals, int64(0))
                log.Print("Ask 2")
        }
 
        {
                _, blocklen, _, err := kc.Get(hash)
-               c.Assert(err, Equals, keepclient.BlockNotFound)
+               errNotFound, _ := err.(keepclient.ErrNotFound)
+               c.Check(errNotFound, NotNil)
+               c.Assert(strings.Contains(err.Error(), "HTTP 400"), Equals, true)
                c.Check(blocklen, Equals, int64(0))
                log.Print("Get")
        }
-
-       log.Print("TestGetDisabled done")
 }
 
 func (s *ServerRequiredSuite) TestPutDisabled(c *C) {
-       log.Print("TestPutDisabled start")
-
-       kc := runProxy(c, []string{"keepproxy", "-no-put"}, 29953, false)
-       waitForListener()
+       kc := runProxy(c, []string{"-no-put"}, false)
        defer closeListener()
 
-       {
-               hash2, rep, err := kc.PutB([]byte("quux"))
-               c.Check(hash2, Equals, "")
-               c.Check(rep, Equals, 0)
-               c.Check(err, Equals, keepclient.InsufficientReplicasError)
-               log.Print("PutB")
-       }
-
-       log.Print("TestPutDisabled done")
+       hash2, rep, err := kc.PutB([]byte("quux"))
+       c.Check(hash2, Equals, "")
+       c.Check(rep, Equals, 0)
+       c.Check(err, Equals, keepclient.InsufficientReplicasError)
 }
 
 func (s *ServerRequiredSuite) TestCorsHeaders(c *C) {
-       runProxy(c, []string{"keepproxy"}, 29954, false)
-       waitForListener()
+       runProxy(c, nil, false)
        defer closeListener()
 
        {
                client := http.Client{}
                req, err := http.NewRequest("OPTIONS",
-                       fmt.Sprintf("http://localhost:29954/%x+3",
-                               md5.Sum([]byte("foo"))),
+                       fmt.Sprintf("http://%s/%x+3", listener.Addr().String(), md5.Sum([]byte("foo"))),
                        nil)
                req.Header.Add("Access-Control-Request-Method", "PUT")
                req.Header.Add("Access-Control-Request-Headers", "Authorization, X-Keep-Desired-Replicas")
@@ -362,8 +298,7 @@ func (s *ServerRequiredSuite) TestCorsHeaders(c *C) {
 
        {
                resp, err := http.Get(
-                       fmt.Sprintf("http://localhost:29954/%x+3",
-                               md5.Sum([]byte("foo"))))
+                       fmt.Sprintf("http://%s/%x+3", listener.Addr().String(), md5.Sum([]byte("foo"))))
                c.Check(err, Equals, nil)
                c.Check(resp.Header.Get("Access-Control-Allow-Headers"), Equals, "Authorization, Content-Length, Content-Type, X-Keep-Desired-Replicas")
                c.Check(resp.Header.Get("Access-Control-Allow-Origin"), Equals, "*")
@@ -371,14 +306,13 @@ func (s *ServerRequiredSuite) TestCorsHeaders(c *C) {
 }
 
 func (s *ServerRequiredSuite) TestPostWithoutHash(c *C) {
-       runProxy(c, []string{"keepproxy"}, 29955, false)
-       waitForListener()
+       runProxy(c, nil, false)
        defer closeListener()
 
        {
                client := http.Client{}
                req, err := http.NewRequest("POST",
-                       "http://localhost:29955/",
+                       "http://"+listener.Addr().String()+"/",
                        strings.NewReader("qux"))
                req.Header.Add("Authorization", "OAuth2 4axaw8zxe0qm22wa6urpp5nskcne8z88cvbupv653y1njyi05h")
                req.Header.Add("Content-Type", "application/octet-stream")
@@ -406,3 +340,153 @@ func (s *ServerRequiredSuite) TestStripHint(c *C) {
                "http://keep.zzzzz.arvadosapi.com:25107/2228819a18d3727630fa30c81853d23f+67108864+K@zzzzz-zzzzz-zzzzzzzzzzzzzzz+A37b6ab198qqqq28d903b975266b23ee711e1852c@55635f73")
 
 }
+
+// Test GetIndex
+//   Put one block, with 2 replicas
+//   With no prefix (expect the block locator, twice)
+//   With an existing prefix (expect the block locator, twice)
+//   With a valid but non-existing prefix (expect "\n")
+//   With an invalid prefix (expect error)
+func (s *ServerRequiredSuite) TestGetIndex(c *C) {
+       kc := runProxy(c, nil, false)
+       defer closeListener()
+
+       // Put "index-data" blocks
+       data := []byte("index-data")
+       hash := fmt.Sprintf("%x", md5.Sum(data))
+
+       hash2, rep, err := kc.PutB(data)
+       c.Check(hash2, Matches, fmt.Sprintf(`^%s\+10(\+.+)?$`, hash))
+       c.Check(rep, Equals, 2)
+       c.Check(err, Equals, nil)
+
+       reader, blocklen, _, err := kc.Get(hash)
+       c.Assert(err, Equals, nil)
+       c.Check(blocklen, Equals, int64(10))
+	all, err := ioutil.ReadAll(reader)
+	c.Check(err, Equals, nil)
+	c.Check(all, DeepEquals, data)
+
+       // Put some more blocks
+       _, rep, err = kc.PutB([]byte("some-more-index-data"))
+       c.Check(err, Equals, nil)
+
+       kc.Arvados.ApiToken = arvadostest.DataManagerToken
+
+       // Invoke GetIndex
+       for _, spec := range []struct {
+               prefix         string
+               expectTestHash bool
+               expectOther    bool
+       }{
+               {"", true, true},         // with no prefix
+               {hash[:3], true, false},  // with matching prefix
+               {"abcdef", false, false}, // with no such prefix
+       } {
+               indexReader, err := kc.GetIndex(TestProxyUUID, spec.prefix)
+               c.Assert(err, Equals, nil)
+               indexResp, err := ioutil.ReadAll(indexReader)
+               c.Assert(err, Equals, nil)
+               locators := strings.Split(string(indexResp), "\n")
+               gotTestHash := 0
+               gotOther := 0
+               for _, locator := range locators {
+                       if locator == "" {
+                               continue
+                       }
+                       c.Check(locator[:len(spec.prefix)], Equals, spec.prefix)
+                       if locator[:32] == hash {
+                               gotTestHash++
+                       } else {
+                               gotOther++
+                       }
+               }
+               c.Check(gotTestHash == 2, Equals, spec.expectTestHash)
+               c.Check(gotOther > 0, Equals, spec.expectOther)
+       }
+
+       // GetIndex with invalid prefix
+       _, err = kc.GetIndex(TestProxyUUID, "xyz")
+	c.Assert(err, NotNil)
+}
+
+func (s *ServerRequiredSuite) TestPutAskGetInvalidToken(c *C) {
+       kc := runProxy(c, nil, false)
+       defer closeListener()
+
+       // Put a test block
+       hash, rep, err := kc.PutB([]byte("foo"))
+       c.Check(err, Equals, nil)
+       c.Check(rep, Equals, 2)
+
+       for _, token := range []string{
+               "nosuchtoken",
+               "2ym314ysp27sk7h943q6vtc378srb06se3pq6ghurylyf3pdmx", // expired
+       } {
+               // Change token to given bad token
+               kc.Arvados.ApiToken = token
+
+               // Ask should result in error
+               _, _, err = kc.Ask(hash)
+               c.Check(err, NotNil)
+               errNotFound, _ := err.(keepclient.ErrNotFound)
+               c.Check(errNotFound.Temporary(), Equals, false)
+               c.Assert(strings.Contains(err.Error(), "HTTP 403"), Equals, true)
+
+               // Get should result in error
+               _, _, _, err = kc.Get(hash)
+               c.Check(err, NotNil)
+               errNotFound, _ = err.(keepclient.ErrNotFound)
+               c.Check(errNotFound.Temporary(), Equals, false)
+               c.Assert(strings.Contains(err.Error(), "HTTP 403 \"Missing or invalid Authorization header\""), Equals, true)
+       }
+}
+
+func (s *ServerRequiredSuite) TestAskGetKeepProxyConnectionError(c *C) {
+       arv, err := arvadosclient.MakeArvadosClient()
+       c.Assert(err, Equals, nil)
+
+       // keepclient with no such keep server
+       kc := keepclient.New(&arv)
+       locals := map[string]string{
+               TestProxyUUID: "http://localhost:12345",
+       }
+       kc.SetServiceRoots(locals, nil, nil)
+
+       // Ask should result in temporary connection refused error
+       hash := fmt.Sprintf("%x", md5.Sum([]byte("foo")))
+       _, _, err = kc.Ask(hash)
+       c.Check(err, NotNil)
+       errNotFound, _ := err.(*keepclient.ErrNotFound)
+       c.Check(errNotFound.Temporary(), Equals, true)
+       c.Assert(strings.Contains(err.Error(), "connection refused"), Equals, true)
+
+       // Get should result in temporary connection refused error
+       _, _, _, err = kc.Get(hash)
+       c.Check(err, NotNil)
+       errNotFound, _ = err.(*keepclient.ErrNotFound)
+       c.Check(errNotFound.Temporary(), Equals, true)
+       c.Assert(strings.Contains(err.Error(), "connection refused"), Equals, true)
+}
+
+func (s *NoKeepServerSuite) TestAskGetNoKeepServerError(c *C) {
+       kc := runProxy(c, nil, false)
+       defer closeListener()
+
+       hash := fmt.Sprintf("%x", md5.Sum([]byte("foo")))
+       for _, f := range []func() error{
+               func() error {
+                       _, _, err := kc.Ask(hash)
+                       return err
+               },
+               func() error {
+                       _, _, _, err := kc.Get(hash)
+                       return err
+               },
+       } {
+               err := f()
+               c.Assert(err, NotNil)
+               errNotFound, _ := err.(*keepclient.ErrNotFound)
+               c.Check(errNotFound.Temporary(), Equals, true)
+               c.Check(err, ErrorMatches, `.*HTTP 502.*`)
+       }
+}
diff --git a/services/keepproxy/pkg-extras/etc/default/keepproxy b/services/keepproxy/pkg-extras/etc/default/keepproxy
new file mode 100644 (file)
index 0000000..ddcab10
--- /dev/null
@@ -0,0 +1,7 @@
+user="root"
+group="root"
+chroot="/"
+chdir="/"
+nice=""
+args="-listen=':9100'"
+
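
Note that this packaged default overrides keepproxy's built-in listen address (DefaultAddr, ":25107", above): with `args="-listen=':9100'"`, the init script below starts the daemon on port 9100 instead.
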
diff --git a/services/keepproxy/pkg-extras/etc/init.d/keepproxy b/services/keepproxy/pkg-extras/etc/init.d/keepproxy
new file mode 100755 (executable)
index 0000000..7bb35b9
--- /dev/null
@@ -0,0 +1,156 @@
+#!/bin/sh
+# Init script for keepproxy
+# Maintained by 
+# Generated by pleaserun.
+# Implemented based on LSB Core 3.1:
+#   * Sections: 20.2, 20.3
+#
+### BEGIN INIT INFO
+# Provides:          keepproxy
+# Required-Start:    $remote_fs $syslog
+# Required-Stop:     $remote_fs $syslog
+# Default-Start:     2 3 4 5
+# Default-Stop:      0 1 6
+# Short-Description: Arvados Keep proxy
+# Description:       Arvados Keep proxy (keepproxy)
+### END INIT INFO
+
+PATH=/sbin:/usr/sbin:/bin:/usr/bin
+export PATH
+
+name=keepproxy
+program=/usr/bin/keepproxy
+args=''
+pidfile="/var/run/$name.pid"
+
+[ -r /etc/default/$name ] && . /etc/default/$name
+[ -r /etc/sysconfig/$name ] && . /etc/sysconfig/$name
+
+trace() {
+  logger -t "/etc/init.d/keepproxy" "$@"
+}
+
+emit() {
+  trace "$@"
+  echo "$@"
+}
+
+start() {
+
+  # Ensure the log directory is setup correctly.
+  [ ! -d "/var/log/" ] && mkdir "/var/log/"
+  chown "$user":"$group" "/var/log/"
+  chmod 755 "/var/log/"
+
+
+  # Setup any environmental stuff beforehand
+  
+
+  # Run the program!
+  
+  chroot --userspec "$user":"$group" "$chroot" sh -c "
+    
+    cd \"$chdir\"
+    exec \"$program\" $args
+  " >> /var/log/keepproxy.stdout 2>> /var/log/keepproxy.stderr &
+
+  # Generate the pidfile from here. If we instead made the forked process
+  # generate it there will be a race condition between the pidfile writing
+  # and a process possibly asking for status.
+  echo $! > $pidfile
+
+  emit "$name started"
+  return 0
+}
+
+stop() {
+  # Try a few times to kill TERM the program
+  if status ; then
+    pid=$(cat "$pidfile")
+    trace "Killing $name (pid $pid) with SIGTERM"
+    kill -TERM $pid
+    # Wait for it to exit.
+    for i in 1 2 3 4 5 ; do
+      trace "Waiting for $name (pid $pid) to die..."
+      status || break
+      sleep 1
+    done
+    if status ; then
+      emit "$name stop failed; still running."
+    else
+      emit "$name stopped."
+    fi
+  fi
+}
+
+status() {
+  if [ -f "$pidfile" ] ; then
+    pid=$(cat "$pidfile")
+    if ps -p $pid > /dev/null 2> /dev/null ; then
+      # process by this pid is running.
+      # It may not be our pid, but that's what you get with just pidfiles.
+      # TODO(sissel): Check if this process seems to be the same as the one we
+      # expect. It'd be nice to use flock here, but flock uses fork, not exec,
+      # so it makes it quite awkward to use in this case.
+      return 0
+    else
+      return 2 # program is dead but pid file exists
+    fi
+  else
+    return 3 # program is not running
+  fi
+}
+
+force_stop() {
+  if status ; then
+    stop
+    status && kill -KILL $(cat "$pidfile")
+  fi
+}
+
+
+case "$1" in
+  force-start|start|stop|force-stop|restart)
+    trace "Attempting '$1' on keepproxy"
+    ;;
+esac
+
+case "$1" in
+  force-start)
+    PRESTART=no
+    exec "$0" start
+    ;;
+  start)
+    status
+    code=$?
+    if [ $code -eq 0 ]; then
+      emit "$name is already running"
+      exit $code
+    else
+      start
+      exit $?
+    fi
+    ;;
+  stop) stop ;;
+  force-stop) force_stop ;;
+  status) 
+    status
+    code=$?
+    if [ $code -eq 0 ] ; then
+      emit "$name is running"
+    else
+      emit "$name is not running"
+    fi
+    exit $code
+    ;;
+  restart) 
+    
+    stop && start 
+    ;;
+  *)
+    echo "Usage: $name {start|force-start|stop|force-stop|status|restart}" >&2
+    exit 3
+  ;;
+esac
+
+exit $?
diff --git a/services/keepstore/azure_blob_volume.go b/services/keepstore/azure_blob_volume.go
new file mode 100644 (file)
index 0000000..0f98e6e
--- /dev/null
@@ -0,0 +1,322 @@
+package main
+
+import (
+       "bytes"
+       "errors"
+       "flag"
+       "fmt"
+       "io"
+       "io/ioutil"
+       "log"
+       "os"
+       "regexp"
+       "strings"
+       "time"
+
+       "github.com/curoverse/azure-sdk-for-go/storage"
+)
+
+var (
+       azureStorageAccountName    string
+       azureStorageAccountKeyFile string
+       azureStorageReplication    int
+       azureWriteRaceInterval     = 15 * time.Second
+       azureWriteRacePollTime     = time.Second
+)
+
+func readKeyFromFile(file string) (string, error) {
+       buf, err := ioutil.ReadFile(file)
+       if err != nil {
+               return "", errors.New("reading key from " + file + ": " + err.Error())
+       }
+       accountKey := strings.TrimSpace(string(buf))
+       if accountKey == "" {
+               return "", errors.New("empty account key in " + file)
+       }
+       return accountKey, nil
+}
+
+type azureVolumeAdder struct {
+       *volumeSet
+}
+
+func (s *azureVolumeAdder) Set(containerName string) error {
+       if containerName == "" {
+               return errors.New("no container name given")
+       }
+       if azureStorageAccountName == "" || azureStorageAccountKeyFile == "" {
+		return errors.New("-azure-storage-account-name and -azure-storage-account-key-file arguments must be given before -azure-storage-container-volume")
+       }
+       accountKey, err := readKeyFromFile(azureStorageAccountKeyFile)
+       if err != nil {
+               return err
+       }
+       azClient, err := storage.NewBasicClient(azureStorageAccountName, accountKey)
+       if err != nil {
+               return errors.New("creating Azure storage client: " + err.Error())
+       }
+       if flagSerializeIO {
+               log.Print("Notice: -serialize is not supported by azure-blob-container volumes.")
+       }
+       v := NewAzureBlobVolume(azClient, containerName, flagReadonly, azureStorageReplication)
+       if err := v.Check(); err != nil {
+               return err
+       }
+       *s.volumeSet = append(*s.volumeSet, v)
+       return nil
+}
+
+func init() {
+       flag.Var(&azureVolumeAdder{&volumes},
+               "azure-storage-container-volume",
+               "Use the given container as a storage volume. Can be given multiple times.")
+       flag.StringVar(
+               &azureStorageAccountName,
+               "azure-storage-account-name",
+               "",
+               "Azure storage account name used for subsequent --azure-storage-container-volume arguments.")
+       flag.StringVar(
+               &azureStorageAccountKeyFile,
+               "azure-storage-account-key-file",
+               "",
+               "File containing the account key used for subsequent --azure-storage-container-volume arguments.")
+       flag.IntVar(
+               &azureStorageReplication,
+               "azure-storage-replication",
+               3,
+               "Replication level to report to clients when data is stored in an Azure container.")
+}
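
Note the ordering requirement enforced by azureVolumeAdder.Set above: on the keepstore command line, -azure-storage-account-name and -azure-storage-account-key-file must appear before each -azure-storage-container-volume argument, e.g. `keepstore -azure-storage-account-name=myaccount -azure-storage-account-key-file=/etc/keep/azure.key -azure-storage-container-volume=mycontainer` (the account name, key path, and container name here are placeholders).
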
+
+// An AzureBlobVolume stores and retrieves blocks in an Azure Blob
+// container.
+type AzureBlobVolume struct {
+       azClient      storage.Client
+       bsClient      storage.BlobStorageClient
+       containerName string
+       readonly      bool
+       replication   int
+}
+
+// NewAzureBlobVolume returns a new AzureBlobVolume using the given
+// client and container name. The replication argument specifies the
+// replication level to report when writing data.
+func NewAzureBlobVolume(client storage.Client, containerName string, readonly bool, replication int) *AzureBlobVolume {
+       return &AzureBlobVolume{
+               azClient:      client,
+               bsClient:      client.GetBlobService(),
+               containerName: containerName,
+               readonly:      readonly,
+               replication:   replication,
+       }
+}
+
+// Check returns nil if the volume is usable.
+func (v *AzureBlobVolume) Check() error {
+       ok, err := v.bsClient.ContainerExists(v.containerName)
+       if err != nil {
+               return err
+       }
+       if !ok {
+               return errors.New("container does not exist")
+       }
+       return nil
+}
+
+// Get reads a Keep block that has been stored as a block blob in the
+// container.
+//
+// If the block is younger than azureWriteRaceInterval and is
+// unexpectedly empty, assume a PutBlob operation is in progress, and
+// wait for it to finish writing.
+func (v *AzureBlobVolume) Get(loc string) ([]byte, error) {
+       var deadline time.Time
+       haveDeadline := false
+       buf, err := v.get(loc)
+       for err == nil && len(buf) == 0 && loc != "d41d8cd98f00b204e9800998ecf8427e" {
+               // Seeing a brand new empty block probably means we're
+               // in a race with CreateBlob, which under the hood
+               // (apparently) does "CreateEmpty" and "CommitData"
+               // with no additional transaction locking.
+               if !haveDeadline {
+                       t, err := v.Mtime(loc)
+                       if err != nil {
+                               log.Print("Got empty block (possible race) but Mtime failed: ", err)
+                               break
+                       }
+                       deadline = t.Add(azureWriteRaceInterval)
+                       if time.Now().After(deadline) {
+                               break
+                       }
+                       log.Printf("Race? Block %s is 0 bytes, %s old. Polling until %s", loc, time.Since(t), deadline)
+                       haveDeadline = true
+               } else if time.Now().After(deadline) {
+                       break
+               }
+               bufs.Put(buf)
+               time.Sleep(azureWriteRacePollTime)
+               buf, err = v.get(loc)
+       }
+       if haveDeadline {
+               log.Printf("Race ended with len(buf)==%d", len(buf))
+       }
+       return buf, err
+}
+
+func (v *AzureBlobVolume) get(loc string) ([]byte, error) {
+       rdr, err := v.bsClient.GetBlob(v.containerName, loc)
+       if err != nil {
+               return nil, v.translateError(err)
+       }
+       defer rdr.Close()
+       buf := bufs.Get(BlockSize)
+       n, err := io.ReadFull(rdr, buf)
+       switch err {
+       case nil, io.EOF, io.ErrUnexpectedEOF:
+               return buf[:n], nil
+       default:
+               bufs.Put(buf)
+               return nil, err
+       }
+}
+
+// Compare the given data with existing stored data.
+func (v *AzureBlobVolume) Compare(loc string, expect []byte) error {
+       rdr, err := v.bsClient.GetBlob(v.containerName, loc)
+       if err != nil {
+               return v.translateError(err)
+       }
+       defer rdr.Close()
+       return compareReaderWithBuf(rdr, expect, loc[:32])
+}
+
+// Put stores a Keep block as a block blob in the container.
+func (v *AzureBlobVolume) Put(loc string, block []byte) error {
+       if v.readonly {
+               return MethodDisabledError
+       }
+       return v.bsClient.CreateBlockBlobFromReader(v.containerName, loc, uint64(len(block)), bytes.NewReader(block))
+}
+
+// Touch updates the last-modified property of a block blob.
+func (v *AzureBlobVolume) Touch(loc string) error {
+       if v.readonly {
+               return MethodDisabledError
+       }
+       return v.bsClient.SetBlobMetadata(v.containerName, loc, map[string]string{
+		"touch": fmt.Sprintf("%d", time.Now().UnixNano()),
+       })
+}
+
+// Mtime returns the last-modified property of a block blob.
+func (v *AzureBlobVolume) Mtime(loc string) (time.Time, error) {
+       props, err := v.bsClient.GetBlobProperties(v.containerName, loc)
+       if err != nil {
+               return time.Time{}, err
+       }
+       return time.Parse(time.RFC1123, props.LastModified)
+}
+
+// IndexTo writes a list of Keep blocks that are stored in the
+// container.
+func (v *AzureBlobVolume) IndexTo(prefix string, writer io.Writer) error {
+       params := storage.ListBlobsParameters{
+               Prefix: prefix,
+       }
+       for {
+               resp, err := v.bsClient.ListBlobs(v.containerName, params)
+               if err != nil {
+                       return err
+               }
+               for _, b := range resp.Blobs {
+                       t, err := time.Parse(time.RFC1123, b.Properties.LastModified)
+                       if err != nil {
+                               return err
+                       }
+                       if !v.isKeepBlock(b.Name) {
+                               continue
+                       }
+                       if b.Properties.ContentLength == 0 && t.Add(azureWriteRaceInterval).After(time.Now()) {
+                               // A new zero-length blob is probably
+                               // just a new non-empty blob that
+                               // hasn't committed its data yet (see
+                               // Get()), and in any case has no
+                               // value.
+                               continue
+                       }
+                       fmt.Fprintf(writer, "%s+%d %d\n", b.Name, b.Properties.ContentLength, t.Unix())
+               }
+               if resp.NextMarker == "" {
+                       return nil
+               }
+               params.Marker = resp.NextMarker
+       }
+}
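
As the Fprintf above shows, each index line is the block hash with a size suffix, a space, and the blob's last-modified time as a Unix timestamp. A minimal sketch of collecting an index with this method (names as defined in this file):

var buf bytes.Buffer
if err := v.IndexTo("abc", &buf); err != nil {
	log.Fatal(err)
}
// Each line: <32-hex-hash>+<size> <unix-mtime>
fmt.Print(buf.String())
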
+
+// Delete a Keep block.
+func (v *AzureBlobVolume) Delete(loc string) error {
+       if v.readonly {
+               return MethodDisabledError
+       }
+       // Ideally we would use If-Unmodified-Since, but that
+       // particular condition seems to be ignored by Azure. Instead,
+       // we get the Etag before checking Mtime, and use If-Match to
+       // ensure we don't delete data if Put() or Touch() happens
+       // between our calls to Mtime() and DeleteBlob().
+       props, err := v.bsClient.GetBlobProperties(v.containerName, loc)
+       if err != nil {
+               return err
+       }
+       if t, err := v.Mtime(loc); err != nil {
+               return err
+       } else if time.Since(t) < blobSignatureTTL {
+               return nil
+       }
+       return v.bsClient.DeleteBlob(v.containerName, loc, map[string]string{
+               "If-Match": props.Etag,
+       })
+}
+
+// Status returns a VolumeStatus struct with placeholder data.
+func (v *AzureBlobVolume) Status() *VolumeStatus {
+       return &VolumeStatus{
+               DeviceNum: 1,
+               BytesFree: BlockSize * 1000,
+               BytesUsed: 1,
+       }
+}
+
+// String returns a volume label, including the container name.
+func (v *AzureBlobVolume) String() string {
+       return fmt.Sprintf("azure-storage-container:%+q", v.containerName)
+}
+
+// Writable returns true, unless the -readonly flag was on when the
+// volume was added.
+func (v *AzureBlobVolume) Writable() bool {
+       return !v.readonly
+}
+
+// Replication returns the replication level of the container, as
+// specified by the -azure-storage-replication argument.
+func (v *AzureBlobVolume) Replication() int {
+       return v.replication
+}
+
+// If possible, translate an Azure SDK error to a recognizable error
+// like os.ErrNotExist.
+func (v *AzureBlobVolume) translateError(err error) error {
+       switch {
+       case err == nil:
+               return err
+       case strings.Contains(err.Error(), "404 Not Found"):
+               // "storage: service returned without a response body (404 Not Found)"
+               return os.ErrNotExist
+       default:
+               return err
+       }
+}
+
+var keepBlockRegexp = regexp.MustCompile(`^[0-9a-f]{32}$`)
+
+func (v *AzureBlobVolume) isKeepBlock(s string) bool {
+       return keepBlockRegexp.MatchString(s)
+}
diff --git a/services/keepstore/azure_blob_volume_test.go b/services/keepstore/azure_blob_volume_test.go
new file mode 100644 (file)
index 0000000..b8bf5cb
--- /dev/null
@@ -0,0 +1,480 @@
+package main
+
+import (
+       "bytes"
+       "encoding/base64"
+       "encoding/xml"
+       "flag"
+       "fmt"
+       "io/ioutil"
+       "log"
+       "math/rand"
+       "net"
+       "net/http"
+       "net/http/httptest"
+       "regexp"
+       "sort"
+       "strconv"
+       "strings"
+       "sync"
+       "testing"
+       "time"
+
+       "github.com/curoverse/azure-sdk-for-go/storage"
+)
+
+const (
+       // The same fake credentials used by Microsoft's Azure emulator
+       emulatorAccountName = "devstoreaccount1"
+       emulatorAccountKey  = "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw=="
+)
+
+var azureTestContainer string
+
+func init() {
+       flag.StringVar(
+               &azureTestContainer,
+               "test.azure-storage-container-volume",
+               "",
+               "Name of Azure container to use for testing. Do not use a container with real data! Use -azure-storage-account-name and -azure-storage-key-file arguments to supply credentials.")
+}
+
+type azBlob struct {
+       Data        []byte
+       Etag        string
+       Metadata    map[string]string
+       Mtime       time.Time
+       Uncommitted map[string][]byte
+}
+
+type azStubHandler struct {
+       sync.Mutex
+       blobs map[string]*azBlob
+       race  chan chan struct{}
+}
+
+func newAzStubHandler() *azStubHandler {
+       return &azStubHandler{
+               blobs: make(map[string]*azBlob),
+       }
+}
+
+func (h *azStubHandler) TouchWithDate(container, hash string, t time.Time) {
+       blob, ok := h.blobs[container+"|"+hash]
+       if !ok {
+               return
+       }
+       blob.Mtime = t
+}
+
+func (h *azStubHandler) PutRaw(container, hash string, data []byte) {
+       h.Lock()
+       defer h.Unlock()
+       h.blobs[container+"|"+hash] = &azBlob{
+               Data:        data,
+               Mtime:       time.Now(),
+               Uncommitted: make(map[string][]byte),
+       }
+}
+
+func (h *azStubHandler) unlockAndRace() {
+       if h.race == nil {
+               return
+       }
+       h.Unlock()
+       // Signal caller that race is starting by reading from
+       // h.race. If we get a channel, block until that channel is
+       // ready to receive. If we get nil (or h.race is closed) just
+       // proceed.
+       if c := <-h.race; c != nil {
+               c <- struct{}{}
+       }
+       h.Lock()
+}
+
+func (h *azStubHandler) ServeHTTP(rw http.ResponseWriter, r *http.Request) {
+       h.Lock()
+       defer h.Unlock()
+       // defer log.Printf("azStubHandler: %+v", r)
+
+       path := strings.Split(r.URL.Path, "/")
+       container := path[1]
+       hash := ""
+       if len(path) > 2 {
+               hash = path[2]
+       }
+
+       if err := r.ParseForm(); err != nil {
+               log.Printf("azStubHandler(%+v): %s", r, err)
+               rw.WriteHeader(http.StatusBadRequest)
+               return
+       }
+
+       body, err := ioutil.ReadAll(r.Body)
+       if err != nil {
+               return
+       }
+
+       type blockListRequestBody struct {
+               XMLName     xml.Name `xml:"BlockList"`
+               Uncommitted []string
+       }
+
+       blob, blobExists := h.blobs[container+"|"+hash]
+
+       switch {
+       case r.Method == "PUT" && r.Form.Get("comp") == "":
+               // "Put Blob" API
+               if _, ok := h.blobs[container+"|"+hash]; !ok {
+                       // Like the real Azure service, we offer a
+                       // race window during which other clients can
+                       // list/get the new blob before any data is
+                       // committed.
+                       h.blobs[container+"|"+hash] = &azBlob{
+                               Mtime:       time.Now(),
+                               Uncommitted: make(map[string][]byte),
+                               Etag:        makeEtag(),
+                       }
+                       h.unlockAndRace()
+               }
+               h.blobs[container+"|"+hash] = &azBlob{
+                       Data:        body,
+                       Mtime:       time.Now(),
+                       Uncommitted: make(map[string][]byte),
+                       Etag:        makeEtag(),
+               }
+               rw.WriteHeader(http.StatusCreated)
+       case r.Method == "PUT" && r.Form.Get("comp") == "block":
+               // "Put Block" API
+               if !blobExists {
+                       log.Printf("Got block for nonexistent blob: %+v", r)
+                       rw.WriteHeader(http.StatusBadRequest)
+                       return
+               }
+               blockID, err := base64.StdEncoding.DecodeString(r.Form.Get("blockid"))
+               if err != nil || len(blockID) == 0 {
+                       log.Printf("Invalid blockid: %+q", r.Form.Get("blockid"))
+                       rw.WriteHeader(http.StatusBadRequest)
+                       return
+               }
+               blob.Uncommitted[string(blockID)] = body
+               rw.WriteHeader(http.StatusCreated)
+       case r.Method == "PUT" && r.Form.Get("comp") == "blocklist":
+               // "Put Block List" API
+               bl := &blockListRequestBody{}
+               if err := xml.Unmarshal(body, bl); err != nil {
+                       log.Printf("xml Unmarshal: %s", err)
+                       rw.WriteHeader(http.StatusBadRequest)
+                       return
+               }
+               for _, encBlockID := range bl.Uncommitted {
+                       blockID, err := base64.StdEncoding.DecodeString(encBlockID)
+                       if err != nil || len(blockID) == 0 || blob.Uncommitted[string(blockID)] == nil {
+                               log.Printf("Invalid blockid: %+q", encBlockID)
+                               rw.WriteHeader(http.StatusBadRequest)
+                               return
+                       }
+                       blob.Data = blob.Uncommitted[string(blockID)]
+                       blob.Etag = makeEtag()
+                       blob.Mtime = time.Now()
+                       delete(blob.Uncommitted, string(blockID))
+               }
+               rw.WriteHeader(http.StatusCreated)
+       case r.Method == "PUT" && r.Form.Get("comp") == "metadata":
+               // "Set Metadata Headers" API. We don't bother
+               // stubbing "Get Metadata Headers": AzureBlobVolume
+               // sets metadata headers only as a way to bump Etag
+               // and Last-Modified.
+               if !blobExists {
+                       log.Printf("Got metadata for nonexistent blob: %+v", r)
+                       rw.WriteHeader(http.StatusBadRequest)
+                       return
+               }
+               blob.Metadata = make(map[string]string)
+               for k, v := range r.Header {
+                       if strings.HasPrefix(strings.ToLower(k), "x-ms-meta-") {
+                               blob.Metadata[k] = v[0]
+                       }
+               }
+               blob.Mtime = time.Now()
+               blob.Etag = makeEtag()
+       case (r.Method == "GET" || r.Method == "HEAD") && hash != "":
+               // "Get Blob" API
+               if !blobExists {
+                       rw.WriteHeader(http.StatusNotFound)
+                       return
+               }
+               rw.Header().Set("Last-Modified", blob.Mtime.Format(time.RFC1123))
+               rw.Header().Set("Content-Length", strconv.Itoa(len(blob.Data)))
+               if r.Method == "GET" {
+                       if _, err := rw.Write(blob.Data); err != nil {
+                               log.Printf("write %+q: %s", blob.Data, err)
+                       }
+               }
+               h.unlockAndRace()
+       case r.Method == "DELETE" && hash != "":
+               // "Delete Blob" API
+               if !blobExists {
+                       rw.WriteHeader(http.StatusNotFound)
+                       return
+               }
+               delete(h.blobs, container+"|"+hash)
+               rw.WriteHeader(http.StatusAccepted)
+       case r.Method == "GET" && r.Form.Get("comp") == "list" && r.Form.Get("restype") == "container":
+               // "List Blobs" API
+               prefix := container + "|" + r.Form.Get("prefix")
+               marker := r.Form.Get("marker")
+
+               maxResults := 2
+               if n, err := strconv.Atoi(r.Form.Get("maxresults")); err == nil && n >= 1 && n <= 5000 {
+                       maxResults = n
+               }
+
+               resp := storage.BlobListResponse{
+                       Marker:     marker,
+                       NextMarker: "",
+                       MaxResults: int64(maxResults),
+               }
+               var hashes sort.StringSlice
+               for k := range h.blobs {
+                       if strings.HasPrefix(k, prefix) {
+                               hashes = append(hashes, k[len(container)+1:])
+                       }
+               }
+               hashes.Sort()
+               for _, hash := range hashes {
+                       if len(resp.Blobs) == maxResults {
+                               resp.NextMarker = hash
+                               break
+                       }
+                       if len(resp.Blobs) > 0 || marker == "" || marker == hash {
+                               blob := h.blobs[container+"|"+hash]
+                               resp.Blobs = append(resp.Blobs, storage.Blob{
+                                       Name: hash,
+                                       Properties: storage.BlobProperties{
+                                               LastModified:  blob.Mtime.Format(time.RFC1123),
+                                               ContentLength: int64(len(blob.Data)),
+                                               Etag:          blob.Etag,
+                                       },
+                               })
+                       }
+               }
+		buf, err := xml.Marshal(resp)
+		if err != nil {
+			log.Print(err)
+			rw.WriteHeader(http.StatusInternalServerError)
+			return
+		}
+		rw.Write(buf)
+       default:
+               log.Printf("azStubHandler: not implemented: %+v Body:%+q", r, body)
+               rw.WriteHeader(http.StatusNotImplemented)
+       }
+}
+
+// azStubDialer is a net.Dialer that notices when the Azure driver
+// tries to connect to "devstoreaccount1.blob.127.0.0.1:46067", and
+// in such cases transparently dials "127.0.0.1:46067" instead.
+type azStubDialer struct {
+       net.Dialer
+}
+
+var localHostPortRe = regexp.MustCompile(`(127\.0\.0\.1|localhost|\[::1\]):\d+`)
+
+func (d *azStubDialer) Dial(network, address string) (net.Conn, error) {
+       if hp := localHostPortRe.FindString(address); hp != "" {
+               log.Println("azStubDialer: dial", hp, "instead of", address)
+               address = hp
+       }
+       return d.Dialer.Dial(network, address)
+}
+
+type TestableAzureBlobVolume struct {
+       *AzureBlobVolume
+       azHandler *azStubHandler
+       azStub    *httptest.Server
+       t         TB
+}
+
+func NewTestableAzureBlobVolume(t TB, readonly bool, replication int) *TestableAzureBlobVolume {
+       azHandler := newAzStubHandler()
+       azStub := httptest.NewServer(azHandler)
+
+       var azClient storage.Client
+
+       container := azureTestContainer
+       if container == "" {
+               // Connect to stub instead of real Azure storage service
+               stubURLBase := strings.Split(azStub.URL, "://")[1]
+               var err error
+               if azClient, err = storage.NewClient(emulatorAccountName, emulatorAccountKey, stubURLBase, storage.DefaultAPIVersion, false); err != nil {
+                       t.Fatal(err)
+               }
+               container = "fakecontainername"
+       } else {
+               // Connect to real Azure storage service
+               accountKey, err := readKeyFromFile(azureStorageAccountKeyFile)
+               if err != nil {
+                       t.Fatal(err)
+               }
+               azClient, err = storage.NewBasicClient(azureStorageAccountName, accountKey)
+               if err != nil {
+                       t.Fatal(err)
+               }
+       }
+
+       v := NewAzureBlobVolume(azClient, container, readonly, replication)
+
+       return &TestableAzureBlobVolume{
+               AzureBlobVolume: v,
+               azHandler:       azHandler,
+               azStub:          azStub,
+               t:               t,
+       }
+}
+
+func TestAzureBlobVolumeWithGeneric(t *testing.T) {
+       defer func(t http.RoundTripper) {
+               http.DefaultTransport = t
+       }(http.DefaultTransport)
+       http.DefaultTransport = &http.Transport{
+               Dial: (&azStubDialer{}).Dial,
+       }
+       azureWriteRaceInterval = time.Millisecond
+       azureWriteRacePollTime = time.Nanosecond
+       DoGenericVolumeTests(t, func(t TB) TestableVolume {
+               return NewTestableAzureBlobVolume(t, false, azureStorageReplication)
+       })
+}
+
+func TestReadonlyAzureBlobVolumeWithGeneric(t *testing.T) {
+       defer func(t http.RoundTripper) {
+               http.DefaultTransport = t
+       }(http.DefaultTransport)
+       http.DefaultTransport = &http.Transport{
+               Dial: (&azStubDialer{}).Dial,
+       }
+       azureWriteRaceInterval = time.Millisecond
+       azureWriteRacePollTime = time.Nanosecond
+       DoGenericVolumeTests(t, func(t TB) TestableVolume {
+               return NewTestableAzureBlobVolume(t, true, azureStorageReplication)
+       })
+}
+
+func TestAzureBlobVolumeReplication(t *testing.T) {
+       for r := 1; r <= 4; r++ {
+               v := NewTestableAzureBlobVolume(t, false, r)
+               defer v.Teardown()
+               if n := v.Replication(); n != r {
+                       t.Errorf("Got replication %d, expected %d", n, r)
+               }
+       }
+}
+
+func TestAzureBlobVolumeCreateBlobRace(t *testing.T) {
+       defer func(t http.RoundTripper) {
+               http.DefaultTransport = t
+       }(http.DefaultTransport)
+       http.DefaultTransport = &http.Transport{
+               Dial: (&azStubDialer{}).Dial,
+       }
+
+       v := NewTestableAzureBlobVolume(t, false, 3)
+       defer v.Teardown()
+
+       azureWriteRaceInterval = time.Second
+       azureWriteRacePollTime = time.Millisecond
+
+       allDone := make(chan struct{})
+       v.azHandler.race = make(chan chan struct{})
+       go func() {
+               err := v.Put(TestHash, TestBlock)
+               if err != nil {
+                       t.Error(err)
+               }
+       }()
+       continuePut := make(chan struct{})
+       // Wait for the stub's Put to create the empty blob
+       v.azHandler.race <- continuePut
+       go func() {
+               buf, err := v.Get(TestHash)
+               if err != nil {
+                       t.Error(err)
+               } else {
+                       bufs.Put(buf)
+               }
+               close(allDone)
+       }()
+       // Wait for the stub's Get to get the empty blob
+       close(v.azHandler.race)
+       // Allow stub's Put to continue, so the real data is ready
+       // when the volume's Get retries
+       <-continuePut
+       // Wait for volume's Get to return the real data
+       <-allDone
+}
+
+func TestAzureBlobVolumeCreateBlobRaceDeadline(t *testing.T) {
+       defer func(t http.RoundTripper) {
+               http.DefaultTransport = t
+       }(http.DefaultTransport)
+       http.DefaultTransport = &http.Transport{
+               Dial: (&azStubDialer{}).Dial,
+       }
+
+       v := NewTestableAzureBlobVolume(t, false, 3)
+       defer v.Teardown()
+
+       azureWriteRaceInterval = 2 * time.Second
+       azureWriteRacePollTime = 5 * time.Millisecond
+
+       v.PutRaw(TestHash, nil)
+
+       buf := new(bytes.Buffer)
+       v.IndexTo("", buf)
+       if buf.Len() != 0 {
+               t.Errorf("Index %+q should be empty", buf.Bytes())
+       }
+
+       v.TouchWithDate(TestHash, time.Now().Add(-1982*time.Millisecond))
+
+       allDone := make(chan struct{})
+       go func() {
+               defer close(allDone)
+               buf, err := v.Get(TestHash)
+               if err != nil {
+                       t.Error(err)
+                       return
+               }
+               if len(buf) != 0 {
+                       t.Errorf("Got %+q, expected empty buf", buf)
+               }
+               bufs.Put(buf)
+       }()
+       select {
+       case <-allDone:
+       case <-time.After(time.Second):
+               t.Error("Get should have stopped waiting for race when block was 2s old")
+       }
+
+       buf.Reset()
+       v.IndexTo("", buf)
+       if !bytes.HasPrefix(buf.Bytes(), []byte(TestHash+"+0")) {
+               t.Errorf("Index %+q should have %+q", buf.Bytes(), TestHash+"+0")
+       }
+}
+
+func (v *TestableAzureBlobVolume) PutRaw(locator string, data []byte) {
+       v.azHandler.PutRaw(v.containerName, locator, data)
+}
+
+func (v *TestableAzureBlobVolume) TouchWithDate(locator string, lastPut time.Time) {
+       v.azHandler.TouchWithDate(v.containerName, locator, lastPut)
+}
+
+func (v *TestableAzureBlobVolume) Teardown() {
+       v.azStub.Close()
+}
+
+func makeEtag() string {
+       return fmt.Sprintf("0x%x", rand.Int63())
+}
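
The race tests above coordinate the test goroutines and the stub handler through a channel-of-channels rendezvous on azHandler.race. The stub's side is not part of this hunk, so the following is only a sketch of the general shape of that handshake, with illustrative names:

    func exampleRaceWindow() {
        race := make(chan chan struct{})
        done := make(chan struct{})

        go func() { // worker: stands in for the stub's Put
            proceed := <-race // announce: intermediate state is now visible
            <-proceed         // wait for the test to release us
            close(done)       // finish the operation
        }()

        proceed := make(chan struct{})
        race <- proceed // rendezvous: the worker is parked in the window
        // ...race another operation against the intermediate state here...
        close(proceed) // let the worker complete
        <-done
    }
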
index 8726a19150c7faf2f309cc6b3ba647d9aa40dd54..7b51b643cea54ba0b5643a2bf84c4ccfed9bcd5f 100644 (file)
@@ -2,15 +2,9 @@ package main
 
 import (
        . "gopkg.in/check.v1"
-       "testing"
        "time"
 )
 
-// Gocheck boilerplate
-func TestBufferPool(t *testing.T) {
-       TestingT(t)
-}
-
 var _ = Suite(&BufferPoolSuite{})
 
 type BufferPoolSuite struct{}
index be26514a00ce6ae9092bf12981f5f55818a083f1..a4af563729b3cf0e72686a2913fd3664e497e24d 100644 (file)
@@ -1,6 +1,7 @@
 package main
 
 import (
+       "bytes"
        "crypto/md5"
        "fmt"
        "io"
@@ -47,3 +48,37 @@ func collisionOrCorrupt(expectMD5 string, buf1, buf2 []byte, rdr io.Reader) erro
        }
        return <-outcome
 }
+
+func compareReaderWithBuf(rdr io.Reader, expect []byte, hash string) error {
+       bufLen := 1 << 20
+       if bufLen > len(expect) && len(expect) > 0 {
+               // No need for bufLen to be longer than
+               // expect, except that len(buf)==0 would
+               // prevent us from handling empty readers the
+               // same way as non-empty readers: reading 0
+               // bytes at a time never reaches EOF.
+               bufLen = len(expect)
+       }
+       buf := make([]byte, bufLen)
+       cmp := expect
+
+       // Loop invariants: all data read so far matched what
+       // we expected, and the first N bytes of cmp are
+       // expected to equal the next N bytes read from
+       // rdr.
+       for {
+               n, err := rdr.Read(buf)
+               if n > len(cmp) || !bytes.Equal(cmp[:n], buf[:n]) {
+                       return collisionOrCorrupt(hash, expect[:len(expect)-len(cmp)], buf[:n], rdr)
+               }
+               cmp = cmp[n:]
+               if err == io.EOF {
+                       if len(cmp) != 0 {
+                               return collisionOrCorrupt(hash, expect[:len(expect)-len(cmp)], nil, nil)
+                       }
+                       return nil
+               } else if err != nil {
+                       return err
+               }
+       }
+}
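
compareReaderWithBuf streams the reader through a bounded buffer (at most 1 MiB, and never longer than the expected data) so that a mismatch can be classified by collisionOrCorrupt without holding a second full-size copy of the block. A minimal usage sketch, assuming a caller in the same package:

    func exampleCompareReader() error {
        data := []byte("foo")
        // MD5("foo") is acbd18db4cc2f85cedef654fccc4a4d8; the hash is used
        // only to classify a mismatch as collision vs. corruption.
        return compareReaderWithBuf(bytes.NewReader(data), data, "acbd18db4cc2f85cedef654fccc4a4d8")
    }
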
index 379daddd9803b945b68b6cbf290660c282a2bb2c..d9b7e614fc8aded50990ce41591dcb3a7685be09 100644 (file)
@@ -2,17 +2,11 @@ package main
 
 import (
        "bytes"
-       "testing"
        "testing/iotest"
 
        check "gopkg.in/check.v1"
 )
 
-// Gocheck boilerplate
-func Test(t *testing.T) {
-       check.TestingT(t)
-}
-
 var _ = check.Suite(&CollisionSuite{})
 
 type CollisionSuite struct{}
diff --git a/services/keepstore/gocheck_test.go b/services/keepstore/gocheck_test.go
new file mode 100644 (file)
index 0000000..133ed6e
--- /dev/null
@@ -0,0 +1,10 @@
+package main
+
+import (
+       "gopkg.in/check.v1"
+       "testing"
+)
+
+func TestGocheck(t *testing.T) {
+       check.TestingT(t)
+}
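
Consolidating the gocheck bootstrap into this single file means the suite runner is invoked exactly once per package; the per-file "Gocheck boilerplate" hooks removed above would each have run the whole suite again. Individual test files now only register their suites, e.g.:

    var _ = check.Suite(&MySuite{}) // illustrative registration

    type MySuite struct{}
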
index dd8366671b6b056f8e4161cfae8169cd6a086cc6..3817ea19002d1c18f14c2479a383fb2d1601d763 100644 (file)
@@ -321,6 +321,16 @@ func TestIndexHandler(t *testing.T) {
                uri:      "/index/" + TestHash[0:3],
                apiToken: dataManagerToken,
        }
+       superuserNoSuchPrefixReq := &RequestTester{
+               method:   "GET",
+               uri:      "/index/abcd",
+               apiToken: dataManagerToken,
+       }
+       superuserInvalidPrefixReq := &RequestTester{
+               method:   "GET",
+               uri:      "/index/xyz",
+               apiToken: dataManagerToken,
+       }
 
        // -------------------------------------------------------------
        // Only the superuser should be allowed to issue /index requests.
@@ -407,6 +417,26 @@ func TestIndexHandler(t *testing.T) {
                        "permissions on, superuser /index/prefix request: expected %s, got:\n%s",
                        expected, response.Body.String())
        }
+
+       // superuser /index/{no-such-prefix} request
+       // => OK
+       response = IssueRequest(superuserNoSuchPrefixReq)
+       ExpectStatusCode(t,
+               "permissions on, superuser request",
+               http.StatusOK,
+               response)
+
+       if "\n" != response.Body.String() {
+               t.Errorf("Expected empty response for %s. Found %s", superuserNoSuchPrefixReq.uri, response.Body.String())
+       }
+
+       // superuser /index/{invalid-prefix} request
+       // => StatusBadRequest
+       response = IssueRequest(superuserInvalidPrefixReq)
+       ExpectStatusCode(t,
+               "permissions on, superuser request",
+               http.StatusBadRequest,
+               response)
 }
 
 // TestDeleteHandler
@@ -924,3 +954,19 @@ func TestGetHandlerNoBufferleak(t *testing.T) {
        case <-ok:
        }
 }
+
+func TestPutReplicationHeader(t *testing.T) {
+       defer teardown()
+
+       KeepVM = MakeTestVolumeManager(2)
+       defer KeepVM.Close()
+
+       resp := IssueRequest(&RequestTester{
+               method:      "PUT",
+               uri:         "/" + TestHash,
+               requestBody: TestBlock,
+       })
+       if r := resp.Header().Get("X-Keep-Replicas-Stored"); r != "1" {
+               t.Errorf("Got X-Keep-Replicas-Stored: %q, expected %q", r, "1")
+       }
+}
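
On the client side, this new header is how a writer learns what replication a single PUT achieved. A hedged sketch of reading it (the endpoint shape and OAuth2 token header follow Keep's existing conventions; the names here are illustrative, not part of this diff):

    func storedReplicas(keepURL, hash, token string, block []byte) (int, error) {
        req, err := http.NewRequest("PUT", keepURL+"/"+hash, bytes.NewReader(block))
        if err != nil {
            return 0, err
        }
        req.Header.Set("Authorization", "OAuth2 "+token)
        resp, err := http.DefaultClient.Do(req)
        if err != nil {
            return 0, err
        }
        defer resp.Body.Close()
        // keepstore reports the replication achieved by this write.
        return strconv.Atoi(resp.Header.Get("X-Keep-Replicas-Stored"))
    }
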
index 2b96dbc582f8b584e731401f3e467dae635ea837..95af1b48707c6b189982dc18762cb517769bd117 100644 (file)
@@ -120,7 +120,7 @@ func PutBlockHandler(resp http.ResponseWriter, req *http.Request) {
                return
        }
 
-       err = PutBlock(buf, hash)
+       replication, err := PutBlock(buf, hash)
        bufs.Put(buf)
 
        if err != nil {
@@ -137,6 +137,7 @@ func PutBlockHandler(resp http.ResponseWriter, req *http.Request) {
                expiry := time.Now().Add(blobSignatureTTL)
                returnHash = SignLocator(returnHash, apiToken, expiry)
        }
+       resp.Header().Set("X-Keep-Replicas-Stored", strconv.Itoa(replication))
        resp.Write([]byte(returnHash + "\n"))
 }
 
@@ -517,40 +518,40 @@ func GetBlock(hash string) ([]byte, error) {
 //          all writes failed). The text of the error message should
 //          provide as much detail as possible.
 //
-func PutBlock(block []byte, hash string) error {
+func PutBlock(block []byte, hash string) (int, error) {
        // Check that BLOCK's checksum matches HASH.
        blockhash := fmt.Sprintf("%x", md5.Sum(block))
        if blockhash != hash {
                log.Printf("%s: MD5 checksum %s did not match request", hash, blockhash)
-               return RequestHashError
+               return 0, RequestHashError
        }
 
        // If we already have this data, it's intact on disk, and we
        // can update its timestamp, return success. If we have
        // different data with the same hash, return failure.
-       if err := CompareAndTouch(hash, block); err == nil || err == CollisionError {
-               return err
+       if n, err := CompareAndTouch(hash, block); err == nil || err == CollisionError {
+               return n, err
        }
 
        // Choose a Keep volume to write to.
        // If this volume fails, try all of the volumes in order.
        if vol := KeepVM.NextWritable(); vol != nil {
                if err := vol.Put(hash, block); err == nil {
-                       return nil // success!
+                       return vol.Replication(), nil // success!
                }
        }
 
        writables := KeepVM.AllWritable()
        if len(writables) == 0 {
                log.Print("No writable volumes.")
-               return FullError
+               return 0, FullError
        }
 
        allFull := true
        for _, vol := range writables {
                err := vol.Put(hash, block)
                if err == nil {
-                       return nil // success!
+                       return vol.Replication(), nil // success!
                }
                if err != FullError {
                        // The volume is not full but the
@@ -563,17 +564,18 @@ func PutBlock(block []byte, hash string) error {
 
        if allFull {
                log.Print("All volumes are full.")
-               return FullError
+               return 0, FullError
        }
        // Already logged the non-full errors.
-       return GenericError
+       return 0, GenericError
 }
 
-// CompareAndTouch returns nil if one of the volumes already has the
-// given content and it successfully updates the relevant block's
-// modification time in order to protect it from premature garbage
-// collection.
-func CompareAndTouch(hash string, buf []byte) error {
+// CompareAndTouch returns the current replication level if one of the
+// volumes already has the given content and it successfully updates
+// the relevant block's modification time in order to protect it from
+// premature garbage collection. Otherwise, it returns a non-nil
+// error.
+func CompareAndTouch(hash string, buf []byte) (int, error) {
        var bestErr error = NotFoundError
        for _, vol := range KeepVM.AllWritable() {
                if err := vol.Compare(hash, buf); err == CollisionError {
@@ -583,7 +585,7 @@ func CompareAndTouch(hash string, buf []byte) error {
                        // both, so there's no point writing it even
                        // on a different volume.)
                        log.Printf("%s: Compare(%s): %s", vol, hash, err)
-                       return err
+                       return 0, err
                } else if os.IsNotExist(err) {
                        // Block does not exist. This is the only
                        // "normal" error: we don't log anything.
@@ -601,9 +603,9 @@ func CompareAndTouch(hash string, buf []byte) error {
                        continue
                }
                // Compare and Touch both worked --> done.
-               return nil
+               return vol.Replication(), nil
        }
-       return bestErr
+       return 0, bestErr
 }
 
 var validLocatorRe = regexp.MustCompile(`^[0-9a-f]{32}$`)
index 90094f311722c1cdd71ba94f51d87a9f867a8b57..c5349d399c32ebc5692d0a39d4cc8c9c3ad83e1a 100644 (file)
@@ -2,19 +2,18 @@ package main
 
 import (
        "bytes"
-       "testing"
 )
 
 // A TestableVolumeManagerFactory creates a volume manager with at least two TestableVolume instances.
 // The factory function, and the TestableVolume instances it returns, can use "t" to write
 // logs, fail the current test, etc.
-type TestableVolumeManagerFactory func(t *testing.T) (*RRVolumeManager, []TestableVolume)
+type TestableVolumeManagerFactory func(t TB) (*RRVolumeManager, []TestableVolume)
 
 // DoHandlersWithGenericVolumeTests runs a set of handler tests with a
 // Volume Manager comprised of TestableVolume instances.
 // It calls factory to create a volume manager with TestableVolume
 // instances for each test case, to avoid leaking state between tests.
-func DoHandlersWithGenericVolumeTests(t *testing.T, factory TestableVolumeManagerFactory) {
+func DoHandlersWithGenericVolumeTests(t TB, factory TestableVolumeManagerFactory) {
        testGetBlock(t, factory, TestHash, TestBlock)
        testGetBlock(t, factory, EmptyHash, EmptyBlock)
        testPutRawBadDataGetBlock(t, factory, TestHash, TestBlock, []byte("baddata"))
@@ -26,7 +25,7 @@ func DoHandlersWithGenericVolumeTests(t *testing.T, factory TestableVolumeManage
 }
 
 // Setup RRVolumeManager with TestableVolumes
-func setupHandlersWithGenericVolumeTest(t *testing.T, factory TestableVolumeManagerFactory) []TestableVolume {
+func setupHandlersWithGenericVolumeTest(t TB, factory TestableVolumeManagerFactory) []TestableVolume {
        vm, testableVolumes := factory(t)
        KeepVM = vm
 
@@ -39,7 +38,7 @@ func setupHandlersWithGenericVolumeTest(t *testing.T, factory TestableVolumeMana
 }
 
 // Put a block using PutRaw in just one volume and Get it using GetBlock
-func testGetBlock(t *testing.T, factory TestableVolumeManagerFactory, testHash string, testBlock []byte) {
+func testGetBlock(t TB, factory TestableVolumeManagerFactory, testHash string, testBlock []byte) {
        testableVolumes := setupHandlersWithGenericVolumeTest(t, factory)
 
        // Put testBlock in one volume
@@ -56,7 +55,7 @@ func testGetBlock(t *testing.T, factory TestableVolumeManagerFactory, testHash s
 }
 
 // Put a bad block using PutRaw and get it.
-func testPutRawBadDataGetBlock(t *testing.T, factory TestableVolumeManagerFactory,
+func testPutRawBadDataGetBlock(t TB, factory TestableVolumeManagerFactory,
        testHash string, testBlock []byte, badData []byte) {
        testableVolumes := setupHandlersWithGenericVolumeTest(t, factory)
 
@@ -72,16 +71,16 @@ func testPutRawBadDataGetBlock(t *testing.T, factory TestableVolumeManagerFactor
 }
 
 // Invoke PutBlock twice to ensure CompareAndTouch path is tested.
-func testPutBlock(t *testing.T, factory TestableVolumeManagerFactory, testHash string, testBlock []byte) {
+func testPutBlock(t TB, factory TestableVolumeManagerFactory, testHash string, testBlock []byte) {
        setupHandlersWithGenericVolumeTest(t, factory)
 
        // PutBlock
-       if err := PutBlock(testBlock, testHash); err != nil {
+       if _, err := PutBlock(testBlock, testHash); err != nil {
                t.Fatalf("Error during PutBlock: %s", err)
        }
 
        // Check that PutBlock succeeds again even after CompareAndTouch
-       if err := PutBlock(testBlock, testHash); err != nil {
+       if _, err := PutBlock(testBlock, testHash); err != nil {
                t.Fatalf("Error during PutBlock: %s", err)
        }
 
@@ -95,7 +94,7 @@ func testPutBlock(t *testing.T, factory TestableVolumeManagerFactory, testHash s
 }
 
 // Put a bad block using PutRaw, overwrite it using PutBlock and get it.
-func testPutBlockCorrupt(t *testing.T, factory TestableVolumeManagerFactory,
+func testPutBlockCorrupt(t TB, factory TestableVolumeManagerFactory,
        testHash string, testBlock []byte, badData []byte) {
        testableVolumes := setupHandlersWithGenericVolumeTest(t, factory)
 
@@ -104,7 +103,7 @@ func testPutBlockCorrupt(t *testing.T, factory TestableVolumeManagerFactory,
        testableVolumes[1].PutRaw(testHash, badData)
 
        // Check that PutBlock with good data succeeds
-       if err := PutBlock(testBlock, testHash); err != nil {
+       if _, err := PutBlock(testBlock, testHash); err != nil {
                t.Fatalf("Error during PutBlock for %q: %s", testHash, err)
        }
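
Replacing *testing.T with the TB interface throughout these helpers is what allows gocheck-based suites to reuse them: TB (defined in volume_generic_test.go below) is the method set common to the standard library's testing types and gocheck's *check.C. Illustrative compile-time checks of that claim:

    var (
        _ TB = (*testing.T)(nil)
        _ TB = (*testing.B)(nil)
        _ TB = (*check.C)(nil) // gopkg.in/check.v1
    )
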
 
index 3e360e1799117e80e773e1e5c58fa3b5560b07ef..96a887fecb20b278a2c9a763ebfc094b71bf31ac 100644 (file)
@@ -1,9 +1,7 @@
 package main
 
 import (
-       "bufio"
        "bytes"
-       "errors"
        "flag"
        "fmt"
        "git.curoverse.com/arvados.git/sdk/go/keepclient"
@@ -14,7 +12,6 @@ import (
        "os"
        "os/signal"
        "strings"
-       "sync"
        "syscall"
        "time"
 )
@@ -36,10 +33,6 @@ const BlockSize = 64 * 1024 * 1024
 // in order to permit writes.
 const MinFreeKilobytes = BlockSize / 1024
 
-// Until #6221 is resolved, never_delete must be true.
-// However, allow it to be false in testing with TestDataManagerToken
-const TestDataManagerToken = "4axaw8zxe0qm22wa6urpp5nskcne8z88cvbupv653y1njyi05h"
-
 // ProcMounts /proc/mounts
 var ProcMounts = "/proc/mounts"
 
@@ -114,95 +107,16 @@ var KeepVM VolumeManager
 var pullq *WorkQueue
 var trashq *WorkQueue
 
+type volumeSet []Volume
+
 var (
        flagSerializeIO bool
        flagReadonly    bool
+       volumes         volumeSet
 )
 
-type volumeSet []Volume
-
-func (vs *volumeSet) Set(value string) error {
-       if dirs := strings.Split(value, ","); len(dirs) > 1 {
-               log.Print("DEPRECATED: using comma-separated volume list.")
-               for _, dir := range dirs {
-                       if err := vs.Set(dir); err != nil {
-                               return err
-                       }
-               }
-               return nil
-       }
-       if len(value) == 0 || value[0] != '/' {
-               return errors.New("Invalid volume: must begin with '/'.")
-       }
-       if _, err := os.Stat(value); err != nil {
-               return err
-       }
-       var locker sync.Locker
-       if flagSerializeIO {
-               locker = &sync.Mutex{}
-       }
-       *vs = append(*vs, &UnixVolume{
-               root:     value,
-               locker:   locker,
-               readonly: flagReadonly,
-       })
-       return nil
-}
-
 func (vs *volumeSet) String() string {
-       s := "["
-       for i, v := range *vs {
-               if i > 0 {
-                       s = s + " "
-               }
-               s = s + v.String()
-       }
-       return s + "]"
-}
-
-// Discover adds a volume for every directory named "keep" that is
-// located at the top level of a device- or tmpfs-backed mount point
-// other than "/". It returns the number of volumes added.
-func (vs *volumeSet) Discover() int {
-       added := 0
-       f, err := os.Open(ProcMounts)
-       if err != nil {
-               log.Fatalf("opening %s: %s", ProcMounts, err)
-       }
-       scanner := bufio.NewScanner(f)
-       for scanner.Scan() {
-               args := strings.Fields(scanner.Text())
-               if err := scanner.Err(); err != nil {
-                       log.Fatalf("reading %s: %s", ProcMounts, err)
-               }
-               dev, mount := args[0], args[1]
-               if mount == "/" {
-                       continue
-               }
-               if dev != "tmpfs" && !strings.HasPrefix(dev, "/dev/") {
-                       continue
-               }
-               keepdir := mount + "/keep"
-               if st, err := os.Stat(keepdir); err != nil || !st.IsDir() {
-                       continue
-               }
-               // Set the -readonly flag (but only for this volume)
-               // if the filesystem is mounted readonly.
-               flagReadonlyWas := flagReadonly
-               for _, fsopt := range strings.Split(args[3], ",") {
-                       if fsopt == "ro" {
-                               flagReadonly = true
-                               break
-                       }
-                       if fsopt == "rw" {
-                               break
-                       }
-               }
-               vs.Set(keepdir)
-               flagReadonly = flagReadonlyWas
-               added++
-       }
-       return added
+       return fmt.Sprintf("%+v", (*vs)[:])
 }
 
 // TODO(twp): continue moving as much code as possible out of main
@@ -219,7 +133,6 @@ func main() {
                listen               string
                blobSigningKeyFile   string
                permissionTTLSec     int
-               volumes              volumeSet
                pidfile              string
        )
        flag.StringVar(
@@ -242,8 +155,9 @@ func main() {
                &neverDelete,
                "never-delete",
                true,
-               "If set, nothing will be deleted. HTTP 405 will be returned "+
-                       "for valid DELETE requests.")
+               "If true, nothing will be deleted. "+
+                       "Warning: the relevant features in keepstore and data manager have not been extensively tested. "+
+                       "You should leave this option alone unless you can afford to lose data.")
        flag.StringVar(
                &blobSigningKeyFile,
                "permission-key-file",
@@ -276,14 +190,6 @@ func main() {
                "readonly",
                false,
                "Do not write, delete, or touch anything on the following volumes.")
-       flag.Var(
-               &volumes,
-               "volumes",
-               "Deprecated synonym for -volume.")
-       flag.Var(
-               &volumes,
-               "volume",
-               "Local storage directory. Can be given more than once to add multiple directories. If none are supplied, the default is to use all directories named \"keep\" that exist in the top level directory of a mount point at startup time. Can be a comma-separated list, but this is deprecated: use multiple -volume arguments instead.")
        flag.StringVar(
                &pidfile,
                "pid",
@@ -328,7 +234,7 @@ func main() {
        }
 
        if len(volumes) == 0 {
-               if volumes.Discover() == 0 {
+               if (&unixVolumeAdder{&volumes}).Discover() == 0 {
                        log.Fatal("No volumes found.")
                }
        }
@@ -348,8 +254,9 @@ func main() {
                }
        }
 
-       if neverDelete != true && dataManagerToken != TestDataManagerToken {
-               log.Fatal("never_delete must be true, see #6221")
+       if neverDelete != true {
+               log.Print("never-delete is not set. Warning: the relevant features in keepstore and data manager have not " +
+                       "been extensively tested. You should leave this option alone unless you can afford to lose data.")
        }
 
        if blobSigningKeyFile != "" {
@@ -393,7 +300,6 @@ func main() {
        keepClient := &keepclient.KeepClient{
                Arvados:       nil,
                Want_replicas: 1,
-               Using_proxy:   true,
                Client:        &http.Client{},
        }
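
The removed Set and Discover methods reappear behind a unixVolumeAdder type (with an s3VolumeAdder counterpart in the new s3_volume.go below). Both follow the standard flag.Value pattern: flag.Var registers a value whose Set method runs once per occurrence of the flag and appends to the shared volumeSet. A stripped-down sketch of the shape, with illustrative names:

    type exampleVolumeAdder struct {
        *volumeSet
    }

    // volumeSet's String method is promoted through the embedded field, so
    // only Set is needed to satisfy flag.Value.
    func (a *exampleVolumeAdder) Set(dir string) error {
        // validate dir, then append a configured volume
        *a.volumeSet = append(*a.volumeSet, &UnixVolume{root: dir, readonly: flagReadonly})
        return nil
    }

    func init() {
        flag.Var(&exampleVolumeAdder{&volumes}, "example-volume",
            "Illustrative storage directory flag; can be given more than once.")
    }
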
 
index 0e2129cc848d4f4bc7b2b91c17d2fa3115ae7732..2a1c3d243ab922855b2bf6344f69631a78272662 100644 (file)
@@ -10,6 +10,8 @@ import (
        "sort"
        "strings"
        "testing"
+
+       "git.curoverse.com/arvados.git/sdk/go/arvadostest"
 )
 
 var TestBlock = []byte("The quick brown fox jumps over the lazy dog.")
@@ -126,8 +128,8 @@ func TestPutBlockOK(t *testing.T) {
        defer KeepVM.Close()
 
        // Check that PutBlock stores the data as expected.
-       if err := PutBlock(TestBlock, TestHash); err != nil {
-               t.Fatalf("PutBlock: %v", err)
+       if n, err := PutBlock(TestBlock, TestHash); err != nil || n < 1 {
+               t.Fatalf("PutBlock: n %d err %v", n, err)
        }
 
        vols := KeepVM.AllReadable()
@@ -156,8 +158,8 @@ func TestPutBlockOneVol(t *testing.T) {
        vols[0].(*MockVolume).Bad = true
 
        // Check that PutBlock stores the data as expected.
-       if err := PutBlock(TestBlock, TestHash); err != nil {
-               t.Fatalf("PutBlock: %v", err)
+       if n, err := PutBlock(TestBlock, TestHash); err != nil || n < 1 {
+               t.Fatalf("PutBlock: n %d err %v", n, err)
        }
 
        result, err := GetBlock(TestHash)
@@ -184,8 +186,8 @@ func TestPutBlockMD5Fail(t *testing.T) {
 
        // Check that PutBlock returns the expected error when the hash does
        // not match the block.
-       if err := PutBlock(BadBlock, TestHash); err != RequestHashError {
-               t.Error("Expected RequestHashError, got %v", err)
+       if _, err := PutBlock(BadBlock, TestHash); err != RequestHashError {
+               t.Errorf("Expected RequestHashError, got %v", err)
        }
 
        // Confirm that GetBlock fails to return anything.
@@ -209,8 +211,8 @@ func TestPutBlockCorrupt(t *testing.T) {
        // Store a corrupted block under TestHash.
        vols := KeepVM.AllWritable()
        vols[0].Put(TestHash, BadBlock)
-       if err := PutBlock(TestBlock, TestHash); err != nil {
-               t.Errorf("PutBlock: %v", err)
+       if n, err := PutBlock(TestBlock, TestHash); err != nil || n < 1 {
+               t.Errorf("PutBlock: n %d err %v", n, err)
        }
 
        // The block on disk should now match TestBlock.
@@ -229,9 +231,9 @@ func TestPutBlockCollision(t *testing.T) {
        defer teardown()
 
        // These blocks both hash to the MD5 digest cee9a457e790cf20d4bdaa6d69f01e41.
-       var b1 = []byte("\x0e0eaU\x9a\xa7\x87\xd0\x0b\xc6\xf7\x0b\xbd\xfe4\x04\xcf\x03e\x9epO\x854\xc0\x0f\xfbe\x9cL\x87@\xcc\x94/\xeb-\xa1\x15\xa3\xf4\x15\\\xbb\x86\x07Is\x86em}\x1f4\xa4 Y\xd7\x8fZ\x8d\xd1\xef")
-       var b2 = []byte("\x0e0eaU\x9a\xa7\x87\xd0\x0b\xc6\xf7\x0b\xbd\xfe4\x04\xcf\x03e\x9etO\x854\xc0\x0f\xfbe\x9cL\x87@\xcc\x94/\xeb-\xa1\x15\xa3\xf4\x15\xdc\xbb\x86\x07Is\x86em}\x1f4\xa4 Y\xd7\x8fZ\x8d\xd1\xef")
-       var locator = "cee9a457e790cf20d4bdaa6d69f01e41"
+       b1 := arvadostest.MD5CollisionData[0]
+       b2 := arvadostest.MD5CollisionData[1]
+       locator := arvadostest.MD5CollisionMD5
 
        // Prepare two test Keep volumes.
        KeepVM = MakeTestVolumeManager(2)
@@ -239,10 +241,10 @@ func TestPutBlockCollision(t *testing.T) {
 
        // Store one block, then attempt to store the other. Confirm that
        // PutBlock reported a CollisionError.
-       if err := PutBlock(b1, locator); err != nil {
+       if _, err := PutBlock(b1, locator); err != nil {
                t.Error(err)
        }
-       if err := PutBlock(b2, locator); err == nil {
+       if _, err := PutBlock(b2, locator); err == nil {
                t.Error("PutBlock did not report a collision")
        } else if err != CollisionError {
                t.Errorf("PutBlock returned %v", err)
@@ -273,8 +275,8 @@ func TestPutBlockTouchFails(t *testing.T) {
        // vols[0].Touch will fail on the next call, so the volume
        // manager will store a copy on vols[1] instead.
        vols[0].(*MockVolume).Touchable = false
-       if err := PutBlock(TestBlock, TestHash); err != nil {
-               t.Fatalf("PutBlock: %v", err)
+       if n, err := PutBlock(TestBlock, TestHash); err != nil || n < 1 {
+               t.Fatalf("PutBlock: n %d err %v", n, err)
        }
        vols[0].(*MockVolume).Touchable = true
 
@@ -333,8 +335,8 @@ func TestDiscoverTmpfs(t *testing.T) {
        f.Close()
        ProcMounts = f.Name()
 
-       var resultVols volumeSet
-       added := resultVols.Discover()
+       resultVols := volumeSet{}
+       added := (&unixVolumeAdder{&resultVols}).Discover()
 
        if added != len(resultVols) {
                t.Errorf("Discover returned %d, but added %d volumes",
@@ -373,8 +375,8 @@ func TestDiscoverNone(t *testing.T) {
        f.Close()
        ProcMounts = f.Name()
 
-       var resultVols volumeSet
-       added := resultVols.Discover()
+       resultVols := volumeSet{}
+       added := (&unixVolumeAdder{&resultVols}).Discover()
        if added != 0 || len(resultVols) != 0 {
                t.Fatalf("got %d, %v; expected 0, []", added, resultVols)
        }
index 47bb6d77179f226dc20d84a525fa5a0de3a5f7af..a97f2713275a623ff1aa197decce193442c86426 100644 (file)
@@ -17,15 +17,24 @@ type LoggingResponseWriter struct {
        Length int
        http.ResponseWriter
        ResponseBody string
+       sentHdr      time.Time
 }
 
 // WriteHeader writes header to ResponseWriter
 func (loggingWriter *LoggingResponseWriter) WriteHeader(code int) {
+       if loggingWriter.sentHdr == zeroTime {
+               loggingWriter.sentHdr = time.Now()
+       }
        loggingWriter.Status = code
        loggingWriter.ResponseWriter.WriteHeader(code)
 }
 
+var zeroTime time.Time
+
 func (loggingWriter *LoggingResponseWriter) Write(data []byte) (int, error) {
+       if loggingWriter.Length == 0 && len(data) > 0 && loggingWriter.sentHdr == zeroTime {
+               loggingWriter.sentHdr = time.Now()
+       }
        loggingWriter.Length += len(data)
        if loggingWriter.Status >= 400 {
                loggingWriter.ResponseBody += string(data)
@@ -46,12 +55,16 @@ func MakeLoggingRESTRouter() *LoggingRESTRouter {
 
 func (loggingRouter *LoggingRESTRouter) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
        t0 := time.Now()
-       loggingWriter := LoggingResponseWriter{http.StatusOK, 0, resp, ""}
+       loggingWriter := LoggingResponseWriter{http.StatusOK, 0, resp, "", zeroTime}
        loggingRouter.router.ServeHTTP(&loggingWriter, req)
        statusText := http.StatusText(loggingWriter.Status)
        if loggingWriter.Status >= 400 {
                statusText = strings.Replace(loggingWriter.ResponseBody, "\n", "", -1)
        }
-       log.Printf("[%s] %s %s %.6fs %d %d \"%s\"", req.RemoteAddr, req.Method, req.URL.Path[1:], time.Since(t0).Seconds(), loggingWriter.Status, loggingWriter.Length, statusText)
+       now := time.Now()
+       tTotal := now.Sub(t0)
+       tLatency := loggingWriter.sentHdr.Sub(t0)
+       tResponse := now.Sub(loggingWriter.sentHdr)
+       log.Printf("[%s] %s %s %.6fs %.6fs %.6fs %d %d \"%s\"", req.RemoteAddr, req.Method, req.URL.Path[1:], tTotal.Seconds(), tLatency.Seconds(), tResponse.Seconds(), loggingWriter.Status, loggingWriter.Length, statusText)
 
 }
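
Each request is now logged with three durations: total service time, time to first byte (when the header or first body byte was written), and the remaining body-write time; once the handler has written anything, total is approximately latency plus response. A sketch of exercising the router (assuming this package; /status.json is an existing keepstore endpoint):

    func exampleLoggingTimings() {
        req, err := http.NewRequest("GET", "/status.json", nil)
        if err != nil {
            panic(err)
        }
        resp := httptest.NewRecorder()
        MakeLoggingRESTRouter().ServeHTTP(resp, req)
        // Logs, e.g.:
        // [addr] GET status.json 0.001234s 0.001200s 0.000034s 200 57 "OK"
    }
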
index 5579238112b65ed0747ec33e82e582e580e74e6d..6168a321c27e464fff5d0555ed363b2636331c76 100644 (file)
@@ -1,47 +1,7 @@
-/*
-Permissions management on Arvados locator hashes.
-
-The permissions structure for Arvados is as follows (from
-https://arvados.org/issues/2328)
-
-A Keep locator string has the following format:
-
-    [hash]+[size]+A[signature]@[timestamp]
-
-The "signature" string here is a cryptographic hash, expressed as a
-string of hexadecimal digits, and timestamp is a 32-bit Unix timestamp
-expressed as a hexadecimal number.  e.g.:
-
-    acbd18db4cc2f85cedef654fccc4a4d8+3+A257f3f5f5f0a4e4626a18fc74bd42ec34dcb228a@7fffffff
-
-The signature represents a guarantee that this locator was generated
-by either Keep or the API server for use with the supplied API token.
-If a request to Keep includes a locator with a valid signature and is
-accompanied by the proper API token, the user has permission to GET
-that object.
-
-The signature may be generated either by Keep (after the user writes a
-block) or by the API server (if the user has can_read permission on
-the specified object). Keep and API server share a secret that is used
-to generate signatures.
-
-To verify a permission hint, Keep generates a new hint for the
-requested object (using the locator string, the timestamp, the
-permission secret and the user's API token, which must appear in the
-request headers) and compares it against the hint included in the
-request. If the permissions do not match, or if the API token is not
-present, Keep returns a 401 error.
-*/
-
 package main
 
 import (
-       "crypto/hmac"
-       "crypto/sha1"
-       "fmt"
-       "regexp"
-       "strconv"
-       "strings"
+       "git.curoverse.com/arvados.git/sdk/go/keepclient"
        "time"
 )
 
@@ -50,69 +10,23 @@ import (
 // key.
 var PermissionSecret []byte
 
-// MakePermSignature returns a string representing the signed permission
-// hint for the blob identified by blobHash, apiToken and expiration timestamp.
-func MakePermSignature(blobHash string, apiToken string, expiry string) string {
-       hmac := hmac.New(sha1.New, PermissionSecret)
-       hmac.Write([]byte(blobHash))
-       hmac.Write([]byte("@"))
-       hmac.Write([]byte(apiToken))
-       hmac.Write([]byte("@"))
-       hmac.Write([]byte(expiry))
-       digest := hmac.Sum(nil)
-       return fmt.Sprintf("%x", digest)
-}
-
 // SignLocator takes a blobLocator, an apiToken and an expiry time, and
 // returns a signed locator string.
-func SignLocator(blobLocator string, apiToken string, expiry time.Time) string {
-       // If no permission secret or API token is available,
-       // return an unsigned locator.
-       if PermissionSecret == nil || apiToken == "" {
-               return blobLocator
-       }
-       // Extract the hash from the blob locator, omitting any size hint that may be present.
-       blobHash := strings.Split(blobLocator, "+")[0]
-       // Return the signed locator string.
-       timestampHex := fmt.Sprintf("%08x", expiry.Unix())
-       return blobLocator +
-               "+A" + MakePermSignature(blobHash, apiToken, timestampHex) +
-               "@" + timestampHex
+func SignLocator(blobLocator, apiToken string, expiry time.Time) string {
+       return keepclient.SignLocator(blobLocator, apiToken, expiry, PermissionSecret)
 }
 
-var signedLocatorRe = regexp.MustCompile(`^([[:xdigit:]]{32}).*\+A([[:xdigit:]]{40})@([[:xdigit:]]{8})`)
-
 // VerifySignature returns nil if the signature on the signedLocator
 // can be verified using the given apiToken. Otherwise it returns
 // either ExpiredError (if the timestamp has expired, which is
 // something the client could have figured out independently) or
 // PermissionError.
-func VerifySignature(signedLocator string, apiToken string) error {
-       matches := signedLocatorRe.FindStringSubmatch(signedLocator)
-       if matches == nil {
-               // Could not find a permission signature at all
-               return PermissionError
-       }
-       blobHash := matches[1]
-       sigHex := matches[2]
-       expHex := matches[3]
-       if expTime, err := ParseHexTimestamp(expHex); err != nil {
-               return PermissionError
-       } else if expTime.Before(time.Now()) {
+func VerifySignature(signedLocator, apiToken string) error {
+       err := keepclient.VerifySignature(signedLocator, apiToken, PermissionSecret)
+       if err == keepclient.ErrSignatureExpired {
                return ExpiredError
-       }
-       if sigHex != MakePermSignature(blobHash, apiToken, expHex) {
+       } else if err != nil {
                return PermissionError
        }
        return nil
 }
-
-// ParseHexTimestamp parses timestamp
-func ParseHexTimestamp(timestampHex string) (ts time.Time, err error) {
-       if tsInt, e := strconv.ParseInt(timestampHex, 16, 0); e == nil {
-               ts = time.Unix(tsInt, 0)
-       } else {
-               err = e
-       }
-       return ts, err
-}
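
Moving the implementation into sdk/go/keepclient does not change the scheme described in the removed comment: the permission hint is an HMAC-SHA1 over blobHash + "@" + apiToken + "@" + expiryHex, appended to the locator as "+A<signature>@<expiryHex>", where expiryHex is the Unix expiry time as eight hex digits. A standalone sketch equivalent to the removed MakePermSignature:

    func makePermSignature(blobHash, apiToken, expiryHex string, secret []byte) string {
        mac := hmac.New(sha1.New, secret)
        mac.Write([]byte(blobHash + "@" + apiToken + "@" + expiryHex))
        return fmt.Sprintf("%x", mac.Sum(nil))
    }

    // signedLocator := blobLocator + "+A" + sig + "@" + expiryHex
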
index 59516af85f898efd223389f42199901c6ae65862..f4443fc7be1b423c4f535cccae66f0de32e71648 100644 (file)
@@ -1,6 +1,7 @@
 package main
 
 import (
+       "strconv"
        "testing"
        "time"
 )
@@ -23,103 +24,39 @@ const (
 )
 
 func TestSignLocator(t *testing.T) {
-       PermissionSecret = []byte(knownKey)
-       defer func() { PermissionSecret = nil }()
-
-       if ts, err := ParseHexTimestamp(knownTimestamp); err != nil {
-               t.Errorf("bad knownTimestamp %s", knownTimestamp)
-       } else {
-               if knownSignedLocator != SignLocator(knownLocator, knownToken, ts) {
-                       t.Fail()
-               }
-       }
-}
-
-func TestVerifySignature(t *testing.T) {
-       PermissionSecret = []byte(knownKey)
-       defer func() { PermissionSecret = nil }()
-
-       if VerifySignature(knownSignedLocator, knownToken) != nil {
-               t.Fail()
-       }
-}
-
-func TestVerifySignatureExtraHints(t *testing.T) {
-       PermissionSecret = []byte(knownKey)
-       defer func() { PermissionSecret = nil }()
-
-       if VerifySignature(knownLocator+"+K@xyzzy"+knownSigHint, knownToken) != nil {
-               t.Fatal("Verify cannot handle hint before permission signature")
-       }
-
-       if VerifySignature(knownLocator+knownSigHint+"+Zfoo", knownToken) != nil {
-               t.Fatal("Verify cannot handle hint after permission signature")
-       }
-
-       if VerifySignature(knownLocator+"+K@xyzzy"+knownSigHint+"+Zfoo", knownToken) != nil {
-               t.Fatal("Verify cannot handle hints around permission signature")
-       }
-}
-
-// The size hint on the locator string should not affect signature validation.
-func TestVerifySignatureWrongSize(t *testing.T) {
-       PermissionSecret = []byte(knownKey)
-       defer func() { PermissionSecret = nil }()
+       defer func(b []byte) {
+               PermissionSecret = b
+       }(PermissionSecret)
 
-       if VerifySignature(knownHash+"+999999"+knownSigHint, knownToken) != nil {
-               t.Fatal("Verify cannot handle incorrect size hint")
+       tsInt, err := strconv.ParseInt(knownTimestamp, 16, 0)
+       if err != nil {
+               t.Fatal(err)
        }
+       t0 := time.Unix(tsInt, 0)
 
-       if VerifySignature(knownHash+knownSigHint, knownToken) != nil {
-               t.Fatal("Verify cannot handle missing size hint")
-       }
-}
-
-func TestVerifySignatureBadSig(t *testing.T) {
        PermissionSecret = []byte(knownKey)
-       defer func() { PermissionSecret = nil }()
-
-       badLocator := knownLocator + "+Aaaaaaaaaaaaaaaa@" + knownTimestamp
-       if VerifySignature(badLocator, knownToken) != PermissionError {
-               t.Fail()
+       if x := SignLocator(knownLocator, knownToken, t0); x != knownSignedLocator {
+               t.Fatalf("Got %+q, expected %+q", x, knownSignedLocator)
        }
-}
 
-func TestVerifySignatureBadTimestamp(t *testing.T) {
-       PermissionSecret = []byte(knownKey)
-       defer func() { PermissionSecret = nil }()
-
-       badLocator := knownLocator + "+A" + knownSignature + "@OOOOOOOl"
-       if VerifySignature(badLocator, knownToken) != PermissionError {
-               t.Fail()
+       PermissionSecret = []byte("arbitrarykey")
+       if x := SignLocator(knownLocator, knownToken, t0); x == knownSignedLocator {
+               t.Fatalf("Got same signature %+q, even though PermissionSecret changed", x)
        }
 }
 
-func TestVerifySignatureBadSecret(t *testing.T) {
-       PermissionSecret = []byte("00000000000000000000")
-       defer func() { PermissionSecret = nil }()
-
-       if VerifySignature(knownSignedLocator, knownToken) != PermissionError {
-               t.Fail()
-       }
-}
+func TestVerifyLocator(t *testing.T) {
+       defer func(b []byte) {
+               PermissionSecret = b
+       }(PermissionSecret)
 
-func TestVerifySignatureBadToken(t *testing.T) {
        PermissionSecret = []byte(knownKey)
-       defer func() { PermissionSecret = nil }()
-
-       if VerifySignature(knownSignedLocator, "00000000") != PermissionError {
-               t.Fail()
+       if err := VerifySignature(knownSignedLocator, knownToken); err != nil {
+               t.Fatal(err)
        }
-}
-
-func TestVerifySignatureExpired(t *testing.T) {
-       PermissionSecret = []byte(knownKey)
-       defer func() { PermissionSecret = nil }()
 
-       yesterday := time.Now().AddDate(0, 0, -1)
-       expiredLocator := SignLocator(knownHash, knownToken, yesterday)
-       if VerifySignature(expiredLocator, knownToken) != ExpiredError {
-               t.Fail()
+       PermissionSecret = []byte("arbitrarykey")
+       if err := VerifySignature(knownSignedLocator, knownToken); err == nil {
+               t.Fatal("Verified signature even with wrong PermissionSecret")
        }
 }
index 9f0b96fa35b1c32af4b06be92ea02632547ef249..2626d4bf68e1594f394ad4539f0f32a90fe00339 100644 (file)
@@ -95,6 +95,6 @@ func GenerateRandomAPIToken() string {
 
 // Put block
 var PutContent = func(content []byte, locator string) (err error) {
-       err = PutBlock(content, locator)
+       _, err = PutBlock(content, locator)
        return
 }
index e0bad0045af34d8067591fa723e9229be093790c..e0613a2cd12685195d841c2ef2ae31f7b90f60bb 100644 (file)
@@ -27,7 +27,7 @@ func SetupPullWorkerIntegrationTest(t *testing.T, testData PullWorkIntegrationTe
 
        // start api and keep servers
        arvadostest.StartAPI()
-       arvadostest.StartKeep()
+       arvadostest.StartKeep(2, false)
 
        // make arvadosclient
        arv, err := arvadosclient.MakeArvadosClient()
@@ -39,7 +39,6 @@ func SetupPullWorkerIntegrationTest(t *testing.T, testData PullWorkIntegrationTe
        keepClient = &keepclient.KeepClient{
                Arvados:       &arv,
                Want_replicas: 1,
-               Using_proxy:   true,
                Client:        &http.Client{},
        }
 
@@ -82,6 +81,8 @@ func TestPullWorkerIntegration_GetNonExistingLocator(t *testing.T) {
        }
 
        pullRequest := SetupPullWorkerIntegrationTest(t, testData, false)
+       defer arvadostest.StopAPI()
+       defer arvadostest.StopKeep(2)
 
        performPullWorkerIntegrationTest(testData, pullRequest, t)
 }
@@ -97,6 +98,8 @@ func TestPullWorkerIntegration_GetExistingLocator(t *testing.T) {
        }
 
        pullRequest := SetupPullWorkerIntegrationTest(t, testData, true)
+       defer arvadostest.StopAPI()
+       defer arvadostest.StopKeep(2)
 
        performPullWorkerIntegrationTest(testData, pullRequest, t)
 }
index c6a41953842b6c2ab9403c84c04d17cee839fabd..5076b85e20703e8305b74ab4ce5a048cd17a81b0 100644 (file)
@@ -8,20 +8,13 @@ import (
        . "gopkg.in/check.v1"
        "io"
        "net/http"
-       "testing"
        "time"
 )
 
-type PullWorkerTestSuite struct{}
-
-// Gocheck boilerplate
-func TestPullWorker(t *testing.T) {
-       TestingT(t)
-}
-
-// Gocheck boilerplate
 var _ = Suite(&PullWorkerTestSuite{})
 
+type PullWorkerTestSuite struct{}
+
 var testPullLists map[string]string
 var readContent string
 var readError error
diff --git a/services/keepstore/s3_volume.go b/services/keepstore/s3_volume.go
new file mode 100644 (file)
index 0000000..572ee46
--- /dev/null
@@ -0,0 +1,312 @@
+package main
+
+import (
+       "encoding/base64"
+       "encoding/hex"
+       "flag"
+       "fmt"
+       "io"
+       "log"
+       "net/http"
+       "os"
+       "regexp"
+       "time"
+
+       "github.com/AdRoll/goamz/aws"
+       "github.com/AdRoll/goamz/s3"
+)
+
+var (
+       ErrS3DeleteNotAvailable = fmt.Errorf("delete without -s3-unsafe-delete is not implemented")
+
+       s3AccessKeyFile string
+       s3SecretKeyFile string
+       s3RegionName    string
+       s3Endpoint      string
+       s3Replication   int
+       s3UnsafeDelete  bool
+
+       s3ACL = s3.Private
+)
+
+const (
+       maxClockSkew  = 600 * time.Second
+       nearlyRFC1123 = "Mon, 2 Jan 2006 15:04:05 GMT"
+)
+
+type s3VolumeAdder struct {
+       *volumeSet
+}
+
+func (s *s3VolumeAdder) Set(bucketName string) error {
+       if bucketName == "" {
+               return fmt.Errorf("no container name given")
+       }
+       if s3AccessKeyFile == "" || s3SecretKeyFile == "" {
+               return fmt.Errorf("-s3-access-key-file and -s3-secret-key-file arguments must given before -s3-bucket-volume")
+       }
+       region, ok := aws.Regions[s3RegionName]
+       if s3Endpoint == "" {
+               if !ok {
+                       return fmt.Errorf("unrecognized region %+q; try specifying -s3-endpoint instead", s3RegionName)
+               }
+       } else {
+               if ok {
+                       return fmt.Errorf("refusing to use AWS region name %+q with endpoint %+q; "+
+                               "specify empty endpoint (\"-s3-endpoint=\") or use a different region name", s3RegionName, s3Endpoint)
+               }
+               region = aws.Region{
+                       Name:       s3RegionName,
+                       S3Endpoint: s3Endpoint,
+               }
+       }
+       var err error
+       var auth aws.Auth
+       auth.AccessKey, err = readKeyFromFile(s3AccessKeyFile)
+       if err != nil {
+               return err
+       }
+       auth.SecretKey, err = readKeyFromFile(s3SecretKeyFile)
+       if err != nil {
+               return err
+       }
+       if flagSerializeIO {
+               log.Print("Notice: -serialize is not supported by s3-bucket volumes.")
+       }
+       v := NewS3Volume(auth, region, bucketName, flagReadonly, s3Replication)
+       if err := v.Check(); err != nil {
+               return err
+       }
+       *s.volumeSet = append(*s.volumeSet, v)
+       return nil
+}
+
+func s3regions() (okList []string) {
+       for r := range aws.Regions {
+               okList = append(okList, r)
+       }
+       return
+}
+
+func init() {
+       flag.Var(&s3VolumeAdder{&volumes},
+               "s3-bucket-volume",
+               "Use the given bucket as a storage volume. Can be given multiple times.")
+       flag.StringVar(
+               &s3RegionName,
+               "s3-region",
+               "",
+               fmt.Sprintf("AWS region used for subsequent -s3-bucket-volume arguments. Allowed values are %+q.", s3regions()))
+       flag.StringVar(
+               &s3Endpoint,
+               "s3-endpoint",
+               "",
+               "Endpoint URL used for subsequent -s3-bucket-volume arguments. If blank, use the AWS endpoint corresponding to the -s3-region argument. For Google Storage, use \"https://storage.googleapis.com\".")
+       flag.StringVar(
+               &s3AccessKeyFile,
+               "s3-access-key-file",
+               "",
+               "File containing the access key used for subsequent -s3-bucket-volume arguments.")
+       flag.StringVar(
+               &s3SecretKeyFile,
+               "s3-secret-key-file",
+               "",
+               "File containing the secret key used for subsequent -s3-bucket-volume arguments.")
+       flag.IntVar(
+               &s3Replication,
+               "s3-replication",
+               2,
+               "Replication level reported to clients for subsequent -s3-bucket-volume arguments.")
+       flag.BoolVar(
+               &s3UnsafeDelete,
+               "s3-unsafe-delete",
+               false,
+               "EXPERIMENTAL. Enable deletion (garbage collection), even though there are known race conditions that can cause data loss.")
+}
+
+type S3Volume struct {
+       *s3.Bucket
+       readonly      bool
+       replication   int
+       indexPageSize int
+}
+
+// NewS3Volume returns a new S3Volume using the given auth, region,
+// and bucket name. The replication argument specifies the replication
+// level to report when writing data.
+func NewS3Volume(auth aws.Auth, region aws.Region, bucket string, readonly bool, replication int) *S3Volume {
+       return &S3Volume{
+               Bucket: &s3.Bucket{
+                       S3:   s3.New(auth, region),
+                       Name: bucket,
+               },
+               readonly:      readonly,
+               replication:   replication,
+               indexPageSize: 1000,
+       }
+}
+
+func (v *S3Volume) Check() error {
+       return nil
+}
+
+func (v *S3Volume) Get(loc string) ([]byte, error) {
+       rdr, err := v.Bucket.GetReader(loc)
+       if err != nil {
+               return nil, v.translateError(err)
+       }
+       defer rdr.Close()
+       buf := bufs.Get(BlockSize)
+       n, err := io.ReadFull(rdr, buf)
+       switch err {
+       case nil, io.EOF, io.ErrUnexpectedEOF:
+               return buf[:n], nil
+       default:
+               bufs.Put(buf)
+               return nil, v.translateError(err)
+       }
+}
+
+func (v *S3Volume) Compare(loc string, expect []byte) error {
+       rdr, err := v.Bucket.GetReader(loc)
+       if err != nil {
+               return v.translateError(err)
+       }
+       defer rdr.Close()
+       return v.translateError(compareReaderWithBuf(rdr, expect, loc[:32]))
+}
+
+func (v *S3Volume) Put(loc string, block []byte) error {
+       if v.readonly {
+               return MethodDisabledError
+       }
+       var opts s3.Options
+       if len(block) > 0 {
+               md5, err := hex.DecodeString(loc)
+               if err != nil {
+                       return err
+               }
+               opts.ContentMD5 = base64.StdEncoding.EncodeToString(md5)
+       }
+       return v.translateError(
+               v.Bucket.Put(
+                       loc, block, "application/octet-stream", s3ACL, opts))
+}
+
+func (v *S3Volume) Touch(loc string) error {
+       if v.readonly {
+               return MethodDisabledError
+       }
+       result, err := v.Bucket.PutCopy(loc, s3ACL, s3.CopyOptions{
+               ContentType:       "application/octet-stream",
+               MetadataDirective: "REPLACE",
+       }, v.Bucket.Name+"/"+loc)
+       if err != nil {
+               return v.translateError(err)
+       }
+       t, err := time.Parse(time.RFC3339, result.LastModified)
+       if err != nil {
+               return err
+       }
+       if time.Since(t) > maxClockSkew {
+               return fmt.Errorf("PutCopy returned old LastModified %s => %s (%s ago)", result.LastModified, t, time.Since(t))
+       }
+       return nil
+}
+
+func (v *S3Volume) Mtime(loc string) (time.Time, error) {
+       resp, err := v.Bucket.Head(loc, nil)
+       if err != nil {
+               return zeroTime, v.translateError(err)
+       }
+       hdr := resp.Header.Get("Last-Modified")
+       t, err := time.Parse(time.RFC1123, hdr)
+       if err != nil && hdr != "" {
+               // AWS example is "Sun, 1 Jan 2006 12:00:00 GMT",
+               // which isn't quite "Sun, 01 Jan 2006 12:00:00 GMT"
+               // as required by HTTP spec. If it's not a valid HTTP
+               // header value, it's probably AWS (or s3test) giving
+               // us a nearly-RFC1123 timestamp.
+               t, err = time.Parse(nearlyRFC1123, hdr)
+       }
+       return t, err
+}
+
+func (v *S3Volume) IndexTo(prefix string, writer io.Writer) error {
+       nextMarker := ""
+       for {
+               listResp, err := v.Bucket.List(prefix, "", nextMarker, v.indexPageSize)
+               if err != nil {
+                       return err
+               }
+               for _, key := range listResp.Contents {
+                       t, err := time.Parse(time.RFC3339, key.LastModified)
+                       if err != nil {
+                               return err
+                       }
+                       if !v.isKeepBlock(key.Key) {
+                               continue
+                       }
+                       fmt.Fprintf(writer, "%s+%d %d\n", key.Key, key.Size, t.Unix())
+               }
+               if !listResp.IsTruncated {
+                       break
+               }
+               nextMarker = listResp.NextMarker
+       }
+       return nil
+}
+
+func (v *S3Volume) Delete(loc string) error {
+       if v.readonly {
+               return MethodDisabledError
+       }
+       if t, err := v.Mtime(loc); err != nil {
+               return err
+       } else if time.Since(t) < blobSignatureTTL {
+               return nil
+       }
+       if !s3UnsafeDelete {
+               return ErrS3DeleteNotAvailable
+       }
+       return v.Bucket.Del(loc)
+}
+
+func (v *S3Volume) Status() *VolumeStatus {
+       return &VolumeStatus{
+               DeviceNum: 1,
+               BytesFree: BlockSize * 1000,
+               BytesUsed: 1,
+       }
+}
+
+func (v *S3Volume) String() string {
+       return fmt.Sprintf("s3-bucket:%+q", v.Bucket.Name)
+}
+
+func (v *S3Volume) Writable() bool {
+       return !v.readonly
+}
+
+func (v *S3Volume) Replication() int {
+       return v.replication
+}
+
+var s3KeepBlockRegexp = regexp.MustCompile(`^[0-9a-f]{32}$`)
+
+func (v *S3Volume) isKeepBlock(s string) bool {
+       return s3KeepBlockRegexp.MatchString(s)
+}
+
+func (v *S3Volume) translateError(err error) error {
+       switch err := err.(type) {
+       case *s3.Error:
+               if err.StatusCode == http.StatusNotFound && err.Code == "NoSuchKey" {
+                       return os.ErrNotExist
+               }
+               // Other 404 errors like NoSuchVersion and
+               // NoSuchBucket are different problems which should
+               // get called out downstream, so we don't convert them
+               // to os.ErrNotExist.
+       }
+       return err
+}
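
All of the flag plumbing above ultimately reduces to one constructor call; a minimal sketch of wiring a volume by hand (credentials, region, and bucket name are placeholders):

    func exampleS3Volume() (*S3Volume, error) {
        auth := aws.Auth{AccessKey: "placeholder", SecretKey: "placeholder"}
        region := aws.Regions["us-east-1"]
        v := NewS3Volume(auth, region, "my-keep-bucket", false, 2)
        if err := v.Check(); err != nil {
            return nil, err
        }
        return v, nil
    }
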
diff --git a/services/keepstore/s3_volume_test.go b/services/keepstore/s3_volume_test.go
new file mode 100644 (file)
index 0000000..0c2cd49
--- /dev/null
@@ -0,0 +1,133 @@
+package main
+
+import (
+       "bytes"
+       "fmt"
+       "log"
+       "strings"
+       "time"
+
+       "github.com/AdRoll/goamz/aws"
+       "github.com/AdRoll/goamz/s3"
+       "github.com/AdRoll/goamz/s3/s3test"
+       check "gopkg.in/check.v1"
+)
+
+type TestableS3Volume struct {
+       *S3Volume
+       server      *s3test.Server
+       c           *check.C
+       serverClock *fakeClock
+}
+
+const (
+       TestBucketName = "testbucket"
+)
+
+type fakeClock struct {
+       now *time.Time
+}
+
+func (c *fakeClock) Now() time.Time {
+       if c.now == nil {
+               return time.Now()
+       }
+       return *c.now
+}
+
+func init() {
+       // Deleting isn't safe from races, but if it's turned on
+       // anyway we do expect it to pass the generic volume tests.
+       s3UnsafeDelete = true
+}
+
+func NewTestableS3Volume(c *check.C, readonly bool, replication int) *TestableS3Volume {
+       clock := &fakeClock{}
+       srv, err := s3test.NewServer(&s3test.Config{Clock: clock})
+       c.Assert(err, check.IsNil)
+       auth := aws.Auth{}
+       region := aws.Region{
+               Name:                 "test-region-1",
+               S3Endpoint:           srv.URL(),
+               S3LocationConstraint: true,
+       }
+       bucket := &s3.Bucket{
+               S3:   s3.New(auth, region),
+               Name: TestBucketName,
+       }
+       err = bucket.PutBucket(s3.ACL("private"))
+       c.Assert(err, check.IsNil)
+
+       return &TestableS3Volume{
+               S3Volume:    NewS3Volume(auth, region, TestBucketName, readonly, replication),
+               server:      srv,
+               serverClock: clock,
+       }
+}
+
+var _ = check.Suite(&StubbedS3Suite{})
+
+type StubbedS3Suite struct {
+       volumes []*TestableS3Volume
+}
+
+func (s *StubbedS3Suite) TestGeneric(c *check.C) {
+       DoGenericVolumeTests(c, func(t TB) TestableVolume {
+               return NewTestableS3Volume(c, false, 2)
+       })
+}
+
+func (s *StubbedS3Suite) TestGenericReadOnly(c *check.C) {
+       DoGenericVolumeTests(c, func(t TB) TestableVolume {
+               return NewTestableS3Volume(c, true, 2)
+       })
+}
+
+func (s *StubbedS3Suite) TestIndex(c *check.C) {
+       v := NewTestableS3Volume(c, false, 2)
+       v.indexPageSize = 3
+       for i := 0; i < 256; i++ {
+               v.PutRaw(fmt.Sprintf("%02x%030x", i, i), []byte{102, 111, 111})
+       }
+       for _, spec := range []struct {
+               prefix      string
+               expectMatch int
+       }{
+               {"", 256},
+               {"c", 16},
+               {"bc", 1},
+               {"abc", 0},
+       } {
+               buf := new(bytes.Buffer)
+               err := v.IndexTo(spec.prefix, buf)
+               c.Check(err, check.IsNil)
+
+               idx := bytes.SplitAfter(buf.Bytes(), []byte{10})
+               c.Check(len(idx), check.Equals, spec.expectMatch+1)
+               c.Check(len(idx[len(idx)-1]), check.Equals, 0)
+       }
+}
+
+// PutRaw skips the ContentMD5 test
+func (v *TestableS3Volume) PutRaw(loc string, block []byte) {
+       err := v.Bucket.Put(loc, block, "application/octet-stream", s3ACL, s3.Options{})
+       if err != nil {
+               log.Printf("PutRaw: %+v", err)
+       }
+}
+
+// TouchWithDate turns back the clock while doing a Touch(). We assume
+// there are no other operations happening on the same s3test server
+// while we do this.
+func (v *TestableS3Volume) TouchWithDate(locator string, lastPut time.Time) {
+       v.serverClock.now = &lastPut
+       err := v.Touch(locator)
+       if err != nil && !strings.Contains(err.Error(), "PutCopy returned old LastModified") {
+               log.Printf("Touch: %+v", err)
+       }
+       v.serverClock.now = nil
+}
+
+func (v *TestableS3Volume) Teardown() {
+       v.server.Quit()
+}
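
TouchWithDate is the hook the generic TTL and trash tests rely on: winding the stub server's clock back makes a block look old enough to delete. A usage sketch (blobSignatureTTL as used elsewhere in keepstore):

    func (s *StubbedS3Suite) exampleBackdate(c *check.C) {
        v := NewTestableS3Volume(c, false, 2)
        defer v.Teardown()
        v.PutRaw(TestHash, TestBlock)
        // Make the block appear older than the signature TTL, so Delete
        // will actually remove it instead of returning early.
        v.TouchWithDate(TestHash, time.Now().Add(-2*blobSignatureTTL))
    }
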
index 9bf291bdea28dce8b05baae738c8d42fbca7481c..7966c41b92bd89958308ec77765f0b7a5a1f0fd9 100644 (file)
@@ -195,6 +195,11 @@ type Volume interface {
        // will fail because it is full, but Mtime or Delete can
        // succeed -- then Writable should return false.
        Writable() bool
+
+       // Replication returns the storage redundancy of the
+       // underlying device. It will be passed on to clients in
+       // responses to PUT requests.
+       Replication() int
 }
 
 // A VolumeManager tells callers which volumes can read, which volumes
index c08c3f5f0007ac483486cb01d1e6a4bd68253d42..fae4a9ee807d5c799b5b27b61bee5ddc1bc21bab 100644 (file)
@@ -2,26 +2,42 @@ package main
 
 import (
        "bytes"
+       "crypto/md5"
+       "fmt"
        "os"
        "regexp"
        "sort"
        "strings"
-       "testing"
        "time"
+
+       "git.curoverse.com/arvados.git/sdk/go/arvadostest"
 )
 
+type TB interface {
+       Error(args ...interface{})
+       Errorf(format string, args ...interface{})
+       Fail()
+       FailNow()
+       Failed() bool
+       Fatal(args ...interface{})
+       Fatalf(format string, args ...interface{})
+       Log(args ...interface{})
+       Logf(format string, args ...interface{})
+}
+
 // A TestableVolumeFactory returns a new TestableVolume. The factory
 // function, and the TestableVolume it returns, can use "t" to write
 // logs, fail the current test, etc.
-type TestableVolumeFactory func(t *testing.T) TestableVolume
+type TestableVolumeFactory func(t TB) TestableVolume
 
 // DoGenericVolumeTests runs a set of tests that every TestableVolume
 // is expected to pass. It calls factory to create a new TestableVolume
 // for each test case, to avoid leaking state between tests.
-func DoGenericVolumeTests(t *testing.T, factory TestableVolumeFactory) {
+func DoGenericVolumeTests(t TB, factory TestableVolumeFactory) {
        testGet(t, factory)
        testGetNoSuchBlock(t, factory)
 
+       testCompareNonexistent(t, factory)
        testCompareSameContent(t, factory, TestHash, TestBlock)
        testCompareSameContent(t, factory, EmptyHash, EmptyBlock)
        testCompareWithCollision(t, factory, TestHash, TestBlock, []byte("baddata"))
@@ -33,10 +49,10 @@ func DoGenericVolumeTests(t *testing.T, factory TestableVolumeFactory) {
 
        testPutBlockWithSameContent(t, factory, TestHash, TestBlock)
        testPutBlockWithSameContent(t, factory, EmptyHash, EmptyBlock)
-       testPutBlockWithDifferentContent(t, factory, TestHash, TestBlock, TestBlock2)
-       testPutBlockWithDifferentContent(t, factory, TestHash, EmptyBlock, TestBlock)
-       testPutBlockWithDifferentContent(t, factory, TestHash, TestBlock, EmptyBlock)
-       testPutBlockWithDifferentContent(t, factory, EmptyHash, EmptyBlock, TestBlock)
+       testPutBlockWithDifferentContent(t, factory, arvadostest.MD5CollisionMD5, arvadostest.MD5CollisionData[0], arvadostest.MD5CollisionData[1])
+       testPutBlockWithDifferentContent(t, factory, arvadostest.MD5CollisionMD5, EmptyBlock, arvadostest.MD5CollisionData[0])
+       testPutBlockWithDifferentContent(t, factory, arvadostest.MD5CollisionMD5, arvadostest.MD5CollisionData[0], EmptyBlock)
+       testPutBlockWithDifferentContent(t, factory, EmptyHash, EmptyBlock, arvadostest.MD5CollisionData[0])
        testPutMultipleBlocks(t, factory)
 
        testPutAndTouch(t, factory)
@@ -58,11 +74,13 @@ func DoGenericVolumeTests(t *testing.T, factory TestableVolumeFactory) {
 
        testGetConcurrent(t, factory)
        testPutConcurrent(t, factory)
+
+       testPutFullBlock(t, factory)
 }
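+
+// Illustrative sketch (not part of this change): a volume driver opts
+// in to this suite by supplying a TestableVolumeFactory, e.g.:
+//
+//     func TestMyVolumeGeneric(t *testing.T) {
+//             DoGenericVolumeTests(t, func(t TB) TestableVolume {
+//                     return NewTestableMyVolume(t)
+//             })
+//     }
+//
+// NewTestableMyVolume is hypothetical; the real constructors in this
+// tree are NewTestableUnixVolume and NewTestableS3Volume.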
 
 // Put a test block, get it and verify content
 // Test should pass for both writable and read-only volumes
-func testGet(t *testing.T, factory TestableVolumeFactory) {
+func testGet(t TB, factory TestableVolumeFactory) {
        v := factory(t)
        defer v.Teardown()
 
@@ -70,7 +88,7 @@ func testGet(t *testing.T, factory TestableVolumeFactory) {
 
        buf, err := v.Get(TestHash)
        if err != nil {
-               t.Error(err)
+               t.Fatal(err)
        }
 
        bufs.Put(buf)
@@ -82,7 +100,7 @@ func testGet(t *testing.T, factory TestableVolumeFactory) {
 
 // Invoke get on a block that does not exist in volume; should result in error
 // Test should pass for both writable and read-only volumes
-func testGetNoSuchBlock(t *testing.T, factory TestableVolumeFactory) {
+func testGetNoSuchBlock(t TB, factory TestableVolumeFactory) {
        v := factory(t)
        defer v.Teardown()
 
@@ -91,9 +109,22 @@ func testGetNoSuchBlock(t *testing.T, factory TestableVolumeFactory) {
        }
 }
 
+// Compare() should return os.ErrNotExist if the block does not exist.
+// Otherwise, writing new data makes CompareAndTouch() log spurious
+// errors even though everything is working correctly.
+func testCompareNonexistent(t TB, factory TestableVolumeFactory) {
+       v := factory(t)
+       defer v.Teardown()
+
+       err := v.Compare(TestHash, TestBlock)
+       if err != os.ErrNotExist {
+               t.Errorf("Got err %T %q, expected os.ErrNotExist", err, err)
+       }
+}
+
 // Put a test block and compare the locator with same content
 // Test should pass for both writable and read-only volumes
-func testCompareSameContent(t *testing.T, factory TestableVolumeFactory, testHash string, testData []byte) {
+func testCompareSameContent(t TB, factory TestableVolumeFactory, testHash string, testData []byte) {
        v := factory(t)
        defer v.Teardown()
 
@@ -111,7 +142,7 @@ func testCompareSameContent(t *testing.T, factory TestableVolumeFactory, testHas
 // testHash = md5(testDataA).
 //
 // Test should pass for both writable and read-only volumes
-func testCompareWithCollision(t *testing.T, factory TestableVolumeFactory, testHash string, testDataA, testDataB []byte) {
+func testCompareWithCollision(t TB, factory TestableVolumeFactory, testHash string, testDataA, testDataB []byte) {
        v := factory(t)
        defer v.Teardown()
 
@@ -128,7 +159,7 @@ func testCompareWithCollision(t *testing.T, factory TestableVolumeFactory, testH
 // corrupted. Requires testHash = md5(testDataA) != md5(testDataB).
 //
 // Test should pass for both writable and read-only volumes
-func testCompareWithCorruptStoredData(t *testing.T, factory TestableVolumeFactory, testHash string, testDataA, testDataB []byte) {
+func testCompareWithCorruptStoredData(t TB, factory TestableVolumeFactory, testHash string, testDataA, testDataB []byte) {
        v := factory(t)
        defer v.Teardown()
 
@@ -142,7 +173,7 @@ func testCompareWithCorruptStoredData(t *testing.T, factory TestableVolumeFactor
 
 // Put a block and put again with same content
 // Test is intended for only writable volumes
-func testPutBlockWithSameContent(t *testing.T, factory TestableVolumeFactory, testHash string, testData []byte) {
+func testPutBlockWithSameContent(t TB, factory TestableVolumeFactory, testHash string, testData []byte) {
        v := factory(t)
        defer v.Teardown()
 
@@ -163,7 +194,7 @@ func testPutBlockWithSameContent(t *testing.T, factory TestableVolumeFactory, te
 
 // Put a block and put again with different content
 // Test is intended for only writable volumes
-func testPutBlockWithDifferentContent(t *testing.T, factory TestableVolumeFactory, testHash string, testDataA, testDataB []byte) {
+func testPutBlockWithDifferentContent(t TB, factory TestableVolumeFactory, testHash string, testDataA, testDataB []byte) {
        v := factory(t)
        defer v.Teardown()
 
@@ -171,10 +202,7 @@ func testPutBlockWithDifferentContent(t *testing.T, factory TestableVolumeFactor
                return
        }
 
-       err := v.Put(testHash, testDataA)
-       if err != nil {
-               t.Errorf("Got err putting block %q: %q, expected nil", testDataA, err)
-       }
+       v.PutRaw(testHash, testDataA)
 
        putErr := v.Put(testHash, testDataB)
        buf, getErr := v.Get(testHash)
@@ -182,14 +210,14 @@ func testPutBlockWithDifferentContent(t *testing.T, factory TestableVolumeFactor
                // Put must not return a nil error unless it has
                // overwritten the existing data.
                if bytes.Compare(buf, testDataB) != 0 {
-                       t.Errorf("Put succeeded but Get returned %+v, expected %+v", buf, testDataB)
+                       t.Errorf("Put succeeded but Get returned %+q, expected %+q", buf, testDataB)
                }
        } else {
                // It is permissible for Put to fail, but it must
                // leave us with either the original data, the new
                // data, or nothing at all.
                if getErr == nil && bytes.Compare(buf, testDataA) != 0 && bytes.Compare(buf, testDataB) != 0 {
-                       t.Errorf("Put failed but Get returned %+v, which is neither %+v nor %+v", buf, testDataA, testDataB)
+                       t.Errorf("Put failed but Get returned %+q, which is neither %+q nor %+q", buf, testDataA, testDataB)
                }
        }
        if getErr == nil {
@@ -199,7 +227,7 @@ func testPutBlockWithDifferentContent(t *testing.T, factory TestableVolumeFactor
 
 // Put and get multiple blocks
 // Test is intended for only writable volumes
-func testPutMultipleBlocks(t *testing.T, factory TestableVolumeFactory) {
+func testPutMultipleBlocks(t TB, factory TestableVolumeFactory) {
        v := factory(t)
        defer v.Teardown()
 
@@ -225,33 +253,39 @@ func testPutMultipleBlocks(t *testing.T, factory TestableVolumeFactory) {
        data, err := v.Get(TestHash)
        if err != nil {
                t.Error(err)
-       } else if bytes.Compare(data, TestBlock) != 0 {
-               t.Errorf("Block present, but content is incorrect: Expected: %v  Found: %v", data, TestBlock)
+       } else {
+               if bytes.Compare(data, TestBlock) != 0 {
+                       t.Errorf("Block present, but got %+q, expected %+q", data, TestBlock)
+               }
+               bufs.Put(data)
        }
-       bufs.Put(data)
 
        data, err = v.Get(TestHash2)
        if err != nil {
                t.Error(err)
-       } else if bytes.Compare(data, TestBlock2) != 0 {
-               t.Errorf("Block present, but content is incorrect: Expected: %v  Found: %v", data, TestBlock2)
+       } else {
+               if bytes.Compare(data, TestBlock2) != 0 {
+                       t.Errorf("Block present, but got %+q, expected %+q", data, TestBlock2)
+               }
+               bufs.Put(data)
        }
-       bufs.Put(data)
 
        data, err = v.Get(TestHash3)
        if err != nil {
                t.Error(err)
-       } else if bytes.Compare(data, TestBlock3) != 0 {
-               t.Errorf("Block present, but content is incorrect: Expected: %v  Found: %v", data, TestBlock3)
+       } else {
+               if bytes.Compare(data, TestBlock3) != 0 {
+                       t.Errorf("Block present, but got %+q, expected %+q", data, TestBlock3)
+               }
+               bufs.Put(data)
        }
-       bufs.Put(data)
 }
 
 // testPutAndTouch
 //   Test that when applying PUT to a block that already exists,
 //   the block's modification time is updated.
 // Test is intended for only writable volumes
-func testPutAndTouch(t *testing.T, factory TestableVolumeFactory) {
+func testPutAndTouch(t TB, factory TestableVolumeFactory) {
        v := factory(t)
        defer v.Teardown()
 
@@ -293,7 +327,7 @@ func testPutAndTouch(t *testing.T, factory TestableVolumeFactory) {
 
 // Touching a non-existing block should result in error.
 // Test should pass for both writable and read-only volumes
-func testTouchNoSuchBlock(t *testing.T, factory TestableVolumeFactory) {
+func testTouchNoSuchBlock(t TB, factory TestableVolumeFactory) {
        v := factory(t)
        defer v.Teardown()
 
@@ -304,7 +338,7 @@ func testTouchNoSuchBlock(t *testing.T, factory TestableVolumeFactory) {
 
 // Invoking Mtime on a non-existing block should result in error.
 // Test should pass for both writable and read-only volumes
-func testMtimeNoSuchBlock(t *testing.T, factory TestableVolumeFactory) {
+func testMtimeNoSuchBlock(t TB, factory TestableVolumeFactory) {
        v := factory(t)
        defer v.Teardown()
 
@@ -318,7 +352,7 @@ func testMtimeNoSuchBlock(t *testing.T, factory TestableVolumeFactory) {
 // * with a prefix
 // * with no such prefix
 // Test should pass for both writable and read-only volumes
-func testIndexTo(t *testing.T, factory TestableVolumeFactory) {
+func testIndexTo(t TB, factory TestableVolumeFactory) {
        v := factory(t)
        defer v.Teardown()
 
@@ -326,6 +360,13 @@ func testIndexTo(t *testing.T, factory TestableVolumeFactory) {
        v.PutRaw(TestHash2, TestBlock2)
        v.PutRaw(TestHash3, TestBlock3)
 
+       // Blocks whose names aren't valid Keep hashes should be omitted
+       // from the index.
+       v.PutRaw("fffffffffnotreallyahashfffffffff", nil)
+       v.PutRaw("FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF", nil)
+       v.PutRaw("f0000000000000000000000000000000f", nil)
+       v.PutRaw("f00", nil)
+
        buf := new(bytes.Buffer)
        v.IndexTo("", buf)
        indexRows := strings.Split(string(buf.Bytes()), "\n")
@@ -368,9 +409,10 @@ func testIndexTo(t *testing.T, factory TestableVolumeFactory) {
 // Calling Delete() for a block immediately after writing it (not old enough)
 // should neither delete the data nor return an error.
 // Test is intended for only writable volumes
-func testDeleteNewBlock(t *testing.T, factory TestableVolumeFactory) {
+func testDeleteNewBlock(t TB, factory TestableVolumeFactory) {
        v := factory(t)
        defer v.Teardown()
+       blobSignatureTTL = 300 * time.Second
 
        if v.Writable() == false {
                return
@@ -384,37 +426,40 @@ func testDeleteNewBlock(t *testing.T, factory TestableVolumeFactory) {
        data, err := v.Get(TestHash)
        if err != nil {
                t.Error(err)
-       } else if bytes.Compare(data, TestBlock) != 0 {
-               t.Error("Block still present, but content is incorrect: %+v != %+v", data, TestBlock)
+       } else {
+               if bytes.Compare(data, TestBlock) != 0 {
+                       t.Errorf("Got data %+q, expected %+q", data, TestBlock)
+               }
+               bufs.Put(data)
        }
-       bufs.Put(data)
 }
 
 // Calling Delete() for a block with a timestamp older than
 // blobSignatureTTL seconds in the past should delete the data.
 // Test is intended for only writable volumes
-func testDeleteOldBlock(t *testing.T, factory TestableVolumeFactory) {
+func testDeleteOldBlock(t TB, factory TestableVolumeFactory) {
        v := factory(t)
        defer v.Teardown()
+       blobSignatureTTL = 300 * time.Second
 
        if v.Writable() == false {
                return
        }
 
        v.Put(TestHash, TestBlock)
-       v.TouchWithDate(TestHash, time.Now().Add(-2*blobSignatureTTL*time.Second))
+       v.TouchWithDate(TestHash, time.Now().Add(-2*blobSignatureTTL))
 
        if err := v.Delete(TestHash); err != nil {
                t.Error(err)
        }
        if _, err := v.Get(TestHash); err == nil || !os.IsNotExist(err) {
-               t.Errorf("os.IsNotExist(%v) should have been true", err.Error())
+               t.Errorf("os.IsNotExist(%v) should have been true", err)
        }
 }
 
 // Calling Delete() for a block that does not exist should result in error.
 // Test should pass for both writable and read-only volumes
-func testDeleteNoSuchBlock(t *testing.T, factory TestableVolumeFactory) {
+func testDeleteNoSuchBlock(t TB, factory TestableVolumeFactory) {
        v := factory(t)
        defer v.Teardown()
 
@@ -425,7 +470,7 @@ func testDeleteNoSuchBlock(t *testing.T, factory TestableVolumeFactory) {
 
 // Invoke Status and verify that VolumeStatus is returned
 // Test should pass for both writable and read-only volumes
-func testStatus(t *testing.T, factory TestableVolumeFactory) {
+func testStatus(t TB, factory TestableVolumeFactory) {
        v := factory(t)
        defer v.Teardown()
 
@@ -446,7 +491,7 @@ func testStatus(t *testing.T, factory TestableVolumeFactory) {
 
 // Invoke String for the volume; expect non-empty result
 // Test should pass for both writable and read-only volumes
-func testString(t *testing.T, factory TestableVolumeFactory) {
+func testString(t TB, factory TestableVolumeFactory) {
        v := factory(t)
        defer v.Teardown()
 
@@ -457,7 +502,7 @@ func testString(t *testing.T, factory TestableVolumeFactory) {
 
 // Putting, updating, touching, and deleting blocks from a read-only volume result in error.
 // Test is intended for only read-only volumes
-func testUpdateReadOnly(t *testing.T, factory TestableVolumeFactory) {
+func testUpdateReadOnly(t TB, factory TestableVolumeFactory) {
        v := factory(t)
        defer v.Teardown()
 
@@ -504,7 +549,7 @@ func testUpdateReadOnly(t *testing.T, factory TestableVolumeFactory) {
 
 // Launch concurrent Gets
 // Test should pass for both writable and read-only volumes
-func testGetConcurrent(t *testing.T, factory TestableVolumeFactory) {
+func testGetConcurrent(t TB, factory TestableVolumeFactory) {
        v := factory(t)
        defer v.Teardown()
 
@@ -557,7 +602,7 @@ func testGetConcurrent(t *testing.T, factory TestableVolumeFactory) {
 
 // Launch concurrent Puts
 // Test is intended for only writable volumes
-func testPutConcurrent(t *testing.T, factory TestableVolumeFactory) {
+func testPutConcurrent(t TB, factory TestableVolumeFactory) {
        v := factory(t)
        defer v.Teardown()
 
@@ -623,3 +668,31 @@ func testPutConcurrent(t *testing.T, factory TestableVolumeFactory) {
                t.Errorf("Get #3: expected %s, got %s", string(TestBlock3), string(buf))
        }
 }
+
+// Write and read back a full size block
+func testPutFullBlock(t TB, factory TestableVolumeFactory) {
+       v := factory(t)
+       defer v.Teardown()
+
+       if !v.Writable() {
+               return
+       }
+
+       wdata := make([]byte, BlockSize)
+       wdata[0] = 'a'
+       wdata[BlockSize-1] = 'z'
+       hash := fmt.Sprintf("%x", md5.Sum(wdata))
+       err := v.Put(hash, wdata)
+       if err != nil {
+               t.Fatal(err)
+       }
+       rdata, err := v.Get(hash)
+       if err != nil {
+               t.Error(err)
+       } else {
+               defer bufs.Put(rdata)
+       }
+       if bytes.Compare(rdata, wdata) != 0 {
+               t.Error("rdata != wdata")
+       }
+}
index f272c84c837676b12cc45ad3b2e962f635e746b1..d6714365de5bef98ad082b93f595231993bafa48 100644 (file)
@@ -214,3 +214,7 @@ func (v *MockVolume) String() string {
 func (v *MockVolume) Writable() bool {
        return !v.Readonly
 }
+
+func (v *MockVolume) Replication() int {
+       return 1
+}
index 6c0f5c4e978d995b5c9308f52d5d2414045d64fc..910cc25d613cb7690f944b418aebf5c205c7aced 100644 (file)
@@ -1,7 +1,9 @@
 package main
 
 import (
-       "bytes"
+       "bufio"
+       "errors"
+       "flag"
        "fmt"
        "io"
        "io/ioutil"
@@ -16,6 +18,97 @@ import (
        "time"
 )
 
+type unixVolumeAdder struct {
+       *volumeSet
+}
+
+func (vs *unixVolumeAdder) Set(value string) error {
+       if dirs := strings.Split(value, ","); len(dirs) > 1 {
+               log.Print("DEPRECATED: using comma-separated volume list.")
+               for _, dir := range dirs {
+                       if err := vs.Set(dir); err != nil {
+                               return err
+                       }
+               }
+               return nil
+       }
+       if len(value) == 0 || value[0] != '/' {
+               return errors.New("Invalid volume: must begin with '/'.")
+       }
+       if _, err := os.Stat(value); err != nil {
+               return err
+       }
+       var locker sync.Locker
+       if flagSerializeIO {
+               locker = &sync.Mutex{}
+       }
+       *vs.volumeSet = append(*vs.volumeSet, &UnixVolume{
+               root:     value,
+               locker:   locker,
+               readonly: flagReadonly,
+       })
+       return nil
+}
+
+func init() {
+       flag.Var(
+               &unixVolumeAdder{&volumes},
+               "volumes",
+               "Deprecated synonym for -volume.")
+       flag.Var(
+               &unixVolumeAdder{&volumes},
+               "volume",
+               "Local storage directory. Can be given more than once to add multiple directories. If none are supplied, the default is to use all directories named \"keep\" that exist in the top level directory of a mount point at startup time. Can be a comma-separated list, but this is deprecated: use multiple -volume arguments instead.")
+}
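+
+// Illustrative usage (not part of this change; paths hypothetical):
+//
+//     keepstore -volume /mnt/disk1/keep -volume /mnt/disk2/keep
+//
+// versus the deprecated comma-separated form:
+//
+//     keepstore -volumes /mnt/disk1/keep,/mnt/disk2/keep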
+
+// Discover adds a UnixVolume for every directory named "keep" that is
+// located at the top level of a device- or tmpfs-backed mount point
+// other than "/". It returns the number of volumes added.
+func (vs *unixVolumeAdder) Discover() int {
+       added := 0
+       f, err := os.Open(ProcMounts)
+       if err != nil {
+               log.Fatalf("opening %s: %s", ProcMounts, err)
+       }
+       scanner := bufio.NewScanner(f)
+       for scanner.Scan() {
+               args := strings.Fields(scanner.Text())
+               if err := scanner.Err(); err != nil {
+                       log.Fatalf("reading %s: %s", ProcMounts, err)
+               }
+               dev, mount := args[0], args[1]
+               if mount == "/" {
+                       continue
+               }
+               if dev != "tmpfs" && !strings.HasPrefix(dev, "/dev/") {
+                       continue
+               }
+               keepdir := mount + "/keep"
+               if st, err := os.Stat(keepdir); err != nil || !st.IsDir() {
+                       continue
+               }
+               // Set the -readonly flag (but only for this volume)
+               // if the filesystem is mounted readonly.
+               flagReadonlyWas := flagReadonly
+               for _, fsopt := range strings.Split(args[3], ",") {
+                       if fsopt == "ro" {
+                               flagReadonly = true
+                               break
+                       }
+                       if fsopt == "rw" {
+                               break
+                       }
+               }
+               if err := vs.Set(keepdir); err != nil {
+                       log.Printf("adding %q: %s", keepdir, err)
+               } else {
+                       added++
+               }
+               flagReadonly = flagReadonlyWas
+       }
+       return added
+}
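+
+// Illustrative example (not part of this change): given a /proc/mounts
+// line such as (hypothetical device and mount point)
+//
+//     /dev/sdb1 /mnt/disk1 ext4 rw,relatime 0 0
+//
+// Discover checks for a directory at /mnt/disk1/keep and adds it as a
+// volume; an "ro" mount option would make that volume read-only.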
+
 // A UnixVolume stores and retrieves blocks in a local directory.
 type UnixVolume struct {
        // path to the volume's root directory
@@ -96,7 +189,7 @@ func (v *UnixVolume) Get(loc string) ([]byte, error) {
        path := v.blockPath(loc)
        stat, err := v.stat(path)
        if err != nil {
-               return nil, err
+               return nil, v.translateError(err)
        }
        buf := bufs.Get(int(stat.Size()))
        err = v.getFunc(path, func(rdr io.Reader) error {
@@ -115,43 +208,11 @@ func (v *UnixVolume) Get(loc string) ([]byte, error) {
 // bytes.Compare(), but uses less memory.
 func (v *UnixVolume) Compare(loc string, expect []byte) error {
        path := v.blockPath(loc)
-       stat, err := v.stat(path)
-       if err != nil {
-               return err
+       if _, err := v.stat(path); err != nil {
+               return v.translateError(err)
        }
-       bufLen := 1 << 20
-       if int64(bufLen) > stat.Size() {
-               bufLen = int(stat.Size())
-               if bufLen < 1 {
-                       // len(buf)==0 would prevent us from handling
-                       // empty files the same way as non-empty
-                       // files, because reading 0 bytes at a time
-                       // never reaches EOF.
-                       bufLen = 1
-               }
-       }
-       cmp := expect
-       buf := make([]byte, bufLen)
        return v.getFunc(path, func(rdr io.Reader) error {
-               // Loop invariants: all data read so far matched what
-               // we expected, and the first N bytes of cmp are
-               // expected to equal the next N bytes read from
-               // reader.
-               for {
-                       n, err := rdr.Read(buf)
-                       if n > len(cmp) || bytes.Compare(cmp[:n], buf[:n]) != 0 {
-                               return collisionOrCorrupt(loc[:32], expect[:len(expect)-len(cmp)], buf[:n], rdr)
-                       }
-                       cmp = cmp[n:]
-                       if err == io.EOF {
-                               if len(cmp) != 0 {
-                                       return collisionOrCorrupt(loc[:32], expect[:len(expect)-len(cmp)], nil, nil)
-                               }
-                               return nil
-                       } else if err != nil {
-                               return err
-                       }
-               }
+               return compareReaderWithBuf(rdr, expect, loc[:32])
        })
 }
 
@@ -231,6 +292,7 @@ func (v *UnixVolume) Status() *VolumeStatus {
 }
 
 var blockDirRe = regexp.MustCompile(`^[0-9a-f]+$`)
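+// blockFileRe matches names consisting of exactly 32 lowercase hex
+// digits, e.g. "0cc175b9c0f1b6a831c399e269772661"; names like "f00" or
+// "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF" are rejected (cf. testIndexTo).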
+var blockFileRe = regexp.MustCompile(`^[0-9a-f]{32}$`)
 
 // IndexTo writes (to the given Writer) a list of blocks found on this
 // volume which begin with the specified prefix. If the prefix is an
@@ -287,6 +349,9 @@ func (v *UnixVolume) IndexTo(prefix string, w io.Writer) error {
                        if !strings.HasPrefix(name, prefix) {
                                continue
                        }
+                       if !blockFileRe.MatchString(name) {
+                               continue
+                       }
                        _, err = fmt.Fprint(w,
                                name,
                                "+", fileInfo[0].Size(),
@@ -406,6 +471,10 @@ func (v *UnixVolume) Writable() bool {
        return !v.readonly
 }
 
+func (v *UnixVolume) Replication() int {
+       return 1
+}
+
 // lockfile and unlockfile use flock(2) to manage kernel file locks.
 func lockfile(f *os.File) error {
        return syscall.Flock(int(f.Fd()), syscall.LOCK_EX)
@@ -414,3 +483,16 @@ func lockfile(f *os.File) error {
 func unlockfile(f *os.File) error {
        return syscall.Flock(int(f.Fd()), syscall.LOCK_UN)
 }
+
+// translateError, where appropriate, turns a filesystem-specific error
+// into an error recognized by handlers, such as os.ErrNotExist.
+func (v *UnixVolume) translateError(err error) error {
+       switch err.(type) {
+       case *os.PathError:
+               // stat() returns a PathError if the parent directory
+               // (not just the file itself) is missing
+               return os.ErrNotExist
+       default:
+               return err
+       }
+}
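+
+// Illustrative note (not part of this change): because missing blocks
+// now surface as os.ErrNotExist, callers can use the standard check:
+//
+//     if _, err := v.Get(loc); os.IsNotExist(err) {
+//             // block is absent (not a permission or I/O problem)
+//     }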
index 924637f58e5004f1cec307266c87c0b53ae81d03..b216810f8cb0fc1008e4bb7bc99b3c306728d70a 100644 (file)
@@ -16,10 +16,10 @@ import (
 
 type TestableUnixVolume struct {
        UnixVolume
-       t *testing.T
+       t TB
 }
 
-func NewTestableUnixVolume(t *testing.T, serialize bool, readonly bool) *TestableUnixVolume {
+func NewTestableUnixVolume(t TB, serialize bool, readonly bool) *TestableUnixVolume {
        d, err := ioutil.TempDir("", "volume_test")
        if err != nil {
                t.Fatal(err)
@@ -66,28 +66,28 @@ func (v *TestableUnixVolume) Teardown() {
 
 // serialize = false; readonly = false
 func TestUnixVolumeWithGenericTests(t *testing.T) {
-       DoGenericVolumeTests(t, func(t *testing.T) TestableVolume {
+       DoGenericVolumeTests(t, func(t TB) TestableVolume {
                return NewTestableUnixVolume(t, false, false)
        })
 }
 
 // serialize = false; readonly = true
 func TestUnixVolumeWithGenericTestsReadOnly(t *testing.T) {
-       DoGenericVolumeTests(t, func(t *testing.T) TestableVolume {
+       DoGenericVolumeTests(t, func(t TB) TestableVolume {
                return NewTestableUnixVolume(t, false, true)
        })
 }
 
 // serialize = true; readonly = false
 func TestUnixVolumeWithGenericTestsSerialized(t *testing.T) {
-       DoGenericVolumeTests(t, func(t *testing.T) TestableVolume {
+       DoGenericVolumeTests(t, func(t TB) TestableVolume {
                return NewTestableUnixVolume(t, true, false)
        })
 }
 
 // serialize = false; readonly = false
 func TestUnixVolumeHandlersWithGenericVolumeTests(t *testing.T) {
-       DoHandlersWithGenericVolumeTests(t, func(t *testing.T) (*RRVolumeManager, []TestableVolume) {
+       DoHandlersWithGenericVolumeTests(t, func(t TB) (*RRVolumeManager, []TestableVolume) {
                vols := make([]Volume, 2)
                testableUnixVols := make([]TestableVolume, 2)
 
diff --git a/services/login-sync/agpl-3.0.txt b/services/login-sync/agpl-3.0.txt
new file mode 100644 (file)
index 0000000..dba13ed
--- /dev/null
@@ -0,0 +1,661 @@
+                    GNU AFFERO GENERAL PUBLIC LICENSE
+                       Version 3, 19 November 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+                            Preamble
+
+  The GNU Affero General Public License is a free, copyleft license for
+software and other kinds of works, specifically designed to ensure
+cooperation with the community in the case of network server software.
+
+  The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works.  By contrast,
+our General Public Licenses are intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+  Developers that use our General Public Licenses protect your rights
+with two steps: (1) assert copyright on the software, and (2) offer
+you this License which gives you legal permission to copy, distribute
+and/or modify the software.
+
+  A secondary benefit of defending all users' freedom is that
+improvements made in alternate versions of the program, if they
+receive widespread use, become available for other developers to
+incorporate.  Many developers of free software are heartened and
+encouraged by the resulting cooperation.  However, in the case of
+software used on network servers, this result may fail to come about.
+The GNU General Public License permits making a modified version and
+letting the public access it on a server without ever releasing its
+source code to the public.
+
+  The GNU Affero General Public License is designed specifically to
+ensure that, in such cases, the modified source code becomes available
+to the community.  It requires the operator of a network server to
+provide the source code of the modified version running there to the
+users of that server.  Therefore, public use of a modified version, on
+a publicly accessible server, gives the public access to the source
+code of the modified version.
+
+  An older license, called the Affero General Public License and
+published by Affero, was designed to accomplish similar goals.  This is
+a different license, not a version of the Affero GPL, but Affero has
+released a new version of the Affero GPL which permits relicensing under
+this license.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+                       TERMS AND CONDITIONS
+
+  0. Definitions.
+
+  "This License" refers to version 3 of the GNU Affero General Public License.
+
+  "Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+  "The Program" refers to any copyrightable work licensed under this
+License.  Each licensee is addressed as "you".  "Licensees" and
+"recipients" may be individuals or organizations.
+
+  To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy.  The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+
+  A "covered work" means either the unmodified Program or a work based
+on the Program.
+
+  To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy.  Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+
+  To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies.  Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+
+  An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License.  If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+  1. Source Code.
+
+  The "source code" for a work means the preferred form of the work
+for making modifications to it.  "Object code" means any non-source
+form of a work.
+
+  A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+
+  The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form.  A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+
+  The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities.  However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work.  For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+  The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+
+  The Corresponding Source for a work in source code form is that
+same work.
+
+  2. Basic Permissions.
+
+  All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met.  This License explicitly affirms your unlimited
+permission to run the unmodified Program.  The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work.  This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+
+  You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force.  You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright.  Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+
+  Conveying under any other circumstances is permitted solely under
+the conditions stated below.  Sublicensing is not allowed; section 10
+makes it unnecessary.
+
+  3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+  No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+
+  When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+
+  4. Conveying Verbatim Copies.
+
+  You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+
+  You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+
+  5. Conveying Modified Source Versions.
+
+  You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+
+    a) The work must carry prominent notices stating that you modified
+    it, and giving a relevant date.
+
+    b) The work must carry prominent notices stating that it is
+    released under this License and any conditions added under section
+    7.  This requirement modifies the requirement in section 4 to
+    "keep intact all notices".
+
+    c) You must license the entire work, as a whole, under this
+    License to anyone who comes into possession of a copy.  This
+    License will therefore apply, along with any applicable section 7
+    additional terms, to the whole of the work, and all its parts,
+    regardless of how they are packaged.  This License gives no
+    permission to license the work in any other way, but it does not
+    invalidate such permission if you have separately received it.
+
+    d) If the work has interactive user interfaces, each must display
+    Appropriate Legal Notices; however, if the Program has interactive
+    interfaces that do not display Appropriate Legal Notices, your
+    work need not make them do so.
+
+  A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit.  Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+  6. Conveying Non-Source Forms.
+
+  You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+
+    a) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by the
+    Corresponding Source fixed on a durable physical medium
+    customarily used for software interchange.
+
+    b) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by a
+    written offer, valid for at least three years and valid for as
+    long as you offer spare parts or customer support for that product
+    model, to give anyone who possesses the object code either (1) a
+    copy of the Corresponding Source for all the software in the
+    product that is covered by this License, on a durable physical
+    medium customarily used for software interchange, for a price no
+    more than your reasonable cost of physically performing this
+    conveying of source, or (2) access to copy the
+    Corresponding Source from a network server at no charge.
+
+    c) Convey individual copies of the object code with a copy of the
+    written offer to provide the Corresponding Source.  This
+    alternative is allowed only occasionally and noncommercially, and
+    only if you received the object code with such an offer, in accord
+    with subsection 6b.
+
+    d) Convey the object code by offering access from a designated
+    place (gratis or for a charge), and offer equivalent access to the
+    Corresponding Source in the same way through the same place at no
+    further charge.  You need not require recipients to copy the
+    Corresponding Source along with the object code.  If the place to
+    copy the object code is a network server, the Corresponding Source
+    may be on a different server (operated by you or a third party)
+    that supports equivalent copying facilities, provided you maintain
+    clear directions next to the object code saying where to find the
+    Corresponding Source.  Regardless of what server hosts the
+    Corresponding Source, you remain obligated to ensure that it is
+    available for as long as needed to satisfy these requirements.
+
+    e) Convey the object code using peer-to-peer transmission, provided
+    you inform other peers where the object code and Corresponding
+    Source of the work are being offered to the general public at no
+    charge under subsection 6d.
+
+  A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+
+  A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling.  In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage.  For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product.  A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+  "Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source.  The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+
+  If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information.  But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+
+  The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed.  Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+
+  Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+
+  7. Additional Terms.
+
+  "Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law.  If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+  When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it.  (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.)  You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+
+  Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+
+    a) Disclaiming warranty or limiting liability differently from the
+    terms of sections 15 and 16 of this License; or
+
+    b) Requiring preservation of specified reasonable legal notices or
+    author attributions in that material or in the Appropriate Legal
+    Notices displayed by works containing it; or
+
+    c) Prohibiting misrepresentation of the origin of that material, or
+    requiring that modified versions of such material be marked in
+    reasonable ways as different from the original version; or
+
+    d) Limiting the use for publicity purposes of names of licensors or
+    authors of the material; or
+
+    e) Declining to grant rights under trademark law for use of some
+    trade names, trademarks, or service marks; or
+
+    f) Requiring indemnification of licensors and authors of that
+    material by anyone who conveys the material (or modified versions of
+    it) with contractual assumptions of liability to the recipient, for
+    any liability that these contractual assumptions directly impose on
+    those licensors and authors.
+
+  All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10.  If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term.  If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+
+  If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+
+  Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+
+  8. Termination.
+
+  You may not propagate or modify a covered work except as expressly
+provided under this License.  Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+
+  However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+  Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+  Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License.  If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+
+  9. Acceptance Not Required for Having Copies.
+
+  You are not required to accept this License in order to receive or
+run a copy of the Program.  Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance.  However,
+nothing other than this License grants you permission to propagate or
+modify any covered work.  These actions infringe copyright if you do
+not accept this License.  Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+
+  10. Automatic Licensing of Downstream Recipients.
+
+  Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License.  You are not responsible
+for enforcing compliance by third parties with this License.
+
+  An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations.  If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+
+  You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License.  For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+
+  11. Patents.
+
+  A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based.  The
+work thus licensed is called the contributor's "contributor version".
+
+  A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version.  For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+
+  Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+
+  In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement).  To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+
+  If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients.  "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+
+  If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+
+  A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License.  You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+
+  Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+
+  12. No Surrender of Others' Freedom.
+
+  If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all.  For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+
+  13. Remote Network Interaction; Use with the GNU General Public License.
+
+  Notwithstanding any other provision of this License, if you modify the
+Program, your modified version must prominently offer all users
+interacting with it remotely through a computer network (if your version
+supports such interaction) an opportunity to receive the Corresponding
+Source of your version by providing access to the Corresponding Source
+from a network server at no charge, through some standard or customary
+means of facilitating copying of software.  This Corresponding Source
+shall include the Corresponding Source for any work covered by version 3
+of the GNU General Public License that is incorporated pursuant to the
+following paragraph.
+
+  Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU General Public License into a single
+combined work, and to convey the resulting work.  The terms of this
+License will continue to apply to the part which is the covered work,
+but the work with which it is combined will remain governed by version
+3 of the GNU General Public License.
+
+  14. Revised Versions of this License.
+
+  The Free Software Foundation may publish revised and/or new versions of
+the GNU Affero General Public License from time to time.  Such new versions
+will be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+  Each version is given a distinguishing version number.  If the
+Program specifies that a certain numbered version of the GNU Affero General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation.  If the Program does not specify a version number of the
+GNU Affero General Public License, you may choose any version ever published
+by the Free Software Foundation.
+
+  If the Program specifies that a proxy can decide which future
+versions of the GNU Affero General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+
+  Later license versions may give you additional or different
+permissions.  However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+  15. Disclaimer of Warranty.
+
+  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+  16. Limitation of Liability.
+
+  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+  17. Interpretation of Sections 15 and 16.
+
+  If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+                     END OF TERMS AND CONDITIONS
+
+            How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+  If your software can interact with users remotely through a computer
+network, you should also make sure that it provides a way for users to
+get its source.  For example, if your program is a web application, its
+interface could display a "Source" link that leads users to an archive
+of the code.  There are many ways you could offer source, and different
+solutions will be better for different programs; see section 13 for the
+specific requirements.
+
+  You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU AGPL, see
+<http://www.gnu.org/licenses/>.
index 2f95a55c55f45c65b3fe767961e562f9006f554c..b3975b2ce4e6aa51612727eba4ede78b6a120fc0 100644 (file)
@@ -15,7 +15,7 @@ Gem::Specification.new do |s|
   s.authors     = ["Arvados Authors"]
   s.email       = 'gem-dev@curoverse.com'
   s.licenses    = ['GNU Affero General Public License, version 3.0']
-  s.files       = ["bin/arvados-login-sync"]
+  s.files       = ["bin/arvados-login-sync", "agpl-3.0.txt"]
   s.executables << "arvados-login-sync"
   s.required_ruby_version = '>= 2.1.0'
   s.add_runtime_dependency 'arvados', '~> 0.1', '>= 0.1.20150615153458'
index 9561fb1061f6de114633c70871232a6896dcbe8a..5ac3e99631fa7ff8f79b1c678386abca065da449 100644 (file)
@@ -1 +1,2 @@
+include agpl-3.0.txt
 include README.rst
diff --git a/services/nodemanager/agpl-3.0.txt b/services/nodemanager/agpl-3.0.txt
new file mode 100644 (file)
index 0000000..dba13ed
--- /dev/null
@@ -0,0 +1,661 @@
+                    GNU AFFERO GENERAL PUBLIC LICENSE
+                       Version 3, 19 November 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+                            Preamble
+
+  The GNU Affero General Public License is a free, copyleft license for
+software and other kinds of works, specifically designed to ensure
+cooperation with the community in the case of network server software.
+
+  The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works.  By contrast,
+our General Public Licenses are intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+  Developers that use our General Public Licenses protect your rights
+with two steps: (1) assert copyright on the software, and (2) offer
+you this License which gives you legal permission to copy, distribute
+and/or modify the software.
+
+  A secondary benefit of defending all users' freedom is that
+improvements made in alternate versions of the program, if they
+receive widespread use, become available for other developers to
+incorporate.  Many developers of free software are heartened and
+encouraged by the resulting cooperation.  However, in the case of
+software used on network servers, this result may fail to come about.
+The GNU General Public License permits making a modified version and
+letting the public access it on a server without ever releasing its
+source code to the public.
+
+  The GNU Affero General Public License is designed specifically to
+ensure that, in such cases, the modified source code becomes available
+to the community.  It requires the operator of a network server to
+provide the source code of the modified version running there to the
+users of that server.  Therefore, public use of a modified version, on
+a publicly accessible server, gives the public access to the source
+code of the modified version.
+
+  An older license, called the Affero General Public License and
+published by Affero, was designed to accomplish similar goals.  This is
+a different license, not a version of the Affero GPL, but Affero has
+released a new version of the Affero GPL which permits relicensing under
+this license.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+                       TERMS AND CONDITIONS
+
+  0. Definitions.
+
+  "This License" refers to version 3 of the GNU Affero General Public License.
+
+  "Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+  "The Program" refers to any copyrightable work licensed under this
+License.  Each licensee is addressed as "you".  "Licensees" and
+"recipients" may be individuals or organizations.
+
+  To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy.  The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+
+  A "covered work" means either the unmodified Program or a work based
+on the Program.
+
+  To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy.  Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+
+  To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies.  Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+
+  An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License.  If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+  1. Source Code.
+
+  The "source code" for a work means the preferred form of the work
+for making modifications to it.  "Object code" means any non-source
+form of a work.
+
+  A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+
+  The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form.  A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+
+  The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities.  However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work.  For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+  The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+
+  The Corresponding Source for a work in source code form is that
+same work.
+
+  2. Basic Permissions.
+
+  All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met.  This License explicitly affirms your unlimited
+permission to run the unmodified Program.  The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work.  This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+
+  You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force.  You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright.  Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+
+  Conveying under any other circumstances is permitted solely under
+the conditions stated below.  Sublicensing is not allowed; section 10
+makes it unnecessary.
+
+  3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+  No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+
+  When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+
+  4. Conveying Verbatim Copies.
+
+  You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+
+  You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+
+  5. Conveying Modified Source Versions.
+
+  You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+
+    a) The work must carry prominent notices stating that you modified
+    it, and giving a relevant date.
+
+    b) The work must carry prominent notices stating that it is
+    released under this License and any conditions added under section
+    7.  This requirement modifies the requirement in section 4 to
+    "keep intact all notices".
+
+    c) You must license the entire work, as a whole, under this
+    License to anyone who comes into possession of a copy.  This
+    License will therefore apply, along with any applicable section 7
+    additional terms, to the whole of the work, and all its parts,
+    regardless of how they are packaged.  This License gives no
+    permission to license the work in any other way, but it does not
+    invalidate such permission if you have separately received it.
+
+    d) If the work has interactive user interfaces, each must display
+    Appropriate Legal Notices; however, if the Program has interactive
+    interfaces that do not display Appropriate Legal Notices, your
+    work need not make them do so.
+
+  A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit.  Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+  6. Conveying Non-Source Forms.
+
+  You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+
+    a) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by the
+    Corresponding Source fixed on a durable physical medium
+    customarily used for software interchange.
+
+    b) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by a
+    written offer, valid for at least three years and valid for as
+    long as you offer spare parts or customer support for that product
+    model, to give anyone who possesses the object code either (1) a
+    copy of the Corresponding Source for all the software in the
+    product that is covered by this License, on a durable physical
+    medium customarily used for software interchange, for a price no
+    more than your reasonable cost of physically performing this
+    conveying of source, or (2) access to copy the
+    Corresponding Source from a network server at no charge.
+
+    c) Convey individual copies of the object code with a copy of the
+    written offer to provide the Corresponding Source.  This
+    alternative is allowed only occasionally and noncommercially, and
+    only if you received the object code with such an offer, in accord
+    with subsection 6b.
+
+    d) Convey the object code by offering access from a designated
+    place (gratis or for a charge), and offer equivalent access to the
+    Corresponding Source in the same way through the same place at no
+    further charge.  You need not require recipients to copy the
+    Corresponding Source along with the object code.  If the place to
+    copy the object code is a network server, the Corresponding Source
+    may be on a different server (operated by you or a third party)
+    that supports equivalent copying facilities, provided you maintain
+    clear directions next to the object code saying where to find the
+    Corresponding Source.  Regardless of what server hosts the
+    Corresponding Source, you remain obligated to ensure that it is
+    available for as long as needed to satisfy these requirements.
+
+    e) Convey the object code using peer-to-peer transmission, provided
+    you inform other peers where the object code and Corresponding
+    Source of the work are being offered to the general public at no
+    charge under subsection 6d.
+
+  A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+
+  A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling.  In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage.  For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product.  A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+  "Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source.  The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+
+  If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information.  But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+
+  The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed.  Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+
+  Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+
+  7. Additional Terms.
+
+  "Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law.  If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+  When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it.  (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.)  You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+
+  Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+
+    a) Disclaiming warranty or limiting liability differently from the
+    terms of sections 15 and 16 of this License; or
+
+    b) Requiring preservation of specified reasonable legal notices or
+    author attributions in that material or in the Appropriate Legal
+    Notices displayed by works containing it; or
+
+    c) Prohibiting misrepresentation of the origin of that material, or
+    requiring that modified versions of such material be marked in
+    reasonable ways as different from the original version; or
+
+    d) Limiting the use for publicity purposes of names of licensors or
+    authors of the material; or
+
+    e) Declining to grant rights under trademark law for use of some
+    trade names, trademarks, or service marks; or
+
+    f) Requiring indemnification of licensors and authors of that
+    material by anyone who conveys the material (or modified versions of
+    it) with contractual assumptions of liability to the recipient, for
+    any liability that these contractual assumptions directly impose on
+    those licensors and authors.
+
+  All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10.  If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term.  If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+
+  If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+
+  Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+
+  8. Termination.
+
+  You may not propagate or modify a covered work except as expressly
+provided under this License.  Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+
+  However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+  Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+  Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License.  If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+
+  9. Acceptance Not Required for Having Copies.
+
+  You are not required to accept this License in order to receive or
+run a copy of the Program.  Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance.  However,
+nothing other than this License grants you permission to propagate or
+modify any covered work.  These actions infringe copyright if you do
+not accept this License.  Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+
+  10. Automatic Licensing of Downstream Recipients.
+
+  Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License.  You are not responsible
+for enforcing compliance by third parties with this License.
+
+  An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations.  If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+
+  You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License.  For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+
+  11. Patents.
+
+  A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based.  The
+work thus licensed is called the contributor's "contributor version".
+
+  A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version.  For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+
+  Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+
+  In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement).  To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+
+  If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients.  "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+
+  If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+
+  A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License.  You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+
+  Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+
+  12. No Surrender of Others' Freedom.
+
+  If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all.  For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+
+  13. Remote Network Interaction; Use with the GNU General Public License.
+
+  Notwithstanding any other provision of this License, if you modify the
+Program, your modified version must prominently offer all users
+interacting with it remotely through a computer network (if your version
+supports such interaction) an opportunity to receive the Corresponding
+Source of your version by providing access to the Corresponding Source
+from a network server at no charge, through some standard or customary
+means of facilitating copying of software.  This Corresponding Source
+shall include the Corresponding Source for any work covered by version 3
+of the GNU General Public License that is incorporated pursuant to the
+following paragraph.
+
+  Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU General Public License into a single
+combined work, and to convey the resulting work.  The terms of this
+License will continue to apply to the part which is the covered work,
+but the work with which it is combined will remain governed by version
+3 of the GNU General Public License.
+
+  14. Revised Versions of this License.
+
+  The Free Software Foundation may publish revised and/or new versions of
+the GNU Affero General Public License from time to time.  Such new versions
+will be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+  Each version is given a distinguishing version number.  If the
+Program specifies that a certain numbered version of the GNU Affero General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation.  If the Program does not specify a version number of the
+GNU Affero General Public License, you may choose any version ever published
+by the Free Software Foundation.
+
+  If the Program specifies that a proxy can decide which future
+versions of the GNU Affero General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+
+  Later license versions may give you additional or different
+permissions.  However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+  15. Disclaimer of Warranty.
+
+  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+  16. Limitation of Liability.
+
+  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+  17. Interpretation of Sections 15 and 16.
+
+  If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+                     END OF TERMS AND CONDITIONS
+
+            How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+  If your software can interact with users remotely through a computer
+network, you should also make sure that it provides a way for users to
+get its source.  For example, if your program is a web application, its
+interface could display a "Source" link that leads users to an archive
+of the code.  There are many ways you could offer source, and different
+solutions will be better for different programs; see section 13 for the
+specific requirements.
+
+  You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU AGPL, see
+<http://www.gnu.org/licenses/>.
index e7bd7bf7bdda983970d02595d90f9669cacd9f63..6e46bc0f4c6283ab0d73da212529299bdec4ba10 100644 (file)
@@ -31,6 +31,13 @@ def timestamp_fresh(timestamp, fresh_time):
     return (time.time() - timestamp) < fresh_time
 
 def arvados_node_missing(arvados_node, fresh_time):
+    """Indicate if cloud node corresponding to the arvados
+    node is "missing".
+
+    If True, this means the node has not pinged the API server within the timeout
+    period.  If False, the ping is up to date.  If the node has never pinged,
+    returns None.
+    """
     if arvados_node["last_ping_at"] is None:
         return None
     else:
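
Taken together with timestamp_fresh() above, the missing-node check is just a
freshness comparison.  A standalone sketch of the same logic (assuming
last_ping_at has already been parsed into a Unix timestamp; in the API
response it arrives as a string):

    import time

    def timestamp_fresh(timestamp, fresh_time):
        # True if the event happened within the last fresh_time seconds.
        return (time.time() - timestamp) < fresh_time

    def arvados_node_missing(arvados_node, fresh_time):
        # None if the node has never pinged; otherwise True when the last
        # ping is older than fresh_time seconds.
        if arvados_node["last_ping_at"] is None:
            return None
        return not timestamp_fresh(arvados_node["last_ping_at"], fresh_time)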
index 9ea6c328e70162e6a621275fe92e1d6c7dd4d29d..b366e79ff834fc825697946cd9f3ae5007ad911f 100644 (file)
@@ -88,6 +88,7 @@ class ComputeNodeStateChangeBase(config.actor_class):
                   'slot_number': None,
                   'first_ping_at': None,
                   'last_ping_at': None,
+                  'properties': {},
                   'info': {'ec2_instance_id': None,
                            'last_action': explanation}},
             ).execute()
@@ -133,7 +134,33 @@ class ComputeNodeSetupActor(ComputeNodeStateChangeBase):
                           self.cloud_size.name)
         self.cloud_node = self._cloud.create_node(self.cloud_size,
                                                   self.arvados_node)
+        if not self.cloud_node.size:
+            self.cloud_node.size = self.cloud_size
         self._logger.info("Cloud node %s created.", self.cloud_node.id)
+        self._later.update_arvados_node_properties()
+
+    @ComputeNodeStateChangeBase._retry(config.ARVADOS_ERRORS)
+    def update_arvados_node_properties(self):
+        """Tell Arvados some details about the cloud node.
+
+        Currently we only include size/price from our request, which
+        we already knew before create_cloud_node(), but doing it here
+        gives us an opportunity to provide more detail from
+        self.cloud_node, too.
+        """
+        self.arvados_node['properties']['cloud_node'] = {
+            # Note this 'size' is the node size we asked the cloud
+            # driver to create -- not necessarily equal to the size
+            # reported by the cloud driver for the node that was
+            # created.
+            'size': self.cloud_size.id,
+            'price': self.cloud_size.price,
+        }
+        self.arvados_node = self._arvados.nodes().update(
+            uuid=self.arvados_node['uuid'],
+            body={'properties': self.arvados_node['properties']},
+        ).execute()
+        self._logger.info("%s updated properties.", self.arvados_node['uuid'])
         self._later.post_create()
 
     @ComputeNodeStateChangeBase._retry()
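
After update_arvados_node_properties() runs, the Arvados node record carries
a properties block along these lines (hypothetical size id and price; the
real values come from the configured cloud size):

    {'properties': {'cloud_node': {'size': 'Standard_D3',  # requested size id
                                   'price': 0.56}}}        # hourly price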
@@ -154,6 +181,10 @@ class ComputeNodeShutdownActor(ComputeNodeStateChangeBase):
 
     This actor simply destroys a cloud node, retrying as needed.
     """
+    # Reasons for a shutdown to be cancelled.
+    WINDOW_CLOSED = "shutdown window closed"
+    NODE_BROKEN = "cloud failed to shut down broken node"
+
     def __init__(self, timer_actor, cloud_client, arvados_client, node_monitor,
                  cancellable=True, retry_wait=1, max_retry_wait=180):
         # If a ShutdownActor is cancellable, it will ask the
@@ -167,6 +198,7 @@ class ComputeNodeShutdownActor(ComputeNodeStateChangeBase):
         self._monitor = node_monitor.proxy()
         self.cloud_node = self._monitor.cloud_node.get()
         self.cancellable = cancellable
+        self.cancel_reason = None
         self.success = None
 
     def on_start(self):
@@ -180,7 +212,10 @@ class ComputeNodeShutdownActor(ComputeNodeStateChangeBase):
             self.success = success_flag
         return super(ComputeNodeShutdownActor, self)._finished()
 
-    def cancel_shutdown(self):
+    def cancel_shutdown(self, reason):
+        self.cancel_reason = reason
+        self._logger.info("Cloud node %s shutdown cancelled: %s.",
+                          self.cloud_node.id, reason)
         self._finished(success_flag=False)
 
     def _stop_if_window_closed(orig_func):
@@ -188,10 +223,7 @@ class ComputeNodeShutdownActor(ComputeNodeStateChangeBase):
         def stop_wrapper(self, *args, **kwargs):
             if (self.cancellable and
                   (not self._monitor.shutdown_eligible().get())):
-                self._logger.info(
-                    "Cloud node %s shutdown cancelled - no longer eligible.",
-                    self.cloud_node.id)
-                self._later.cancel_shutdown()
+                self._later.cancel_shutdown(self.WINDOW_CLOSED)
                 return None
             else:
                 return orig_func(self, *args, **kwargs)
@@ -201,8 +233,11 @@ class ComputeNodeShutdownActor(ComputeNodeStateChangeBase):
     @ComputeNodeStateChangeBase._retry()
     def shutdown_node(self):
         if not self._cloud.destroy_node(self.cloud_node):
-            # Force a retry.
-            raise cloud_types.LibcloudError("destroy_node failed")
+            if self._cloud.broken(self.cloud_node):
+                self._later.cancel_shutdown(self.NODE_BROKEN)
+            else:
+                # Force a retry.
+                raise cloud_types.LibcloudError("destroy_node failed")
         self._logger.info("Cloud node %s shut down.", self.cloud_node.id)
         arv_node = self._arvados_node()
         if arv_node is None:
@@ -325,16 +360,21 @@ class ComputeNodeMonitorActor(config.actor_class):
     def shutdown_eligible(self):
         if not self._shutdowns.window_open():
             return False
-        elif self.arvados_node is None:
+        if self.arvados_node is None:
             # Node is unpaired.
             # If it hasn't pinged Arvados after boot_fail seconds, shut it down
             return not timestamp_fresh(self.cloud_node_start_time, self.boot_fail_after)
-        elif arvados_node_missing(self.arvados_node, self.node_stale_after) and self._cloud.broken(self.cloud_node):
+        missing = arvados_node_missing(self.arvados_node, self.node_stale_after)
+        if missing and self._cloud.broken(self.cloud_node):
             # Node is paired, but Arvados says it is missing and the cloud says the node
             # is in an error state, so shut it down.
             return True
-        else:
-            return self.in_state('idle')
+        if missing is None and self._cloud.broken(self.cloud_node):
+            self._logger.warning(
+                "cloud reports broken node, but paired node %s never pinged "
+                "(bug?) -- skipped check for node_stale_after",
+                self.arvados_node['uuid'])
+        return self.in_state('idle')
 
     def consider_shutdown(self):
         next_opening = self._shutdowns.next_opening()
index 225d856ba740cd207996d7a053e9169300b57561..919b57f42c8973bab91de742d1fee48598296f35 100644 (file)
@@ -13,6 +13,7 @@ class ComputeNodeShutdownActor(ShutdownActorBase):
     SLURM_END_STATES = frozenset(['down\n', 'down*\n',
                                   'drain\n', 'drain*\n',
                                   'fail\n', 'fail*\n'])
+    SLURM_DRAIN_STATES = frozenset(['drain\n', 'drng\n'])
 
     def on_start(self):
         arv_node = self._arvados_node()
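
The trailing newlines in these state sets are deliberate: _get_slurm_state()
in the next hunk returns the raw output of sinfo, newline included.  For
example (hypothetical node name and assumed output):

    import subprocess

    # What _get_slurm_state() would return for a draining node:
    state = subprocess.check_output(
        ['sinfo', '--noheader', '-o', '%t', '-n', 'compute0'])
    # state == 'drain\n'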
@@ -30,24 +31,40 @@ class ComputeNodeShutdownActor(ShutdownActorBase):
         cmd.extend(args)
         subprocess.check_output(cmd)
 
-    @ShutdownActorBase._retry((subprocess.CalledProcessError,))
-    def cancel_shutdown(self):
+    def _get_slurm_state(self):
+        return subprocess.check_output(['sinfo', '--noheader', '-o', '%t', '-n', self._nodename])
+
+    # The following methods retry on OSError.  This is intended to mitigate bug
+    # #6321, where fork() in node manager raises "OSError: [Errno 12] Cannot
+    # allocate memory".  That error kills the shutdown actor prematurely and
+    # tends to leave node manager in a wedged state where it won't allocate
+    # new nodes or shut down gracefully.  The underlying causes of the
+    # excessive memory usage behind the "Cannot allocate memory" error are
+    # still being investigated.
+
+    @ShutdownActorBase._retry((subprocess.CalledProcessError, OSError))
+    def cancel_shutdown(self, reason):
         if self._nodename:
-            self._set_node_state('RESUME')
-        return super(ComputeNodeShutdownActor, self).cancel_shutdown()
+            if self._get_slurm_state() in self.SLURM_DRAIN_STATES:
+                # Resume from "drng" or "drain"
+                self._set_node_state('RESUME')
+            else:
+                # Node is in a state such as 'idle' or 'alloc', so don't
+                # try to resume it; that would just raise an error.
+                pass
+        return super(ComputeNodeShutdownActor, self).cancel_shutdown(reason)
 
+    @ShutdownActorBase._retry((subprocess.CalledProcessError, OSError))
     @ShutdownActorBase._stop_if_window_closed
-    @ShutdownActorBase._retry((subprocess.CalledProcessError,))
     def issue_slurm_drain(self):
         self._set_node_state('DRAIN', 'Reason=Node Manager shutdown')
         self._logger.info("Waiting for SLURM node %s to drain", self._nodename)
         self._later.await_slurm_drain()
 
+    @ShutdownActorBase._retry((subprocess.CalledProcessError, OSError))
     @ShutdownActorBase._stop_if_window_closed
-    @ShutdownActorBase._retry((subprocess.CalledProcessError,))
     def await_slurm_drain(self):
-        output = subprocess.check_output(
-            ['sinfo', '--noheader', '-o', '%t', '-n', self._nodename])
+        output = self._get_slurm_state()
         if output in self.SLURM_END_STATES:
             self._later.shutdown_node()
         else:
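
The _retry decorator itself is defined on the base class and is not shown in
this diff; it reschedules the failed call through the actor's timer rather
than blocking.  A rough blocking approximation of the pattern, under those
assumptions:

    import functools
    import time

    def retry(errors, retry_wait=1, max_retry_wait=180):
        # Re-run the wrapped call whenever it raises one of the given
        # exception classes, doubling the wait after each failure.
        def decorator(func):
            @functools.wraps(func)
            def wrapper(*args, **kwargs):
                wait = retry_wait
                while True:
                    try:
                        return func(*args, **kwargs)
                    except errors:
                        time.sleep(wait)
                        wait = min(wait * 2, max_retry_wait)
            return wrapper
        return decorator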
index 14e804f385dc3e957fd707c34e7e4504ed1951e7..06b532ac5618d79915478930d05072278ad3753f 100644 (file)
@@ -53,6 +53,8 @@ class BaseComputeNodeDriver(object):
                 if new_pair is not None:
                     self.create_kwargs[new_pair[0]] = new_pair[1]
 
+        self.sizes = {sz.id: sz for sz in self.real.list_sizes()}
+
     def _init_ping_host(self, ping_host):
         self.ping_host = ping_host
 
index 2d189788a51b3d13e02e3fbdf67e4f43ff56f3cb..20988c2660d8c687e5541c4b41270becafa49fab 100644 (file)
@@ -65,12 +65,19 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
     def list_nodes(self):
         # Azure only supports filtering node lists by resource group.
         # Do our own filtering based on tag.
-        return [node for node in
+        nodes = [node for node in
                 super(ComputeNodeDriver, self).list_nodes()
                 if node.extra["tags"].get("arvados-class") == self.tags["arvados-class"]]
+        for n in nodes:
+            # Need to populate Node.size
+            if not n.size:
+                n.size = self.sizes[n.extra["properties"]["hardwareProfile"]["vmSize"]]
+        return nodes
 
     def broken(self, cloud_node):
         """Return true if libcloud has indicated the node is in a "broken" state."""
+        # UNKNOWN means the node state is unrecognized, which in practice means
+        # some failure state that the Azure libcloud driver doesn't know how to
+        # interpret.
         return (cloud_node.state in (cloud_types.NodeState.ERROR, cloud_types.NodeState.UNKNOWN))
 
     @classmethod
index c164a25b85f79e2e8c26dc91aef76554a0545fb7..44ee71569f6570a8ac09fa0da8f846bdd0b29ca3 100644 (file)
@@ -38,6 +38,7 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
         nodelist = super(ComputeNodeDriver, self).list_nodes()
         for node in nodelist:
             self._ensure_private_ip(node)
+            node.size = self.sizes["1"]
         return nodelist
 
     def create_node(self, size, arvados_node):
index 6afe3163c9cf89bb287ca3c0a904311345a36555..c0a72b7235f6137c0c92e321edc547e5e91ead0f 100644 (file)
@@ -75,6 +75,14 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
         self.real.ex_create_tags(cloud_node,
                                  {'Name': arvados_node_fqdn(arvados_node)})
 
+    def list_nodes(self):
+        # Need to populate Node.size
+        nodes = super(ComputeNodeDriver, self).list_nodes()
+        for n in nodes:
+            if not n.size:
+                n.size = self.sizes[n.extra["instance_type"]]
+        return nodes
+
     @classmethod
     def node_fqdn(cls, node):
         return node.name
index 36bfc96213b9888df3a9afb7bd9c37fd6b76d4e1..e461ed5b67ffdecd58fc74b5188a8b6883dcdb85 100644 (file)
@@ -34,6 +34,7 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
         super(ComputeNodeDriver, self).__init__(
             auth_kwargs, list_kwargs, create_kwargs,
             driver_class)
+        self._sizes_by_name = {sz.name: sz for sz in self.sizes.itervalues()}
         self._disktype_links = {dt.name: self._object_link(dt)
                                 for dt in self.real.ex_list_disktypes()}
 
@@ -103,9 +104,16 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
     def list_nodes(self):
         # The GCE libcloud driver only supports filtering node lists by zone.
         # Do our own filtering based on tag list.
-        return [node for node in
-                super(ComputeNodeDriver, self).list_nodes()
-                if self.node_tags.issubset(node.extra.get('tags', []))]
+        nodelist = [node for node in
+                    super(ComputeNodeDriver, self).list_nodes()
+                    if self.node_tags.issubset(node.extra.get('tags', []))]
+        # As of libcloud 0.18, the GCE driver sets node.size to the size's
+        # name, although it is supposed to be the actual size object.  Detect
+        # that case and monkeypatch the results accordingly.
+        if nodelist and not hasattr(nodelist[0].size, 'id'):
+            for node in nodelist:
+                node.size = self._sizes_by_name[node.size]
+        return nodelist
 
     @classmethod
     def _find_metadata(cls, metadata_items, key):
index 315df1c3f984e29a0edfc09c71f76051def7480d..dd45165deaa1514789c42428dacbb4bcf862b5a5 100644 (file)
@@ -5,7 +5,6 @@ from __future__ import absolute_import, print_function
 import ConfigParser
 import importlib
 import logging
-import ssl
 import sys
 
 import arvados
@@ -13,10 +12,10 @@ import httplib2
 import pykka
 from apiclient import errors as apierror
 
-# IOError is the base class for socket.error and friends.
+# IOError is the base class for socket.error, ssl.SSLError, and friends.
 # It seems like it hits the sweet spot for operations we want to retry:
 # it's low-level, but unlikely to catch code bugs.
-NETWORK_ERRORS = (IOError, ssl.SSLError)
+NETWORK_ERRORS = (IOError,)
 ARVADOS_ERRORS = NETWORK_ERRORS + (apierror.Error,)
 
 actor_class = pykka.ThreadingActor
@@ -41,6 +40,7 @@ class NodeManagerConfig(ConfigParser.SafeConfigParser):
                        'poll_time': '60',
                        'max_poll_time': '300',
                        'poll_stale_after': '600',
+                       'max_total_price': '0',
                        'boot_fail_after': str(sys.maxint),
                        'node_stale_after': str(60 * 60 * 2)},
             'Logging': {'file': '/dev/stderr',
@@ -116,7 +116,10 @@ class NodeManagerConfig(ConfigParser.SafeConfigParser):
             sec_words = sec_name.split(None, 2)
             if sec_words[0] != 'Size':
                 continue
-            size_kwargs[sec_words[1]] = self.get_section(sec_name, int)
+            size_spec = self.get_section(sec_name, int)
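+            # price may be fractional (dollars per hour), so convert it to a float.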
+            if 'price' in size_spec:
+                size_spec['price'] = float(size_spec['price'])
+            size_kwargs[sec_words[1]] = size_spec
         # EC2 node sizes are identified by id. GCE sizes are identified by name.
         matching_sizes = []
         for size in all_sizes:
index ddddd417e76134076b8a33eb5da13ad87dbfb391..243d3bfaa4cd13fbf9a540affc20ae6e562979a9 100644 (file)
@@ -25,6 +25,7 @@ class _BaseNodeTracker(object):
     def __init__(self):
         self.nodes = {}
         self.orphans = {}
+        self._blacklist = set()
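+        # Node keys added here are skipped when processing update() responses.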
 
     # Proxy the methods listed below to self.nodes.
     def _proxy_method(name):
@@ -43,6 +44,9 @@ class _BaseNodeTracker(object):
     def add(self, record):
         self.nodes[self.record_key(record)] = record
 
+    def blacklist(self, key):
+        self._blacklist.add(key)
+
     def update_record(self, key, item):
         setattr(self.nodes[key], self.RECORD_ATTR, item)
 
@@ -50,7 +54,9 @@ class _BaseNodeTracker(object):
         unseen = set(self.nodes.iterkeys())
         for item in response:
             key = self.item_key(item)
-            if key in unseen:
+            if key in self._blacklist:
+                continue
+            elif key in unseen:
                 unseen.remove(key)
                 self.update_record(key, item)
             else:
@@ -97,13 +103,15 @@ class NodeManagerDaemonActor(actor_class):
     def __init__(self, server_wishlist_actor, arvados_nodes_actor,
                  cloud_nodes_actor, cloud_update_actor, timer_actor,
                  arvados_factory, cloud_factory,
-                 shutdown_windows, min_size, min_nodes, max_nodes,
+                 shutdown_windows, server_calculator,
+                 min_nodes, max_nodes,
                  poll_stale_after=600,
                  boot_fail_after=1800,
                  node_stale_after=7200,
                  node_setup_class=dispatch.ComputeNodeSetupActor,
                  node_shutdown_class=dispatch.ComputeNodeShutdownActor,
-                 node_actor_class=dispatch.ComputeNodeMonitorActor):
+                 node_actor_class=dispatch.ComputeNodeMonitorActor,
+                 max_total_price=0):
         super(NodeManagerDaemonActor, self).__init__()
         self._node_setup = node_setup_class
         self._node_shutdown = node_shutdown_class
@@ -116,9 +124,11 @@ class NodeManagerDaemonActor(actor_class):
         self._logger = logging.getLogger('arvnodeman.daemon')
         self._later = self.actor_ref.proxy()
         self.shutdown_windows = shutdown_windows
-        self.min_cloud_size = min_size
+        self.server_calculator = server_calculator
+        self.min_cloud_size = self.server_calculator.cheapest_size()
         self.min_nodes = min_nodes
         self.max_nodes = max_nodes
+        self.max_total_price = max_total_price
         self.poll_stale_after = poll_stale_after
         self.boot_fail_after = boot_fail_after
         self.node_stale_after = node_stale_after
@@ -182,9 +192,14 @@ class NodeManagerDaemonActor(actor_class):
                     self._pair_nodes(record, arv_rec.arvados_node)
                     break
         for key, record in self.cloud_nodes.orphans.iteritems():
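+            # Also stop any pending shutdown actor for this delisted node
+            # (it may already be dead).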
+            if key in self.shutdowns:
+                try:
+                    self.shutdowns[key].stop().get()
+                except pykka.ActorDeadError:
+                    pass
+                del self.shutdowns[key]
             record.actor.stop()
             record.cloud_node = None
-            self.shutdowns.pop(key, None)
 
     def update_arvados_nodes(self, nodelist):
         self._update_poll_time('arvados_nodes')
@@ -199,51 +214,95 @@ class NodeManagerDaemonActor(actor_class):
                     self._pair_nodes(cloud_rec, arv_node)
                     break
 
-    def _nodes_up(self):
-        return sum(len(nodelist) for nodelist in
-                   [self.cloud_nodes, self.booted, self.booting])
-
-    def _nodes_busy(self):
+    def _nodes_up(self, size):
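+        # Count booting, booted, and listed cloud nodes of this size
+        # (all sizes if size is None).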
+        up = 0
+        up += sum(1
+                  for c in self.booting.itervalues()
+                  if size is None or c.cloud_size.get().id == size.id)
+        up += sum(1
+                  for i in (self.booted, self.cloud_nodes.nodes)
+                  for c in i.itervalues()
+                  if size is None or c.cloud_node.size.id == size.id)
+        return up
+
+    def _total_price(self):
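+        # Sum the hourly price of every booting, booted, and listed cloud node.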
+        cost = 0
+        cost += sum(self.server_calculator.find_size(c.cloud_size.get().id).price
+                    for c in self.booting.itervalues())
+        cost += sum(self.server_calculator.find_size(c.cloud_node.size.id).price
+                    for i in (self.booted, self.cloud_nodes.nodes)
+                    for c in i.itervalues())
+        return cost
+
+    def _nodes_busy(self, size):
         return sum(1 for busy in
                    pykka.get_all(rec.actor.in_state('busy') for rec in
-                                 self.cloud_nodes.nodes.itervalues())
+                                 self.cloud_nodes.nodes.itervalues()
+                                 if rec.cloud_node.size.id == size.id)
                    if busy)
 
-    def _nodes_missing(self):
+    def _nodes_missing(self, size):
         return sum(1 for arv_node in
                    pykka.get_all(rec.actor.arvados_node for rec in
                                  self.cloud_nodes.nodes.itervalues()
-                                 if rec.actor.cloud_node.get().id not in self.shutdowns)
+                                 if rec.cloud_node.size.id == size.id and rec.actor.cloud_node.get().id not in self.shutdowns)
                    if arv_node and cnode.arvados_node_missing(arv_node, self.node_stale_after))
 
-    def _nodes_wanted(self):
-        up_count = self._nodes_up()
-        under_min = self.min_nodes - up_count
-        over_max = up_count - self.max_nodes
+    def _size_wishlist(self, size):
+        return sum(1 for c in self.last_wishlist if c.id == size.id)
+
+    def _size_shutdowns(self, size):
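+        # Count pending shutdowns of this size, skipping actors that have
+        # already died.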
+        sh = 0
+        for c in self.shutdowns.itervalues():
+            try:
+                if c.cloud_node.get().size.id == size.id:
+                    sh += 1
+            except pykka.ActorDeadError:
+                pass
+        return sh
+
+    def _nodes_wanted(self, size):
+        total_up_count = self._nodes_up(None)
+        under_min = self.min_nodes - total_up_count
+        over_max = total_up_count - self.max_nodes
+        total_price = self._total_price()
+
         if over_max >= 0:
             return -over_max
-        elif under_min > 0:
+        elif under_min > 0 and size.id == self.min_cloud_size.id:
             return under_min
+
+        up_count = self._nodes_up(size) - (self._size_shutdowns(size) +
+                                           self._nodes_busy(size) +
+                                           self._nodes_missing(size))
+
+        self._logger.debug("%s: idle nodes %i, wishlist size %i", size.name, up_count, self._size_wishlist(size))
+
+        wanted = self._size_wishlist(size) - up_count
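+        # Enforce max_total_price: boot no more nodes than the remaining
+        # budget allows.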
+        if wanted > 0 and self.max_total_price and ((total_price + (size.price*wanted)) > self.max_total_price):
+            can_boot = int((self.max_total_price - total_price) / size.price)
+            if can_boot == 0:
+                self._logger.info("Not booting %s (price %s) because booting it would exceed max_total_price of %s (current total_price is %s)",
+                                  size.name, size.price, self.max_total_price, total_price)
+            return can_boot
         else:
-            up_count -= len(self.shutdowns) + self._nodes_busy() + self._nodes_missing()
-            return len(self.last_wishlist) - up_count
-
-    def _nodes_excess(self):
-        up_count = self._nodes_up() - len(self.shutdowns)
-        over_min = up_count - self.min_nodes
-        if over_min <= 0:
-            return over_min
-        else:
-            return up_count - self._nodes_busy() - len(self.last_wishlist)
+            return wanted
+
+    def _nodes_excess(self, size):
+        up_count = self._nodes_up(size) - self._size_shutdowns(size)
+        if size.id == self.min_cloud_size.id:
+            up_count -= self.min_nodes
+        return up_count - self._nodes_busy(size) - self._size_wishlist(size)
 
     def update_server_wishlist(self, wishlist):
         self._update_poll_time('server_wishlist')
         self.last_wishlist = wishlist
-        nodes_wanted = self._nodes_wanted()
-        if nodes_wanted > 0:
-            self._later.start_node()
-        elif (nodes_wanted < 0) and self.booting:
-            self._later.stop_booting_node()
+        for size in reversed(self.server_calculator.cloud_sizes):
+            nodes_wanted = self._nodes_wanted(size)
+            if nodes_wanted > 0:
+                self._later.start_node(size)
+            elif (nodes_wanted < 0) and self.booting:
+                self._later.stop_booting_node(size)
 
     def _check_poll_freshness(orig_func):
         """Decorator to inhibit a method when poll information is stale.
@@ -263,15 +322,11 @@ class NodeManagerDaemonActor(actor_class):
         return wrapper
 
     @_check_poll_freshness
-    def start_node(self):
-        nodes_wanted = self._nodes_wanted()
+    def start_node(self, cloud_size):
+        nodes_wanted = self._nodes_wanted(cloud_size)
         if nodes_wanted < 1:
             return None
         arvados_node = self.arvados_nodes.find_stale_node(self.node_stale_after)
-        try:
-            cloud_size = self.last_wishlist[self._nodes_up()]
-        except IndexError:
-            cloud_size = self.min_cloud_size
         self._logger.info("Want %s more nodes.  Booting a %s node.",
                           nodes_wanted, cloud_size.name)
         new_setup = self._node_setup.start(
@@ -286,7 +341,7 @@ class NodeManagerDaemonActor(actor_class):
                 time.time())
         new_setup.subscribe(self._later.node_up)
         if nodes_wanted > 1:
-            self._later.start_node()
+            self._later.start_node(cloud_size)
 
     def _get_actor_attrs(self, actor, *attr_names):
         return pykka.get_all([getattr(actor, name) for name in attr_names])
@@ -303,15 +358,15 @@ class NodeManagerDaemonActor(actor_class):
                              self._later.shutdown_unpaired_node, cloud_node.id)
 
     @_check_poll_freshness
-    def stop_booting_node(self):
-        nodes_excess = self._nodes_excess()
+    def stop_booting_node(self, size):
+        nodes_excess = self._nodes_excess(size)
         if (nodes_excess < 1) or not self.booting:
             return None
         for key, node in self.booting.iteritems():
-            if node.stop_if_no_cloud_node().get():
+            if node.cloud_size.get().id == size.id and node.stop_if_no_cloud_node().get():
                 del self.booting[key]
                 if nodes_excess > 1:
-                    self._later.stop_booting_node()
+                    self._later.stop_booting_node(size)
                 break
 
     def _begin_node_shutdown(self, node_actor, cancellable):
@@ -327,7 +382,7 @@ class NodeManagerDaemonActor(actor_class):
 
     @_check_poll_freshness
     def node_can_shutdown(self, node_actor):
-        if self._nodes_excess() > 0:
+        if self._nodes_excess(node_actor.cloud_node.get().size) > 0:
             self._begin_node_shutdown(node_actor, cancellable=True)
 
     def shutdown_unpaired_node(self, cloud_node_id):
@@ -341,11 +396,13 @@ class NodeManagerDaemonActor(actor_class):
             self._begin_node_shutdown(record.actor, cancellable=False)
 
     def node_finished_shutdown(self, shutdown_actor):
-        success, cloud_node = self._get_actor_attrs(shutdown_actor, 'success',
-                                                    'cloud_node')
+        cloud_node, success, cancel_reason = self._get_actor_attrs(
+            shutdown_actor, 'cloud_node', 'success', 'cancel_reason')
         shutdown_actor.stop()
         cloud_node_id = cloud_node.id
         if not success:
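+            # A shutdown cancelled because the node is broken means the node
+            # is unusable, so stop tracking it.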
+            if cancel_reason == self._node_shutdown.NODE_BROKEN:
+                self.cloud_nodes.blacklist(cloud_node_id)
             del self.shutdowns[cloud_node_id]
         elif cloud_node_id in self.booted:
             self.booted.pop(cloud_node_id).actor.stop()
index 06f66b71c244f5662fb0c9871f7c92b0603f1617..e0f0a5b1ec26045745fe479f76b8614eb700875d 100644 (file)
@@ -30,6 +30,9 @@ class ServerCalculator(object):
                     raise ValueError("unrecognized size field '%s'" % (name,))
                 setattr(self, name, override)
 
+            if self.price is None:
+                raise ValueError("Required field 'price' is None")
+
         def meets_constraints(self, **kwargs):
             for name, want_value in kwargs.iteritems():
                 have_value = getattr(self, name)
@@ -38,11 +41,12 @@ class ServerCalculator(object):
             return True
 
 
-    def __init__(self, server_list, max_nodes=None):
+    def __init__(self, server_list, max_nodes=None, max_price=None):
         self.cloud_sizes = [self.CloudSizeWrapper(s, **kws)
                             for s, kws in server_list]
         self.cloud_sizes.sort(key=lambda s: s.price)
         self.max_nodes = max_nodes or float('inf')
+        self.max_price = max_price or float('inf')
         self.logger = logging.getLogger('arvnodeman.jobqueue')
         self.logged_jobs = set()
 
@@ -69,20 +73,25 @@ class ServerCalculator(object):
         for job in queue:
             seen_jobs.add(job['uuid'])
             constraints = job['runtime_constraints']
-            want_count = self.coerce_int(constraints.get('min_nodes'), 1)
+            want_count = max(1, self.coerce_int(constraints.get('min_nodes'), 1))
             cloud_size = self.cloud_size_for_constraints(constraints)
             if cloud_size is None:
                 if job['uuid'] not in self.logged_jobs:
                     self.logged_jobs.add(job['uuid'])
                     self.logger.debug("job %s not satisfiable", job['uuid'])
-            elif (want_count <= self.max_nodes):
-                servers.extend([cloud_size.real] * max(1, want_count))
+            elif (want_count <= self.max_nodes) and (want_count*cloud_size.price <= self.max_price):
+                servers.extend([cloud_size.real] * want_count)
         self.logged_jobs.intersection_update(seen_jobs)
         return servers
 
     def cheapest_size(self):
         return self.cloud_sizes[0]
 
+    def find_size(self, sizeid):
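+        # Return the configured size with this id, or None if it is unknown.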
+        for s in self.cloud_sizes:
+            if s.id == sizeid:
+                return s
+        return None
 
 class JobQueueMonitorActor(clientactor.RemotePollLoopActor):
     """Actor to generate server wishlists from the job queue.
index 880158234da668ca212604252a070b8db30cddbd..e8c2fe661e5203469a1ee87341159aeb6cdc1aec 100644 (file)
@@ -62,7 +62,8 @@ def build_server_calculator(config):
     if not cloud_size_list:
         abort("No valid node sizes configured")
     return ServerCalculator(cloud_size_list,
-                            config.getint('Daemon', 'max_nodes'))
+                            config.getint('Daemon', 'max_nodes'),
+                            config.getfloat('Daemon', 'max_total_price'))
 
 def launch_pollers(config, server_calculator):
     poll_time = config.getint('Daemon', 'poll_time')
@@ -114,13 +115,14 @@ def main(args=None):
         cloud_node_updater, timer,
         config.new_arvados_client, config.new_cloud_client,
         config.shutdown_windows(),
-        server_calculator.cheapest_size(),
+        server_calculator,
         config.getint('Daemon', 'min_nodes'),
         config.getint('Daemon', 'max_nodes'),
         config.getint('Daemon', 'poll_stale_after'),
         config.getint('Daemon', 'boot_fail_after'),
         config.getint('Daemon', 'node_stale_after'),
-        node_setup, node_shutdown, node_monitor).proxy()
+        node_setup, node_shutdown, node_monitor,
+        max_total_price=config.getfloat('Daemon', 'max_total_price')).proxy()
 
     signal.pause()
     daemon_stopped = node_daemon.actor_ref.actor_stopped.is_set
index 50161c288c723a7a9a2753622a8f2481fd6484f7..0b5f1d360e8887a48637b68a94908b98160bc932 100644 (file)
@@ -7,14 +7,22 @@
 # through SLURM before shutting them down.
 #dispatcher = slurm
 
-# Node Manager will ensure that there are at least this many nodes
-# running at all times.
+# Node Manager will ensure that there are at least this many nodes running at
+# all times.  If Node Manager needs to start new idle nodes to satisfy
+# min_nodes, it will use the cheapest node type.  However, depending on usage
+# patterns, it may also satisfy min_nodes by keeping some more-expensive
+# nodes alive.
 min_nodes = 0
 
 # Node Manager will not start any compute nodes when at least this
 # many are running.
 max_nodes = 8
 
+# Upper limit on the rate of spending (in $/hr); Node Manager will not boot
+# additional nodes if the total price of the already running nodes meets or
+# exceeds this threshold.  The default of 0 means no limit.
+max_total_price = 0
+
 # Poll Azure nodes and Arvados for new information every N seconds.
 poll_time = 60
 
@@ -138,16 +146,25 @@ tag_cluster = zyxwv
 # the API server to ping
 ping_host = hostname:port
 
-[Size Standard_D3]
-# You can define any number of Size sections to list Azure sizes you're
-# willing to use.  The Node Manager should boot the cheapest size(s) that
-# can run jobs in the queue (N.B.: defining more than one size has not been
-# tested yet).
+# You can define any number of Size sections to list Azure sizes you're willing
+# to use.  The Node Manager should boot the cheapest size(s) that can run jobs
+# in the queue.  You must also provide the price per hour, as the Azure
+# compute driver does not currently report prices.
+#
+# See https://azure.microsoft.com/en-us/pricing/details/virtual-machines/
+# for a list of known machine types that may be used as a Size parameter.
+#
 # Each size section MUST define the number of cores available in this
 # size class (since libcloud does not provide any consistent API for exposing
 # this setting).
 # You may also want to define the amount of scratch space (expressed
 # in GB) for Crunch jobs.  You can also override Microsoft's provided
-# data fields by setting the same names here.
+# data fields by setting them here.
+
+[Size Standard_D3]
 cores = 4
-scratch = 200
+price = 0.56
+
+[Size Standard_D4]
+cores = 8
+price = 1.12
index 9b41ca14d54fc52956650649170574e90d99ae78..1c33bfd4578b62e403a057de3787c44caa198be4 100644 (file)
@@ -7,14 +7,22 @@
 # through SLURM before shutting them down.
 #dispatcher = slurm
 
-# Node Manager will ensure that there are at least this many nodes
-# running at all times.
+# Node Manager will ensure that there are at least this many nodes running at
+# all times.  If Node Manager needs to start new idle nodes to satisfy
+# min_nodes, it will use the cheapest node type.  However, depending on usage
+# patterns, it may also satisfy min_nodes by keeping some more-expensive
+# nodes alive.
 min_nodes = 0
 
 # Node Manager will not start any compute nodes when at least this
 # many are running.
 max_nodes = 8
 
+# Upper limit on the rate of spending (in $/hr); Node Manager will not boot
+# additional nodes if the total price of the already running nodes meets or
+# exceeds this threshold.  The default of 0 means no limit.
+max_total_price = 0
+
 # Poll EC2 nodes and Arvados for new information every N seconds.
 poll_time = 60
 
@@ -123,16 +131,24 @@ subnet_id = idstring
 # compute node.
 security_groups = idstring1, idstring2
 
-[Size t2.medium]
+
 # You can define any number of Size sections to list EC2 sizes you're
 # willing to use.  The Node Manager should boot the cheapest size(s) that
-# can run jobs in the queue (N.B.: defining more than one size has not been
-# tested yet).
+# can run jobs in the queue.
+#
 # Each size section MUST define the number of cores available in this
 # size class (since libcloud does not provide any consistent API for exposing
 # this setting).
 # You may also want to define the amount of scratch space (expressed
 # in GB) for Crunch jobs.  You can also override Amazon's provided
-# data fields by setting the same names here.
+# data fields (such as price per hour) by setting them here.
+
+[Size m4.large]
 cores = 2
+price = 0.126
+scratch = 100
+
+[Size m4.xlarge]
+cores = 4
+price = 0.252
 scratch = 100
index 67703703ba51b8ced8eac18b0ab9f72d65b16b08..06fcdff1552de44f95f4ea263b0e6d63db4d3409 100644 (file)
@@ -2,17 +2,25 @@
 # All times are in seconds unless specified otherwise.
 
 [Daemon]
-# Node Manager will ensure that there are at least this many nodes
-# running at all times.
+# Node Manager will ensure that there are at least this many nodes running at
+# all times.  If Node Manager needs to start new idle nodes to satisfy
+# min_nodes, it will use the cheapest node type.  However, depending on usage
+# patterns, it may also satisfy min_nodes by keeping some more-expensive
+# nodes alive.
 min_nodes = 0
 
 # Node Manager will not start any compute nodes when at least this
 # many are running.
 max_nodes = 8
 
 # Poll compute nodes and Arvados for new information every N seconds.
 poll_time = 60
 
+# Upper limit on the rate of spending (in $/hr); Node Manager will not boot
+# additional nodes if the total price of the already running nodes meets or
+# exceeds this threshold.  The default of 0 means no limit.
+max_total_price = 0
+
 # Polls have exponential backoff when services fail to respond.
 # This is the longest time to wait between polls.
 max_poll_time = 300
@@ -118,11 +126,10 @@ image = debian-7
 # See http://libcloud.readthedocs.org/en/latest/compute/drivers/gce.html#specifying-service-account-scopes
 # service_accounts = [{'email':'account@example.com', 'scopes':['storage-ro']}]
 
-[Size n1-standard-2]
+
 # You can define any number of Size sections to list node sizes you're
 # willing to use.  The Node Manager should boot the cheapest size(s) that
-# can run jobs in the queue (N.B.: defining more than one size has not been
-# tested yet).
+# can run jobs in the queue.
 #
 # The Size fields are interpreted the same way as with a libcloud NodeSize:
 # http://libcloud.readthedocs.org/en/latest/compute/api.html#libcloud.compute.base.NodeSize
@@ -135,6 +142,15 @@ image = debian-7
 # this setting).
 # You may also want to define the amount of scratch space (expressed
 # in GB) for Crunch jobs.
+# You can also override Google's provided data fields (such as price per hour)
+# by setting them here.
+
+[Size n1-standard-2]
 cores = 2
+price = 0.076
 scratch = 100
-ram = 512
+
+[Size n1-standard-4]
+cores = 4
+price = 0.152
+scratch = 200
\ No newline at end of file
index e7dcec9ef5f92f77c5ab27d36f02e196a010da65..3d838e49b443750be9608eec67738bbdb9b679f2 100644 (file)
@@ -24,6 +24,10 @@ setup(name='arvados-node-manager',
       url="https://arvados.org",
       license='GNU Affero General Public License, version 3.0',
       packages=find_packages(),
+      scripts=['bin/arvados-node-manager'],
+      data_files=[
+          ('share/doc/arvados-node-manager', ['agpl-3.0.txt', 'README.rst']),
+      ],
       install_requires=[
         'apache-libcloud>=0.16',
         'arvados-python-client>=0.1.20150206225333',
@@ -31,11 +35,10 @@ setup(name='arvados-node-manager',
         'python-daemon',
         ],
       dependency_links = [
-          "https://github.com/curoverse/libcloud/archive/apache-libcloud-0.18.1.dev3.zip"
+          "https://github.com/curoverse/libcloud/archive/apache-libcloud-0.18.1.dev4.zip"
       ],
-      scripts=['bin/arvados-node-manager'],
       test_suite='tests',
-      tests_require=['pbr<1.7.0', 'mock>=1.0', "apache-libcloud==0.18.1.dev3"],
+      tests_require=['pbr<1.7.0', 'mock>=1.0', "apache-libcloud==0.18.1.dev4"],
       zip_safe=False,
       cmdclass={'egg_info': tagger},
       )
index e718fc134b7a20723f7f33ef5aeefd5095763eb6..ecf83c693a5a9da5121b4c26bbb7318e197298fe 100644 (file)
@@ -27,24 +27,38 @@ class ComputeNodeSetupActorTestCase(testutil.ActorTestMixin, unittest.TestCase):
 
     def make_actor(self, arv_node=None):
         if not hasattr(self, 'timer'):
-            self.make_mocks(arvados_effect=[arv_node])
+            self.make_mocks(arvados_effect=[arv_node] if arv_node else None)
         self.setup_actor = dispatch.ComputeNodeSetupActor.start(
             self.timer, self.api_client, self.cloud_client,
             testutil.MockSize(1), arv_node).proxy()
 
+    def assert_node_properties_updated(self, uuid=None,
+                                       size=testutil.MockSize(1)):
+        self.api_client.nodes().update.assert_any_call(
+            uuid=(uuid or self.arvados_effect[-1]['uuid']),
+            body={
+                'properties': {
+                    'cloud_node': {
+                        'size': size.id,
+                        'price': size.price}}})
+
     def test_creation_without_arvados_node(self):
         self.make_actor()
         self.assertEqual(self.arvados_effect[-1],
                          self.setup_actor.arvados_node.get(self.TIMEOUT))
-        self.assertTrue(self.api_client.nodes().create().execute.called)
+        self.assertEqual(1, self.api_client.nodes().create().execute.call_count)
+        self.assertEqual(1, self.api_client.nodes().update().execute.call_count)
+        self.assert_node_properties_updated()
         self.assertEqual(self.cloud_client.create_node(),
                          self.setup_actor.cloud_node.get(self.TIMEOUT))
 
     def test_creation_with_arvados_node(self):
+        self.make_mocks(arvados_effect=[testutil.arvados_node_mock()]*2)
         self.make_actor(testutil.arvados_node_mock())
         self.assertEqual(self.arvados_effect[-1],
                          self.setup_actor.arvados_node.get(self.TIMEOUT))
-        self.assertTrue(self.api_client.nodes().update().execute.called)
+        self.assert_node_properties_updated()
+        self.assertEqual(2, self.api_client.nodes().update().execute.call_count)
         self.assertEqual(self.cloud_client.create_node(),
                          self.setup_actor.cloud_node.get(self.TIMEOUT))
 
@@ -98,8 +112,9 @@ class ComputeNodeSetupActorTestCase(testutil.ActorTestMixin, unittest.TestCase):
         self.make_actor()
         subscriber = mock.Mock(name='subscriber_mock')
         self.setup_actor.subscribe(subscriber)
-        self.api_client.nodes().create().execute.side_effect = [
-            testutil.arvados_node_mock()]
+        retry_resp = [testutil.arvados_node_mock()]
+        self.api_client.nodes().create().execute.side_effect = retry_resp
+        self.api_client.nodes().update().execute.side_effect = retry_resp
         self.wait_for_assignment(self.setup_actor, 'cloud_node')
         self.assertEqual(self.setup_actor.actor_ref.actor_urn,
                          subscriber.call_args[0][0].actor_ref.actor_urn)
@@ -116,11 +131,12 @@ class ComputeNodeSetupActorTestCase(testutil.ActorTestMixin, unittest.TestCase):
 
 class ComputeNodeShutdownActorMixin(testutil.ActorTestMixin):
     def make_mocks(self, cloud_node=None, arvados_node=None,
-                   shutdown_open=True):
+                   shutdown_open=True, node_broken=False):
         self.timer = testutil.MockTimer()
         self.shutdowns = testutil.MockShutdownTimer()
         self.shutdowns._set_state(shutdown_open, 300)
         self.cloud_client = mock.MagicMock(name='cloud_client')
+        self.cloud_client.broken.return_value = node_broken
         self.arvados_client = mock.MagicMock(name='arvados_client')
         self.updates = mock.MagicMock(name='update_mock')
         if cloud_node is None:
@@ -201,6 +217,8 @@ class ComputeNodeShutdownActorTestCase(ComputeNodeShutdownActorMixin,
         self.make_actor()
         self.check_success_flag(False, 2)
         self.assertFalse(self.cloud_client.destroy_node.called)
+        self.assertEqual(self.ACTOR_CLASS.WINDOW_CLOSED,
+                         self.shutdown_actor.cancel_reason.get(self.TIMEOUT))
 
     def test_shutdown_retries_when_cloud_fails(self):
         self.make_mocks()
@@ -210,6 +228,15 @@ class ComputeNodeShutdownActorTestCase(ComputeNodeShutdownActorMixin,
         self.cloud_client.destroy_node.return_value = True
         self.check_success_flag(True)
 
+    def test_shutdown_cancelled_when_cloud_fails_on_broken_node(self):
+        self.make_mocks(node_broken=True)
+        self.cloud_client.destroy_node.return_value = False
+        self.make_actor(start_time=0)
+        self.check_success_flag(False, 2)
+        self.assertEqual(1, self.cloud_client.destroy_node.call_count)
+        self.assertEqual(self.ACTOR_CLASS.NODE_BROKEN,
+                         self.shutdown_actor.cancel_reason.get(self.TIMEOUT))
+
     def test_late_subscribe(self):
         self.make_actor()
         subscriber = mock.Mock(name='subscriber_mock')
index c5097a717ca0e2a7ca31268010ea63defc102538..8648783bac5889f11a328af3b277bd1f21da5665 100644 (file)
@@ -60,12 +60,28 @@ class SLURMComputeNodeShutdownActorTestCase(ComputeNodeShutdownActorMixin,
         self.assertFalse(proc_mock.called)
 
     def test_node_undrained_when_shutdown_window_closes(self, proc_mock):
-        proc_mock.return_value = 'alloc\n'
+        proc_mock.side_effect = iter(['drng\n', 'idle\n'])
         self.make_mocks(arvados_node=testutil.arvados_node_mock(job_uuid=True))
         self.make_actor()
         self.check_success_flag(False, 2)
-        self.check_slurm_got_args(proc_mock, 'NodeName=compute99',
-                                  'State=RESUME')
+        self.check_slurm_got_args(proc_mock, 'NodeName=compute99', 'State=RESUME')
+
+    def test_alloc_node_undrained_when_shutdown_window_closes(self, proc_mock):
+        proc_mock.side_effect = iter(['alloc\n'])
+        self.make_mocks(arvados_node=testutil.arvados_node_mock(job_uuid=True))
+        self.make_actor()
+        self.check_success_flag(False, 2)
+        self.check_slurm_got_args(proc_mock, 'sinfo', '--noheader', '-o', '%t', '-n', 'compute99')
+
+    def test_cancel_shutdown_retry(self, proc_mock):
+        proc_mock.side_effect = iter([OSError, 'drain\n', OSError, 'idle\n'])
+        self.make_mocks(arvados_node=testutil.arvados_node_mock(job_uuid=True))
+        self.make_actor()
+        self.check_success_flag(False, 2)
+
+    def test_issue_slurm_drain_retry(self, proc_mock):
+        proc_mock.side_effect = iter([OSError, '', OSError, 'drng\n'])
+        self.check_success_after_reset(proc_mock)
 
     def test_arvados_node_cleaned_after_shutdown(self, proc_mock):
         proc_mock.return_value = 'drain\n'
index b9d7ee9fd0d27e34b5e1996095abfdd9c5d69219..41cb1aac8623ee8617f0464623aa4ce3fd3a7d28 100644 (file)
@@ -194,3 +194,33 @@ class GCEComputeNodeDriverTestCase(testutil.DriverTestMixin, unittest.TestCase):
         self.assertEqual(
             service_accounts,
             self.driver_mock().create_node.call_args[1]['ex_service_accounts'])
+
+    def test_fix_string_size(self):
+        # As of 0.18, the libcloud GCE driver sets node.size to the size's name.
+        # It's supposed to be the actual size object.  Make sure our driver
+        # patches that up in listings.
+        size = testutil.MockSize(2)
+        node = testutil.cloud_node_mock(size=size)
+        node.size = size.name
+        self.driver_mock().list_sizes.return_value = [size]
+        self.driver_mock().list_nodes.return_value = [node]
+        driver = self.new_driver()
+        nodelist = driver.list_nodes()
+        self.assertEqual(1, len(nodelist))
+        self.assertIs(node, nodelist[0])
+        self.assertIs(size, nodelist[0].size)
+
+    def test_skip_fix_when_size_not_string(self):
+        # Ensure we don't monkeypatch node sizes unless we need to.
+        size = testutil.MockSize(3)
+        node = testutil.cloud_node_mock(size=size)
+        self.driver_mock().list_nodes.return_value = [node]
+        driver = self.new_driver()
+        nodelist = driver.list_nodes()
+        self.assertEqual(1, len(nodelist))
+        self.assertIs(node, nodelist[0])
+        self.assertIs(size, nodelist[0].size)
+
+    def test_list_empty_nodes(self):
+        self.driver_mock().list_nodes.return_value = []
+        self.assertEqual([], self.new_driver().list_nodes())
index d43491e791c765e849115db722b4ed766f0968ab..e5669286543991efc99e2549051a1fb5720d7f39 100644 (file)
@@ -24,6 +24,7 @@ creds = dummy_creds
 
 [Size 1]
 cores = 1
+price = 0.8
 
 [Logging]
 file = /dev/null
@@ -55,6 +56,7 @@ testlogger = INFO
         size, kwargs = sizes[0]
         self.assertEqual('Small', size.name)
         self.assertEqual(1, kwargs['cores'])
+        self.assertEqual(0.8, kwargs['price'])
 
     def test_shutdown_windows(self):
         config = self.load_config()
index 0206f4c41d37cd8d81a391bac973e097672ddd61..200919bfb819183dfa8b50b426c7d33d60d64db9 100644 (file)
@@ -9,19 +9,48 @@ import mock
 import pykka
 
 import arvnodeman.daemon as nmdaemon
+from arvnodeman.jobqueue import ServerCalculator
 from arvnodeman.computenode.dispatch import ComputeNodeMonitorActor
 from . import testutil
+import logging
 
 class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
                                      unittest.TestCase):
-    def new_setup_proxy(self):
+    def mock_node_start(self, **kwargs):
         # Make sure that every time the daemon starts a setup actor,
         # it gets a new mock object back.
-        self.last_setup = mock.MagicMock(name='setup_proxy_mock')
-        return self.last_setup
+        get_cloud_size = mock.MagicMock()
+        get_cloud_size.get.return_value = kwargs["cloud_size"]
+        mock_actor = mock.MagicMock()
+        mock_proxy = mock.NonCallableMock(name='setup_mock_proxy',
+                                          cloud_size=get_cloud_size,
+                                          actor_ref=mock_actor)
+        mock_actor.proxy.return_value = mock_proxy
+
+        self.last_setup = mock_proxy
+        return mock_actor
+
+    def mock_node_shutdown(self, **kwargs):
+        # Make sure that every time the daemon starts a shutdown actor,
+        # it gets a new mock object back.
+        get_cloud_node = mock.MagicMock()
+        if "node_monitor" in kwargs:
+            get_cloud_node.get.return_value = kwargs["node_monitor"].proxy().cloud_node.get()
+        mock_actor = mock.MagicMock()
+        mock_proxy = mock.NonCallableMock(name='shutdown_mock_proxy',
+                                          cloud_node=get_cloud_node,
+                                          actor_ref=mock_actor)
+
+        mock_actor.proxy.return_value = mock_proxy
+        self.last_shutdown = mock_proxy
+
+        return mock_actor
 
     def make_daemon(self, cloud_nodes=[], arvados_nodes=[], want_sizes=[],
-                    min_size=testutil.MockSize(1), min_nodes=0, max_nodes=8):
+                    avail_sizes=[(testutil.MockSize(1), {"cores": 1})],
+                    min_nodes=0, max_nodes=8,
+                    shutdown_windows=[54, 5, 1],
+                    max_total_price=None):
         for name in ['cloud_nodes', 'arvados_nodes', 'server_wishlist']:
             setattr(self, name + '_poller', mock.MagicMock(name=name + '_mock'))
         self.arv_factory = mock.MagicMock(name='arvados_mock')
@@ -29,16 +58,22 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
         self.cloud_factory().node_start_time.return_value = time.time()
         self.cloud_updates = mock.MagicMock(name='updates_mock')
         self.timer = testutil.MockTimer(deliver_immediately=False)
+
         self.node_setup = mock.MagicMock(name='setup_mock')
-        self.node_setup.start().proxy.side_effect = self.new_setup_proxy
+        self.node_setup.start.side_effect = self.mock_node_start
         self.node_setup.reset_mock()
+
         self.node_shutdown = mock.MagicMock(name='shutdown_mock')
+        self.node_shutdown.start.side_effect = self.mock_node_shutdown
+
         self.daemon = nmdaemon.NodeManagerDaemonActor.start(
             self.server_wishlist_poller, self.arvados_nodes_poller,
             self.cloud_nodes_poller, self.cloud_updates, self.timer,
             self.arv_factory, self.cloud_factory,
-            [54, 5, 1], min_size, min_nodes, max_nodes, 600, 1800, 3600,
-            self.node_setup, self.node_shutdown).proxy()
+            shutdown_windows, ServerCalculator(avail_sizes),
+            min_nodes, max_nodes, 600, 1800, 3600,
+            self.node_setup, self.node_shutdown,
+            max_total_price=max_total_price).proxy()
         if cloud_nodes is not None:
             self.daemon.update_cloud_nodes(cloud_nodes).get(self.TIMEOUT)
         if arvados_nodes is not None:
@@ -109,7 +144,7 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
     def test_old_arvados_node_not_double_assigned(self):
         arv_node = testutil.arvados_node_mock(3, age=9000)
         size = testutil.MockSize(3)
-        self.make_daemon(arvados_nodes=[arv_node])
+        self.make_daemon(arvados_nodes=[arv_node], avail_sizes=[(size, {"cores":1})])
         self.daemon.update_server_wishlist([size]).get(self.TIMEOUT)
         self.daemon.update_server_wishlist([size, size]).get(self.TIMEOUT)
         self.stop_proxy(self.daemon)
@@ -165,11 +200,19 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
                          arvados_nodes=[testutil.arvados_node_mock(1),
                                         testutil.arvados_node_mock(2, last_ping_at='1970-01-01T01:02:03.04050607Z')],
                          want_sizes=[size])
-        self.daemon.shutdowns.get()[cloud_nodes[1].id] = True
+
+        get_cloud_node = mock.MagicMock(name="get_cloud_node")
+        get_cloud_node.get.return_value = cloud_nodes[1]
+        mock_node_monitor = mock.MagicMock()
+        mock_node_monitor.proxy.return_value = mock.NonCallableMock(cloud_node=get_cloud_node)
+        mock_shutdown = self.node_shutdown.start(node_monitor=mock_node_monitor)
+
+        self.daemon.shutdowns.get()[cloud_nodes[1].id] = mock_shutdown.proxy()
+
         self.assertEqual(2, self.alive_monitor_count())
         for mon_ref in self.monitor_list():
             self.daemon.node_can_shutdown(mon_ref.proxy()).get(self.TIMEOUT)
-        self.assertEqual(0, self.node_shutdown.start.call_count)
+        self.assertEqual(1, self.node_shutdown.start.call_count)
 
     def test_booting_nodes_counted(self):
         cloud_node = testutil.cloud_node_mock(1)
@@ -183,16 +226,19 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
         self.assertEqual(1, self.node_setup.start.call_count)
 
     def test_boot_new_node_when_all_nodes_busy(self):
+        size = testutil.MockSize(2)
         arv_node = testutil.arvados_node_mock(2, job_uuid=True)
-        self.make_daemon([testutil.cloud_node_mock(2)], [arv_node],
-                         [testutil.MockSize(2)])
+        self.make_daemon([testutil.cloud_node_mock(2, size=size)], [arv_node],
+                         [size], avail_sizes=[(size, {"cores":1})])
         self.stop_proxy(self.daemon)
         self.assertTrue(self.node_setup.start.called)
 
     def test_boot_new_node_below_min_nodes(self):
         min_size = testutil.MockSize(1)
         wish_size = testutil.MockSize(3)
-        self.make_daemon([], [], None, min_size=min_size, min_nodes=2)
+        avail_sizes = [(min_size, {"cores": 1}),
+                       (wish_size, {"cores": 3})]
+        self.make_daemon([], [], None, avail_sizes=avail_sizes, min_nodes=2)
         self.daemon.update_server_wishlist([wish_size]).get(self.TIMEOUT)
         self.daemon.update_cloud_nodes([]).get(self.TIMEOUT)
         self.daemon.update_server_wishlist([wish_size]).get(self.TIMEOUT)
@@ -222,7 +268,8 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
             cloud_node = testutil.cloud_node_mock(id_num)
         if arv_node is None:
             arv_node = testutil.arvados_node_mock(id_num)
-        self.make_daemon(want_sizes=[testutil.MockSize(id_num)])
+        self.make_daemon(want_sizes=[testutil.MockSize(id_num)],
+                         avail_sizes=[(testutil.MockSize(id_num), {"cores":1})])
         self.daemon.max_nodes.get(self.TIMEOUT)
         self.assertEqual(1, self.node_setup.start.call_count)
         self.last_setup.cloud_node.get.return_value = cloud_node
@@ -373,7 +420,7 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
 
     def test_all_booting_nodes_tried_to_shut_down(self):
         size = testutil.MockSize(2)
-        self.make_daemon(want_sizes=[size])
+        self.make_daemon(want_sizes=[size], avail_sizes=[(size, {"cores":1})])
         self.daemon.max_nodes.get(self.TIMEOUT)
         setup1 = self.last_setup
         setup1.stop_if_no_cloud_node().get.return_value = False
@@ -437,21 +484,43 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
         self.make_daemon([cloud_node], [testutil.arvados_node_mock(5)])
         self.assertEqual(1, self.alive_monitor_count())
         monitor = self.monitor_list()[0].proxy()
+        self.daemon.node_can_shutdown(monitor).get(self.TIMEOUT)
+        self.last_shutdown.success.get.return_value = False
+        self.daemon.node_finished_shutdown(self.last_shutdown).get(self.TIMEOUT)
+        self.assertEqual(1, self.alive_monitor_count())
+
+        self.daemon.node_can_shutdown(monitor).get(self.TIMEOUT)
+        self.last_shutdown.success.get.return_value = True
+        self.last_shutdown.stop.side_effect = lambda: monitor.stop()
+        self.daemon.node_finished_shutdown(self.last_shutdown).get(self.TIMEOUT)
+        self.assertEqual(0, self.alive_monitor_count())
+
+    def test_broken_node_blackholed_after_cancelled_shutdown(self):
+        size = testutil.MockSize(8)
+        cloud_node = testutil.cloud_node_mock(8, size=size)
+        wishlist = [size]
+        self.make_daemon([cloud_node], [testutil.arvados_node_mock(8)],
+                         wishlist, avail_sizes=[(size, {"cores":1})])
+        self.assertEqual(1, self.alive_monitor_count())
+        self.assertFalse(self.node_setup.start.called)
+        monitor = self.monitor_list()[0].proxy()
         shutdown_proxy = self.node_shutdown.start().proxy
         shutdown_proxy().cloud_node.get.return_value = cloud_node
         shutdown_proxy().success.get.return_value = False
-        shutdown_proxy.reset_mock()
+        shutdown_proxy().cancel_reason.get.return_value = self.node_shutdown.NODE_BROKEN
+        self.daemon.update_server_wishlist([]).get(self.TIMEOUT)
         self.daemon.node_can_shutdown(monitor).get(self.TIMEOUT)
-        self.assertTrue(shutdown_proxy.called)
         self.daemon.node_finished_shutdown(shutdown_proxy()).get(self.TIMEOUT)
-        shutdown_proxy().success.get.return_value = True
-        shutdown_proxy.reset_mock()
-        self.daemon.node_can_shutdown(monitor).get(self.TIMEOUT)
-        self.assertTrue(shutdown_proxy.called)
+        self.daemon.update_cloud_nodes([cloud_node]).get(self.TIMEOUT)
+        self.daemon.update_server_wishlist(wishlist).get(self.TIMEOUT)
+        self.stop_proxy(self.daemon)
+        self.assertEqual(1, self.node_setup.start.call_count)
 
     def test_nodes_shutting_down_replaced_below_max_nodes(self):
-        cloud_node = testutil.cloud_node_mock(6)
-        self.make_daemon([cloud_node], [testutil.arvados_node_mock(6)])
+        size = testutil.MockSize(6)
+        cloud_node = testutil.cloud_node_mock(6, size=size)
+        self.make_daemon([cloud_node], [testutil.arvados_node_mock(6)],
+                         avail_sizes=[(size, {"cores":1})])
         self.assertEqual(1, self.alive_monitor_count())
         monitor = self.monitor_list()[0].proxy()
         self.daemon.node_can_shutdown(monitor).get(self.TIMEOUT)
@@ -475,9 +544,11 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
         self.assertFalse(self.node_setup.start.called)
 
     def test_nodes_shutting_down_count_against_excess(self):
-        cloud_nodes = [testutil.cloud_node_mock(n) for n in [8, 9]]
-        arv_nodes = [testutil.arvados_node_mock(n) for n in [8, 9]]
-        self.make_daemon(cloud_nodes, arv_nodes, [testutil.MockSize(8)])
+        size = testutil.MockSize(8)
+        cloud_nodes = [testutil.cloud_node_mock(n, size=size) for n in [8, 9]]
+        arv_nodes = [testutil.arvados_node_mock(n, size=size) for n in [8, 9]]
+        self.make_daemon(cloud_nodes, arv_nodes, [size],
+                         avail_sizes=[(size, {"cores":1})])
         self.assertEqual(2, self.alive_monitor_count())
         for mon_ref in self.monitor_list():
             self.daemon.node_can_shutdown(mon_ref.proxy()).get(self.TIMEOUT)
@@ -505,3 +576,142 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
         self.timer.deliver()
         self.stop_proxy(self.daemon)
         self.assertEqual(1, self.node_setup.start.call_count)
+
+    def test_shutdown_actor_stopped_when_cloud_node_delisted(self):
+        self.make_daemon(cloud_nodes=[testutil.cloud_node_mock()])
+        self.assertEqual(1, self.alive_monitor_count())
+        monitor = self.monitor_list()[0].proxy()
+        self.daemon.node_can_shutdown(monitor).get(self.TIMEOUT)
+        self.daemon.update_cloud_nodes([]).get(self.TIMEOUT)
+        self.stop_proxy(self.daemon)
+        self.assertEqual(
+            1, self.last_shutdown.stop.call_count)
+
+    def test_shutdown_actor_cleanup_copes_with_dead_actors(self):
+        self.make_daemon(cloud_nodes=[testutil.cloud_node_mock()])
+        self.assertEqual(1, self.alive_monitor_count())
+        monitor = self.monitor_list()[0].proxy()
+        self.daemon.node_can_shutdown(monitor).get(self.TIMEOUT)
+        # We're mainly testing that update_cloud_nodes catches and handles
+        # the ActorDeadError.
+        self.last_shutdown.stop.side_effect = pykka.ActorDeadError
+        self.daemon.update_cloud_nodes([]).get(self.TIMEOUT)
+        self.stop_proxy(self.daemon)
+        self.assertEqual(1, self.last_shutdown.stop.call_count)
+
+    def busywait(self, f):
+        n = 0
+        while not f() and n < 10:
+            time.sleep(.1)
+            n += 1
+        self.assertTrue(f())
+
+    def test_node_create_two_sizes(self):
+        small = testutil.MockSize(1)
+        big = testutil.MockSize(2)
+        avail_sizes = [(testutil.MockSize(1), {"cores":1}),
+                        (testutil.MockSize(2), {"cores":2})]
+        self.make_daemon(want_sizes=[small, small, small, big],
+                         avail_sizes=avail_sizes, max_nodes=4)
+
+        # the daemon runs in another thread, so we need to wait and see
+        # if it does all the work we're expecting it to do before stopping it.
+        self.busywait(lambda: self.node_setup.start.call_count == 4)
+        booting = self.daemon.booting.get(self.TIMEOUT)
+        self.stop_proxy(self.daemon)
+        sizecounts = {a[0].id: 0 for a in avail_sizes}
+        for b in booting.itervalues():
+            sizecounts[b.cloud_size.get().id] += 1
+        logging.info(sizecounts)
+        self.assertEqual(3, sizecounts[small.id])
+        self.assertEqual(1, sizecounts[big.id])
+
+    def test_node_max_nodes_two_sizes(self):
+        small = testutil.MockSize(1)
+        big = testutil.MockSize(2)
+        avail_sizes = [(testutil.MockSize(1), {"cores":1}),
+                        (testutil.MockSize(2), {"cores":2})]
+        self.make_daemon(want_sizes=[small, small, small, big],
+                         avail_sizes=avail_sizes, max_nodes=3)
+
+        # the daemon runs in another thread, so we need to wait and see
+        # if it does all the work we're expecting it to do before stopping it.
+        self.busywait(lambda: self.node_setup.start.call_count == 3)
+        booting = self.daemon.booting.get(self.TIMEOUT)
+        self.stop_proxy(self.daemon)
+        sizecounts = {a[0].id: 0 for a in avail_sizes}
+        for b in booting.itervalues():
+            sizecounts[b.cloud_size.get().id] += 1
+        self.assertEqual(2, sizecounts[small.id])
+        self.assertEqual(1, sizecounts[big.id])
+
+    def test_wishlist_reconfigure(self):
+        small = testutil.MockSize(1)
+        big = testutil.MockSize(2)
+        avail_sizes = [(small, {"cores":1}), (big, {"cores":2})]
+
+        self.make_daemon(cloud_nodes=[testutil.cloud_node_mock(1, small),
+                                      testutil.cloud_node_mock(2, small),
+                                      testutil.cloud_node_mock(3, big)],
+                         arvados_nodes=[testutil.arvados_node_mock(1),
+                                        testutil.arvados_node_mock(2),
+                                        testutil.arvados_node_mock(3)],
+                         want_sizes=[small, small, big],
+                         avail_sizes=avail_sizes)
+
+        self.daemon.update_server_wishlist([small, big, big]).get(self.TIMEOUT)
+
+        self.assertEqual(0, self.node_shutdown.start.call_count)
+
+        for c in self.daemon.cloud_nodes.get().nodes.itervalues():
+            self.daemon.node_can_shutdown(c.actor)
+
+        booting = self.daemon.booting.get()
+        shutdowns = self.daemon.shutdowns.get()
+
+        self.stop_proxy(self.daemon)
+
+        self.assertEqual(1, self.node_setup.start.call_count)
+        self.assertEqual(1, self.node_shutdown.start.call_count)
+
+        # booting a new big node
+        sizecounts = {a[0].id: 0 for a in avail_sizes}
+        for b in booting.itervalues():
+            sizecounts[b.cloud_size.get().id] += 1
+        self.assertEqual(0, sizecounts[small.id])
+        self.assertEqual(1, sizecounts[big.id])
+
+        # shutting down a small node
+        sizecounts = {a[0].id: 0 for a in avail_sizes}
+        for b in shutdowns.itervalues():
+            sizecounts[b.cloud_node.get().size.id] += 1
+        self.assertEqual(1, sizecounts[small.id])
+        self.assertEqual(0, sizecounts[big.id])
+
+    def test_node_max_price(self):
+        small = testutil.MockSize(1)
+        big = testutil.MockSize(2)
+        avail_sizes = [(testutil.MockSize(1), {"cores":1, "price":1}),
+                        (testutil.MockSize(2), {"cores":2, "price":2})]
+        self.make_daemon(want_sizes=[small, small, small, big],
+                         avail_sizes=avail_sizes,
+                         max_nodes=4,
+                         max_total_price=4)
+        # the daemon runs in another thread, so we need to wait and see
+        # if it does all the work we're expecting it to do before stopping it.
+        self.busywait(lambda: self.node_setup.start.call_count == 3)
+        booting = self.daemon.booting.get()
+        self.stop_proxy(self.daemon)
+
+        sizecounts = {a[0].id: 0 for a in avail_sizes}
+        for b in booting.itervalues():
+            sizecounts[b.cloud_size.get().id] += 1
+        logging.info(sizecounts)
+
+        # Booting 3 small nodes and no big node would also partially satisfy
+        # the wishlist and come in under the price cap.  However, the way
+        # update_server_wishlist() currently works effectively results in
+        # round-robin creation of one node of each size in the wishlist, so
+        # test for that behavior.
+        self.assertEqual(2, sizecounts[small.id])
+        self.assertEqual(1, sizecounts[big.id])
index 4c97aed8b109a576843105bad7cf7f62b14426ce..d4dc42f13959e5bef28aad66dd3ef4d0e0abe13e 100644 (file)
@@ -48,6 +48,16 @@ class ServerCalculatorTestCase(unittest.TestCase):
                                   {'min_scratch_mb_per_node': 200})
         self.assertEqual(6, len(servlist))
 
+    def test_ignore_too_expensive_jobs(self):
+        servcalc = self.make_calculator([1, 2], max_nodes=12, max_price=6)
+        servlist = self.calculate(servcalc,
+                                  {'min_cores_per_node': 1, 'min_nodes': 6})
+        self.assertEqual(6, len(servlist))
+
+        servlist = self.calculate(servcalc,
+                                  {'min_cores_per_node': 2, 'min_nodes': 6})
+        self.assertEqual(0, len(servlist))
+
     def test_job_requesting_max_nodes_accepted(self):
         servcalc = self.make_calculator([1], max_nodes=4)
         servlist = self.calculate(servcalc, {'min_nodes': 4})
@@ -57,6 +67,45 @@ class ServerCalculatorTestCase(unittest.TestCase):
         servcalc = self.make_calculator([2, 4, 1, 3])
         self.assertEqual(testutil.MockSize(1), servcalc.cheapest_size())
 
+    def test_next_biggest(self):
+        servcalc = self.make_calculator([1, 2, 4, 8])
+        servlist = self.calculate(servcalc,
+                                  {'min_cores_per_node': 3},
+                                  {'min_cores_per_node': 6})
+        self.assertEqual([servcalc.cloud_sizes[2].id,
+                          servcalc.cloud_sizes[3].id],
+                         [s.id for s in servlist])
+
+    def test_multiple_sizes(self):
+        servcalc = self.make_calculator([1, 2])
+        servlist = self.calculate(servcalc,
+                                  {'min_cores_per_node': 2},
+                                  {'min_cores_per_node': 1},
+                                  {'min_cores_per_node': 1})
+        self.assertEqual([servcalc.cloud_sizes[1].id,
+                          servcalc.cloud_sizes[0].id,
+                          servcalc.cloud_sizes[0].id],
+                         [s.id for s in servlist])
+
+        servlist = self.calculate(servcalc,
+                                  {'min_cores_per_node': 1},
+                                  {'min_cores_per_node': 2},
+                                  {'min_cores_per_node': 1})
+        self.assertEqual([servcalc.cloud_sizes[0].id,
+                          servcalc.cloud_sizes[1].id,
+                          servcalc.cloud_sizes[0].id],
+                         [s.id for s in servlist])
+
+        servlist = self.calculate(servcalc,
+                                  {'min_cores_per_node': 1},
+                                  {'min_cores_per_node': 1},
+                                  {'min_cores_per_node': 2})
+        self.assertEqual([servcalc.cloud_sizes[0].id,
+                          servcalc.cloud_sizes[0].id,
+                          servcalc.cloud_sizes[1].id],
+                         [s.id for s in servlist])
+
 
 class JobQueueMonitorActorTestCase(testutil.RemotePollLoopActorTestMixin,
                                    unittest.TestCase):
@@ -82,4 +131,3 @@ class JobQueueMonitorActorTestCase(testutil.RemotePollLoopActorTestMixin,
 
 if __name__ == '__main__':
     unittest.main()
-
index 82d6479e24ae53b33b676637ca772d867326b196..e543c2891698c6f0626c3bf5f444cd1c0b49a86b 100644 (file)
@@ -30,6 +30,7 @@ def arvados_node_mock(node_num=99, job_uuid=None, age=-1, **kwargs):
             'ip_address': ip_address_mock(node_num),
             'job_uuid': job_uuid,
             'crunch_worker_state': crunch_worker_state,
+            'properties': {},
             'info': {'ping_secret': 'defaulttestsecret'}}
     node.update(kwargs)
     return node
@@ -44,17 +45,6 @@ def cloud_object_mock(name_id, **extra):
     cloud_object.extra = extra
     return cloud_object
 
-def cloud_node_mock(node_num=99, **extra):
-    node = mock.NonCallableMagicMock(
-        ['id', 'name', 'state', 'public_ips', 'private_ips', 'driver', 'size',
-         'image', 'extra'],
-        name='cloud_node')
-    node.id = str(node_num)
-    node.name = node.id
-    node.public_ips = []
-    node.private_ips = [ip_address_mock(node_num)]
-    node.extra = extra
-    return node
 
 def cloud_node_fqdn(node):
     # We intentionally put the FQDN somewhere goofy to make sure tested code is
@@ -148,3 +138,16 @@ class RemotePollLoopActorTestMixin(ActorTestMixin):
         self.subscriber = mock.Mock(name='subscriber_mock')
         self.monitor = self.TEST_CLASS.start(
             self.client, self.timer, *args, **kwargs).proxy()
+
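+# Note: cloud_node_mock now accepts a size argument (default MockSize(1))
+# so tests can exercise size-aware node handling.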
+def cloud_node_mock(node_num=99, size=MockSize(1), **extra):
+    node = mock.NonCallableMagicMock(
+        ['id', 'name', 'state', 'public_ips', 'private_ips', 'driver', 'size',
+         'image', 'extra'],
+        name='cloud_node')
+    node.id = str(node_num)
+    node.name = node.id
+    node.size = size
+    node.public_ips = []
+    node.private_ips = [ip_address_mock(node_num)]
+    node.extra = extra
+    return node
diff --git a/tools/keep-exercise/.gitignore b/tools/keep-exercise/.gitignore
new file mode 100644 (file)
index 0000000..6a1d10c
--- /dev/null
@@ -0,0 +1 @@
+keep-exercise
diff --git a/tools/keep-exercise/keep-exercise.go b/tools/keep-exercise/keep-exercise.go
new file mode 100644 (file)
index 0000000..9dc8f94
--- /dev/null
@@ -0,0 +1,182 @@
+// Testing tool for Keep services.
+//
+// keep-exercise helps measure throughput and test reliability under
+// various usage patterns.
+//
+// By default, it reads and writes blocks containing 2^26 NUL
+// bytes. This generates network traffic without consuming much disk
+// space.
+//
+// For a more realistic test, enable -vary-request. Warning: this will
+// fill your storage volumes with random data if you leave it running,
+// which can cost you money or leave you with too little room for
+// useful data.
+//
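+// Example invocations (hypothetical; host and flag values are placeholders):
+//
+//     keep-exercise -rthreads 4 -wthreads 2 -stats-interval 5s
+//     keep-exercise -url http://keep0.example.com:25107 -vary-request
+//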
+package main
+
+import (
+       "crypto/rand"
+       "encoding/binary"
+       "flag"
+       "io"
+       "io/ioutil"
+       "log"
+       "time"
+
+       "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
+       "git.curoverse.com/arvados.git/sdk/go/keepclient"
+)
+
+// Command line config knobs
+var (
+       BlockSize     = flag.Int("block-size", keepclient.BLOCKSIZE, "bytes per read/write op")
+       ReadThreads   = flag.Int("rthreads", 1, "number of concurrent readers")
+       WriteThreads  = flag.Int("wthreads", 1, "number of concurrent writers")
+       VaryRequest   = flag.Bool("vary-request", false, "vary the data for each request: consumes disk space, exercises write behavior")
+       VaryThread    = flag.Bool("vary-thread", false, "use a different data block for each of the -wthreads writers")
+       Replicas      = flag.Int("replicas", 1, "replication level for writing")
+       StatsInterval = flag.Duration("stats-interval", time.Second, "time interval between IO stats reports, or 0 to disable")
+       ServiceURL    = flag.String("url", "", "specify scheme://host of a single keep service to exercise (instead of using all advertised services like normal clients)")
+       ServiceUUID   = flag.String("uuid", "", "specify UUID of a single advertised keep service to exercise")
+)
+
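+// main fans out *WriteThreads writer goroutines and *ReadThreads reader
+// goroutines: makeBufs feeds data blocks to doWrites via nextBuf, doWrites
+// feeds successfully written locators to doReads via nextLocator, and
+// countBeans aggregates the byte and error counters from all of them.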
+func main() {
+       flag.Parse()
+
+       arv, err := arvadosclient.MakeArvadosClient()
+       if err != nil {
+               log.Fatal(err)
+       }
+       kc, err := keepclient.MakeKeepClient(&arv)
+       if err != nil {
+               log.Fatal(err)
+       }
+       kc.Want_replicas = *Replicas
+       kc.Client.Timeout = 10 * time.Minute
+
+       overrideServices(kc)
+
+       nextBuf := make(chan []byte, *WriteThreads)
+       nextLocator := make(chan string, *ReadThreads+*WriteThreads)
+
+       go countBeans(nextLocator)
+       for i := 0; i < *WriteThreads; i++ {
+               go makeBufs(nextBuf, i)
+               go doWrites(kc, nextBuf, nextLocator)
+       }
+       for i := 0; i < *ReadThreads; i++ {
+               go doReads(kc, nextLocator)
+       }
+       // Block forever; the worker goroutines do all the work.
+       select {}
+}
+
+// Send 1234 to bytesInChan when we receive 1234 bytes from keepstore.
+var bytesInChan = make(chan uint64)
+var bytesOutChan = make(chan uint64)
+
+// Send struct{}{} to errorsChan when an error happens.
+var errorsChan = make(chan struct{})
+
+func countBeans(nextLocator chan string) {
+       t0 := time.Now()
+       var tickChan <-chan time.Time
+       if *StatsInterval > 0 {
+               tickChan = time.NewTicker(*StatsInterval).C
+       }
+       var bytesIn uint64
+       var bytesOut uint64
+       var errors uint64
+       for {
+               select {
+               case <-tickChan:
+                       elapsed := time.Since(t0)
+                       log.Printf("%v elapsed: read %v bytes (%.1f MiB/s), wrote %v bytes (%.1f MiB/s), errors %d",
+                               elapsed,
+                               bytesIn, (float64(bytesIn) / elapsed.Seconds() / 1048576),
+                               bytesOut, (float64(bytesOut) / elapsed.Seconds() / 1048576),
+                               errors,
+                       )
+               case i := <-bytesInChan:
+                       bytesIn += i
+               case o := <-bytesOutChan:
+                       bytesOut += o
+               case <-errorsChan:
+                       errors++
+               }
+       }
+}
+
+func makeBufs(nextBuf chan []byte, threadID int) {
+       buf := make([]byte, *BlockSize)
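+       // With -vary-thread, stamp this writer's ID into the block so each
+       // writer produces a distinct (but per-thread constant) block.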
+       if *VaryThread {
+               binary.PutVarint(buf, int64(threadID))
+       }
+       for {
+               if *VaryRequest {
+                       buf = make([]byte, *BlockSize)
+                       if _, err := io.ReadFull(rand.Reader, buf); err != nil {
+                               log.Fatal(err)
+                       }
+               }
+               nextBuf <- buf
+       }
+}
+
+func doWrites(kc *keepclient.KeepClient, nextBuf chan []byte, nextLocator chan string) {
+       for buf := range nextBuf {
+               locator, _, err := kc.PutB(buf)
+               if err != nil {
+                       log.Print(err)
+                       errorsChan <- struct{}{}
+                       continue
+               }
+               bytesOutChan <- uint64(len(buf))
+               for cap(nextLocator) > len(nextLocator)+*WriteThreads {
+                       // Give the readers something to do, unless
+                       // they have lots queued up already.
+                       nextLocator <- locator
+               }
+       }
+}
+
+func doReads(kc *keepclient.KeepClient, nextLocator chan string) {
+       for locator := range nextLocator {
+               rdr, size, url, err := kc.Get(locator)
+               if err != nil {
+                       log.Print(err)
+                       errorsChan <- struct{}{}
+                       continue
+               }
+               n, err := io.Copy(ioutil.Discard, rdr)
+               rdr.Close()
+               if n != size || err != nil {
+                       log.Printf("Got %d bytes (expected %d) from %s: %v", n, size, url, err)
+                       errorsChan <- struct{}{}
+                       // Note we don't count the bytes received in
+                       // partial/corrupt responses: we are measuring
+                       // throughput, not resource consumption.
+                       continue
+               }
+               bytesInChan <- uint64(n)
+       }
+}
+
+func overrideServices(kc *keepclient.KeepClient) {
+       roots := make(map[string]string)
+       if *ServiceURL != "" {
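+               // -url bypasses service discovery entirely, so a placeholder
+               // UUID serves as the map key.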
+               roots["zzzzz-bi6l4-000000000000000"] = *ServiceURL
+       } else if *ServiceUUID != "" {
+               for uuid, url := range kc.GatewayRoots() {
+                       if uuid == *ServiceUUID {
+                               roots[uuid] = url
+                               break
+                       }
+               }
+               if len(roots) == 0 {
+                       log.Fatalf("Service %q was not in list advertised by API %+q", *ServiceUUID, kc.GatewayRoots())
+               }
+       } else {
+               return
+       }
+       kc.SetServiceRoots(roots, roots, roots)
+}
diff --git a/tools/keep-rsync/.gitignore b/tools/keep-rsync/.gitignore
new file mode 100644 (file)
index 0000000..5ee7f3b
--- /dev/null
@@ -0,0 +1 @@
+keep-rsync
diff --git a/tools/keep-rsync/keep-rsync.go b/tools/keep-rsync/keep-rsync.go
new file mode 100644 (file)
index 0000000..820772e
--- /dev/null
@@ -0,0 +1,290 @@
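+// keep-rsync copies Keep data blocks that exist in a source cluster but
+// are missing from a destination cluster. A hypothetical invocation, where
+// "src-cluster" and "dst-cluster" name config files under
+// $HOME/.config/arvados/:
+//
+//     keep-rsync -src src-cluster -dst dst-cluster -replications 2
+//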
+package main
+
+import (
+       "bufio"
+       "crypto/tls"
+       "errors"
+       "flag"
+       "fmt"
+       "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
+       "git.curoverse.com/arvados.git/sdk/go/keepclient"
+       "io/ioutil"
+       "log"
+       "net/http"
+       "os"
+       "regexp"
+       "strings"
+       "time"
+)
+
+func main() {
+       err := doMain()
+       if err != nil {
+               log.Fatalf("%v", err)
+       }
+}
+
+func doMain() error {
+       flags := flag.NewFlagSet("keep-rsync", flag.ExitOnError)
+
+       srcConfigFile := flags.String(
+               "src",
+               "",
+               "Source configuration filename. May be either a pathname to a config file, or (for example) 'foo' as shorthand for the $HOME/.config/arvados/foo.conf file. This file is expected to specify the values for ARVADOS_API_TOKEN, ARVADOS_API_HOST, ARVADOS_API_HOST_INSECURE, and ARVADOS_BLOB_SIGNING_KEY for the source.")
+
+       dstConfigFile := flags.String(
+               "dst",
+               "",
+               "Destination configuration filename. May be either a pathname to a config file, or (for example) 'foo' as shorthand for the $HOME/.config/arvados/foo.conf file. This file is expected to specify the values for ARVADOS_API_TOKEN, ARVADOS_API_HOST, and ARVADOS_API_HOST_INSECURE for the destination.")
+
+       srcKeepServicesJSON := flags.String(
+               "src-keep-services-json",
+               "",
+               "An optional list of available source keepservices. "+
+                       "If not provided, this list is obtained from the API server configured via -src.")
+
+       dstKeepServicesJSON := flags.String(
+               "dst-keep-services-json",
+               "",
+               "An optional list of available destination keepservices. "+
+                       "If not provided, this list is obtained from the API server configured via -dst.")
+
+       replications := flags.Int(
+               "replications",
+               0,
+               "Number of replicas to write to the destination. If not specified, "+
+                       "the default replication level configured on the destination server is used.")
+
+       prefix := flags.String(
+               "prefix",
+               "",
+               "Index prefix; only blocks whose locators begin with this prefix are considered for copying.")
+
+       // Parse args; omit the first arg which is the command name
+       flags.Parse(os.Args[1:])
+
+       srcConfig, srcBlobSigningKey, err := loadConfig(*srcConfigFile)
+       if err != nil {
+               return fmt.Errorf("Error loading src configuration from file: %s", err.Error())
+       }
+
+       dstConfig, _, err := loadConfig(*dstConfigFile)
+       if err != nil {
+               return fmt.Errorf("Error loading dst configuration from file: %s", err.Error())
+       }
+
+       // setup src and dst keepclients
+       kcSrc, err := setupKeepClient(srcConfig, *srcKeepServicesJSON, false, 0)
+       if err != nil {
+               return fmt.Errorf("Error configuring src keepclient: %s", err.Error())
+       }
+
+       kcDst, err := setupKeepClient(dstConfig, *dstKeepServicesJSON, true, *replications)
+       if err != nil {
+               return fmt.Errorf("Error configuring dst keepclient: %s", err.Error())
+       }
+
+       // Copy blocks not found in dst from src
+       err = performKeepRsync(kcSrc, kcDst, srcBlobSigningKey, *prefix)
+       if err != nil {
+               return fmt.Errorf("Error while syncing data: %s", err.Error())
+       }
+
+       return nil
+}
+
+type apiConfig struct {
+       APIToken        string
+       APIHost         string
+       APIHostInsecure bool
+       ExternalClient  bool
+}
+
+// Load configuration and blob signing key from the given file
+func loadConfig(configFile string) (config apiConfig, blobSigningKey string, err error) {
+       if configFile == "" {
+               return config, blobSigningKey, errors.New("config file not specified")
+       }
+
+       config, blobSigningKey, err = readConfigFromFile(configFile)
+       if err != nil {
+               return config, blobSigningKey, fmt.Errorf("Error reading config file: %v", err)
+       }
+
+       return
+}
+
+var matchTrue = regexp.MustCompile("^(?i:1|yes|true)$")
+
+// Read config from file
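+// Lines have the form KEY=value; for example (a hypothetical config):
+//
+//     ARVADOS_API_HOST=zzzzz.arvadosapi.com
+//     ARVADOS_API_TOKEN=xxxxxxxxxxxxxxxxxxxxxxxxxxxx
+//     ARVADOS_API_HOST_INSECURE=false
+//     ARVADOS_BLOB_SIGNING_KEY=zzzzzzzzzzzzzzzzzzzz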
+func readConfigFromFile(filename string) (config apiConfig, blobSigningKey string, err error) {
+       if !strings.Contains(filename, "/") {
+               filename = os.Getenv("HOME") + "/.config/arvados/" + filename + ".conf"
+       }
+
+       content, err := ioutil.ReadFile(filename)
+
+       if err != nil {
+               return config, "", err
+       }
+
+       lines := strings.Split(string(content), "\n")
+       for _, line := range lines {
+               if line == "" {
+                       continue
+               }
+
+               kv := strings.SplitN(line, "=", 2)
+               if len(kv) != 2 {
+                       // Skip lines that are not KEY=value pairs.
+                       continue
+               }
+               key := strings.TrimSpace(kv[0])
+               value := strings.TrimSpace(kv[1])
+
+               switch key {
+               case "ARVADOS_API_TOKEN":
+                       config.APIToken = value
+               case "ARVADOS_API_HOST":
+                       config.APIHost = value
+               case "ARVADOS_API_HOST_INSECURE":
+                       config.APIHostInsecure = matchTrue.MatchString(value)
+               case "ARVADOS_EXTERNAL_CLIENT":
+                       config.ExternalClient = matchTrue.MatchString(value)
+               case "ARVADOS_BLOB_SIGNING_KEY":
+                       blobSigningKey = value
+               }
+       }
+       return
+}
+
+// setup keepclient using the config provided
+func setupKeepClient(config apiConfig, keepServicesJSON string, isDst bool, replications int) (kc *keepclient.KeepClient, err error) {
+       arv := arvadosclient.ArvadosClient{
+               ApiToken:    config.APIToken,
+               ApiServer:   config.APIHost,
+               ApiInsecure: config.APIHostInsecure,
+               Client: &http.Client{Transport: &http.Transport{
+                       TLSClientConfig: &tls.Config{InsecureSkipVerify: config.APIHostInsecure}}},
+               External: config.ExternalClient,
+       }
+
+       // If keepServicesJSON is provided, use it to load the keep services; otherwise discover them from the API server
+       if keepServicesJSON == "" {
+               kc, err = keepclient.MakeKeepClient(&arv)
+               if err != nil {
+                       return nil, err
+               }
+       } else {
+               kc = keepclient.New(&arv)
+               err = kc.LoadKeepServicesFromJSON(keepServicesJSON)
+               if err != nil {
+                       return kc, err
+               }
+       }
+
+       if isDst {
+               // Get default replications value from destination, if it is not already provided
+               if replications == 0 {
+                       value, err := arv.Discovery("defaultCollectionReplication")
+                       if err == nil {
+                               replications = int(value.(float64))
+                       } else {
+                               return nil, err
+                       }
+               }
+
+               kc.Want_replicas = replications
+       }
+
+       return kc, nil
+}
+
+// Get unique block locators from src and dst
+// Copy any blocks missing in dst
+func performKeepRsync(kcSrc, kcDst *keepclient.KeepClient, blobSigningKey, prefix string) error {
+       // Get unique locators from src
+       srcIndex, err := getUniqueLocators(kcSrc, prefix)
+       if err != nil {
+               return err
+       }
+
+       // Get unique locators from dst
+       dstIndex, err := getUniqueLocators(kcDst, prefix)
+       if err != nil {
+               return err
+       }
+
+       // Get list of locators found in src, but missing in dst
+       toBeCopied := getMissingLocators(srcIndex, dstIndex)
+
+       // Copy each missing block to dst
+       log.Printf("Before keep-rsync, there are %d blocks in src and %d blocks in dst. Starting to copy the %d blocks that are in src but not in dst.",
+               len(srcIndex), len(dstIndex), len(toBeCopied))
+
+       err = copyBlocksToDst(toBeCopied, kcSrc, kcDst, blobSigningKey)
+
+       return err
+}
+
+// Get list of unique locators from the specified cluster
+func getUniqueLocators(kc *keepclient.KeepClient, prefix string) (map[string]bool, error) {
+       uniqueLocators := map[string]bool{}
+
+       // Get index and dedup
+       for uuid := range kc.LocalRoots() {
+               reader, err := kc.GetIndex(uuid, prefix)
+               if err != nil {
+                       return uniqueLocators, err
+               }
+               scanner := bufio.NewScanner(reader)
+               for scanner.Scan() {
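+                       // Each index line begins with the block locator; keep only that first field.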
+                       uniqueLocators[strings.Split(scanner.Text(), " ")[0]] = true
+               }
+       }
+
+       return uniqueLocators, nil
+}
+
+// Get list of locators that are in src but not in dst
+func getMissingLocators(srcLocators, dstLocators map[string]bool) []string {
+       var missingLocators []string
+       for locator := range srcLocators {
+               if _, ok := dstLocators[locator]; !ok {
+                       missingLocators = append(missingLocators, locator)
+               }
+       }
+       return missingLocators
+}
+
+// Copy blocks from src to dst; only those that are missing in dst are copied
+func copyBlocksToDst(toBeCopied []string, kcSrc, kcDst *keepclient.KeepClient, blobSigningKey string) error {
+       total := len(toBeCopied)
+
+       startedAt := time.Now()
+       for done, locator := range toBeCopied {
+               if done == 0 {
+                       log.Printf("Copying data block %d of %d (%.2f%% done): %v", done+1, total,
+                               float64(done)/float64(total)*100, locator)
+               } else {
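+                       // Estimate time remaining from the average time per block so far.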
+                       timePerBlock := time.Since(startedAt) / time.Duration(done)
+                       log.Printf("Copying data block %d of %d (%.2f%% done, %v est. time remaining): %v", done+1, total,
+                               float64(done)/float64(total)*100, timePerBlock*time.Duration(total-done), locator)
+               }
+
+               getLocator := locator
+               expiresAt := time.Now().AddDate(0, 0, 1)
+               if blobSigningKey != "" {
+                       getLocator = keepclient.SignLocator(getLocator, kcSrc.Arvados.ApiToken, expiresAt, []byte(blobSigningKey))
+               }
+
+               reader, blockLen, _, err := kcSrc.Get(getLocator)
+               if err != nil {
+                       return fmt.Errorf("Error getting block: %v %v", locator, err)
+               }
+
+               _, _, err = kcDst.PutHR(getLocator[:32], reader, blockLen)
+               if err != nil {
+                       return fmt.Errorf("Error copying data block: %v %v", locator, err)
+               }
+       }
+
+       log.Printf("Successfully copied %d blocks to the destination.", total)
+       return nil
+}
diff --git a/tools/keep-rsync/keep-rsync_test.go b/tools/keep-rsync/keep-rsync_test.go
new file mode 100644 (file)
index 0000000..94281fa
--- /dev/null
@@ -0,0 +1,481 @@
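+// Tests for keep-rsync. The ServerRequiredSuite cases rely on the Arvados
+// test API server and Keep servers started via arvadostest. A typical
+// invocation might be (the package path is illustrative):
+//
+//     go test git.curoverse.com/arvados.git/tools/keep-rsync
+//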
+package main
+
+import (
+       "crypto/md5"
+       "fmt"
+       "io/ioutil"
+       "log"
+       "os"
+       "strings"
+       "testing"
+       "time"
+
+       "git.curoverse.com/arvados.git/sdk/go/arvadostest"
+       "git.curoverse.com/arvados.git/sdk/go/keepclient"
+
+       . "gopkg.in/check.v1"
+)
+
+// Gocheck boilerplate
+func Test(t *testing.T) {
+       TestingT(t)
+}
+
+// Gocheck boilerplate
+var _ = Suite(&ServerRequiredSuite{})
+var _ = Suite(&ServerNotRequiredSuite{})
+var _ = Suite(&DoMainTestSuite{})
+
+// Tests that require the API server and Keep servers running
+type ServerRequiredSuite struct{}
+
+// Tests that do not require any servers
+type ServerNotRequiredSuite struct{}
+
+// Tests that exercise the doMain entry point
+type DoMainTestSuite struct{}
+
+func (s *ServerRequiredSuite) SetUpSuite(c *C) {
+       // Start API server
+       arvadostest.StartAPI()
+}
+
+func (s *ServerRequiredSuite) TearDownSuite(c *C) {
+       arvadostest.StopAPI()
+       arvadostest.ResetEnv()
+}
+
+var initialArgs []string
+
+func (s *DoMainTestSuite) SetUpSuite(c *C) {
+       initialArgs = os.Args
+}
+
+var kcSrc, kcDst *keepclient.KeepClient
+var srcKeepServicesJSON, dstKeepServicesJSON, blobSigningKey string
+
+func (s *ServerRequiredSuite) SetUpTest(c *C) {
+       // reset all variables between tests
+       blobSigningKey = ""
+       srcKeepServicesJSON = ""
+       dstKeepServicesJSON = ""
+       kcSrc = &keepclient.KeepClient{}
+       kcDst = &keepclient.KeepClient{}
+}
+
+func (s *ServerRequiredSuite) TearDownTest(c *C) {
+       arvadostest.StopKeep(3)
+}
+
+func (s *DoMainTestSuite) SetUpTest(c *C) {
+       args := []string{"keep-rsync"}
+       os.Args = args
+}
+
+func (s *DoMainTestSuite) TearDownTest(c *C) {
+       os.Args = initialArgs
+}
+
+var testKeepServicesJSON = `{ "kind":"arvados#keepServiceList", "etag":"", "self_link":"", "offset":null, "limit":null, "items":[ { "href":"/keep_services/zzzzz-bi6l4-123456789012340", "kind":"arvados#keepService", "etag":"641234567890enhj7hzx432e5", "uuid":"zzzzz-bi6l4-123456789012340", "owner_uuid":"zzzzz-tpzed-123456789012345", "service_host":"keep0.zzzzz.arvadosapi.com", "service_port":25107, "service_ssl_flag":false, "service_type":"disk", "read_only":false }, { "href":"/keep_services/zzzzz-bi6l4-123456789012341", "kind":"arvados#keepService", "etag":"641234567890enhj7hzx432e5", "uuid":"zzzzz-bi6l4-123456789012341", "owner_uuid":"zzzzz-tpzed-123456789012345", "service_host":"keep0.zzzzz.arvadosapi.com", "service_port":25108, "service_ssl_flag":false, "service_type":"disk", "read_only":false } ], "items_available":2 }`
+
+// Testing keep-rsync needs two sets of keep services: src and dst.
+// The test setup hence creates 3 servers instead of the default 2,
+// and uses the first 2 as src and the 3rd as dst keep servers.
+func setupRsync(c *C, enforcePermissions bool, replications int) {
+       // srcConfig
+       var srcConfig apiConfig
+       srcConfig.APIHost = os.Getenv("ARVADOS_API_HOST")
+       srcConfig.APIToken = arvadostest.DataManagerToken
+       srcConfig.APIHostInsecure = matchTrue.MatchString(os.Getenv("ARVADOS_API_HOST_INSECURE"))
+
+       // dstConfig
+       var dstConfig apiConfig
+       dstConfig.APIHost = os.Getenv("ARVADOS_API_HOST")
+       dstConfig.APIToken = arvadostest.DataManagerToken
+       dstConfig.APIHostInsecure = matchTrue.MatchString(os.Getenv("ARVADOS_API_HOST_INSECURE"))
+
+       if enforcePermissions {
+               blobSigningKey = "zfhgfenhffzltr9dixws36j1yhksjoll2grmku38mi7yxd66h5j4q9w4jzanezacp8s6q0ro3hxakfye02152hncy6zml2ed0uc"
+       }
+
+       // Start Keep servers
+       arvadostest.StartKeep(3, enforcePermissions)
+
+       // setup keepclients
+       var err error
+       kcSrc, err = setupKeepClient(srcConfig, srcKeepServicesJSON, false, 0)
+       c.Check(err, IsNil)
+
+       kcDst, err = setupKeepClient(dstConfig, dstKeepServicesJSON, true, replications)
+       c.Check(err, IsNil)
+
+       // Limit src to the first two keep servers and dst to the third:
+       // drop service ...02 from src, and services ...00 and ...01 from dst.
+       for _, roots := range []map[string]string{
+               kcSrc.LocalRoots(), kcSrc.GatewayRoots(), kcSrc.WritableLocalRoots()} {
+               for uuid := range roots {
+                       if strings.HasSuffix(uuid, "02") {
+                               delete(roots, uuid)
+                       }
+               }
+       }
+
+       for _, roots := range []map[string]string{
+               kcDst.LocalRoots(), kcDst.GatewayRoots(), kcDst.WritableLocalRoots()} {
+               for uuid := range roots {
+                       if strings.HasSuffix(uuid, "00") || strings.HasSuffix(uuid, "01") {
+                               delete(roots, uuid)
+                       }
+               }
+       }
+
+       if replications == 0 {
+               // Must have gotten the default replication value of 2 from the dst discovery document
+               c.Assert(kcDst.Want_replicas, Equals, 2)
+       } else {
+               // Since replications value is provided, it is used
+               c.Assert(kcDst.Want_replicas, Equals, replications)
+       }
+}
+
+func (s *ServerRequiredSuite) TestRsyncPutInOne_GetFromOtherShouldFail(c *C) {
+       setupRsync(c, false, 1)
+
+       // Put a block in src and verify that it is not found in dst
+       testNoCrosstalk(c, "test-data-1", kcSrc, kcDst)
+
+       // Put a block in dst and verify that it is not found in src
+       testNoCrosstalk(c, "test-data-2", kcDst, kcSrc)
+}
+
+func (s *ServerRequiredSuite) TestRsyncWithBlobSigning_PutInOne_GetFromOtherShouldFail(c *C) {
+       setupRsync(c, true, 1)
+
+       // Put a block in src and verify that it is not found in dst
+       testNoCrosstalk(c, "test-data-1", kcSrc, kcDst)
+
+       // Put a block in dst and verify that it is not found in src
+       testNoCrosstalk(c, "test-data-2", kcDst, kcSrc)
+}
+
+// Do a Put with the first client and a Get with the second,
+// which should fail with a block-not-found error.
+func testNoCrosstalk(c *C, testData string, kc1, kc2 *keepclient.KeepClient) {
+       // Put a block using kc1
+       locator, _, err := kc1.PutB([]byte(testData))
+       c.Assert(err, Equals, nil)
+
+       locator = strings.Split(locator, "+")[0]
+       _, _, _, err = kc2.Get(keepclient.SignLocator(locator, kc2.Arvados.ApiToken, time.Now().AddDate(0, 0, 1), []byte(blobSigningKey)))
+       c.Assert(err, NotNil)
+       c.Check(err.Error(), Equals, "Block not found")
+}
+
+// Test keep-rsync initialization, with srcKeepServicesJSON
+func (s *ServerRequiredSuite) TestRsyncInitializeWithKeepServicesJSON(c *C) {
+       srcKeepServicesJSON = testKeepServicesJSON
+
+       setupRsync(c, false, 1)
+
+       localRoots := kcSrc.LocalRoots()
+       c.Check(localRoots, NotNil)
+
+       _, foundIt := localRoots["zzzzz-bi6l4-123456789012340"]
+       c.Check(foundIt, Equals, true)
+
+       _, foundIt = localRoots["zzzzz-bi6l4-123456789012341"]
+       c.Check(foundIt, Equals, true)
+}
+
+// Test keep-rsync initialization with default replications count
+func (s *ServerRequiredSuite) TestInitializeRsyncDefaultReplicationsCount(c *C) {
+       setupRsync(c, false, 0)
+}
+
+// Test keep-rsync initialization with replications count argument
+func (s *ServerRequiredSuite) TestInitializeRsyncReplicationsCount(c *C) {
+       setupRsync(c, false, 3)
+}
+
+// Put some blocks in src and some more in dst,
+// then copy the missing blocks from src to dst.
+func (s *ServerRequiredSuite) TestKeepRsync(c *C) {
+       testKeepRsync(c, false, "")
+}
+
+// Put some blocks in src and some more in dst with blob signing enabled,
+// then copy the missing blocks from src to dst.
+func (s *ServerRequiredSuite) TestKeepRsync_WithBlobSigning(c *C) {
+       testKeepRsync(c, true, "")
+}
+
+// Put some blocks in src and some more in dst, use a prefix during rsync,
+// and verify that only the matching missing blocks are copied from src to dst.
+func (s *ServerRequiredSuite) TestKeepRsync_WithPrefix(c *C) {
+       data := []byte("test-data-4")
+       hash := fmt.Sprintf("%x", md5.Sum(data))
+
+       testKeepRsync(c, false, hash[0:3])
+       c.Check(len(dstIndex) > len(dstLocators), Equals, true)
+}
+
+// Put some blocks in src and some more in dst, then rsync with a prefix
+// that matches nothing in src, and verify that nothing new is copied to dst.
+func (s *ServerRequiredSuite) TestKeepRsync_WithNoSuchPrefixInSrc(c *C) {
+       testKeepRsync(c, false, "999")
+       c.Check(len(dstIndex), Equals, len(dstLocators))
+}
+
+// Put 5 blocks in src and 2 of those blocks in dst, so src has 3 blocks
+// that dst lacks. Also put 2 extra blocks in dst that exist only there.
+// Run rsync and verify that all 7 distinct blocks are then available in dst.
+func testKeepRsync(c *C, enforcePermissions bool, prefix string) {
+       setupRsync(c, enforcePermissions, 1)
+
+       // Put test blocks in src and dst.
+       setupTestData(c, prefix)
+
+       err := performKeepRsync(kcSrc, kcDst, blobSigningKey, prefix)
+       c.Check(err, IsNil)
+
+       // Now GetIndex from dst and verify that all 5 from src and the 2 extra blocks are found
+       dstIndex, err = getUniqueLocators(kcDst, "")
+       c.Check(err, IsNil)
+
+       for _, locator := range srcLocatorsMatchingPrefix {
+               _, ok := dstIndex[locator]
+               c.Assert(ok, Equals, true)
+       }
+
+       for _, locator := range extraDstLocators {
+               _, ok := dstIndex[locator]
+               c.Assert(ok, Equals, true)
+       }
+
+       if prefix == "" {
+               // all blocks from src and the two extra blocks
+               c.Assert(len(dstIndex), Equals, len(srcLocators)+len(extraDstLocators))
+       } else {
+               // 1 block matching the prefix was copied over, plus the 2 blocks initially put in both src and dst, and the 2 extra dst-only blocks
+               c.Assert(len(dstIndex), Equals, len(srcLocatorsMatchingPrefix)+len(extraDstLocators)+2)
+       }
+}
+
+// Setup test data in src and dst.
+var srcLocators, srcLocatorsMatchingPrefix, dstLocators, extraDstLocators []string
+var dstIndex map[string]bool
+
+func setupTestData(c *C, indexPrefix string) {
+       srcLocators = []string{}
+       srcLocatorsMatchingPrefix = []string{}
+       dstLocators = []string{}
+       extraDstLocators = []string{}
+       dstIndex = make(map[string]bool)
+
+       // Put a few blocks in src using kcSrc
+       for i := 0; i < 5; i++ {
+               hash, _, err := kcSrc.PutB([]byte(fmt.Sprintf("test-data-%d", i)))
+               c.Check(err, IsNil)
+
+               srcLocators = append(srcLocators, strings.Split(hash, "+A")[0])
+               if strings.HasPrefix(hash, indexPrefix) {
+                       srcLocatorsMatchingPrefix = append(srcLocatorsMatchingPrefix, strings.Split(hash, "+A")[0])
+               }
+       }
+
+       // Put first two of those src blocks in dst using kcDst
+       for i := 0; i < 2; i++ {
+               hash, _, err := kcDst.PutB([]byte(fmt.Sprintf("test-data-%d", i)))
+               c.Check(err, IsNil)
+               dstLocators = append(dstLocators, strings.Split(hash, "+A")[0])
+       }
+
+       // Put two more blocks in dst; they are not in src at all
+       for i := 0; i < 2; i++ {
+               hash, _, err := kcDst.PutB([]byte(fmt.Sprintf("other-data-%d", i)))
+               c.Check(err, IsNil)
+               dstLocators = append(dstLocators, strings.Split(hash, "+A")[0])
+               extraDstLocators = append(extraDstLocators, strings.Split(hash, "+A")[0])
+       }
+}
+
+// Setup rsync using srcKeepServicesJSON with fake keepservers.
+// Expect error during performKeepRsync due to unreachable src keepservers.
+func (s *ServerRequiredSuite) TestErrorDuringRsync_FakeSrcKeepservers(c *C) {
+       srcKeepServicesJSON = testKeepServicesJSON
+
+       setupRsync(c, false, 1)
+
+       err := performKeepRsync(kcSrc, kcDst, "", "")
+       log.Printf("Err = %v", err)
+       c.Check(strings.Contains(err.Error(), "no such host"), Equals, true)
+}
+
+// Setup rsync using dstKeepServicesJSON with fake keepservers.
+// Expect error during performKeepRsync due to unreachable dst keepservers.
+func (s *ServerRequiredSuite) TestErrorDuringRsync_FakeDstKeepservers(c *C) {
+       dstKeepServicesJSON = testKeepServicesJSON
+
+       setupRsync(c, false, 1)
+
+       err := performKeepRsync(kcSrc, kcDst, "", "")
+       log.Printf("Err = %v", err)
+       c.Check(strings.Contains(err.Error(), "no such host"), Equals, true)
+}
+
+// Test rsync with signature error during Get from src.
+func (s *ServerRequiredSuite) TestErrorDuringRsync_ErrorGettingBlockFromSrc(c *C) {
+       setupRsync(c, true, 1)
+
+       // put some blocks in src and dst
+       setupTestData(c, "")
+
+       // Change blob signing key to a fake key, so that Get from src fails
+       blobSigningKey = "thisisfakeblobsigningkey"
+
+       err := performKeepRsync(kcSrc, kcDst, blobSigningKey, "")
+       c.Check(strings.Contains(err.Error(), "HTTP 403 \"Forbidden\""), Equals, true)
+}
+
+// Test rsync with error during Put to dst.
+func (s *ServerRequiredSuite) TestErrorDuringRsync_ErrorPuttingBlockInDst(c *C) {
+       setupRsync(c, false, 1)
+
+       // put some blocks in src and dst
+       setupTestData(c, "")
+
+       // Increase Want_replicas on dst (which has only one keep server) so that Put fails with an insufficient-replicas error
+       kcDst.Want_replicas = 2
+
+       err := performKeepRsync(kcSrc, kcDst, blobSigningKey, "")
+       c.Check(strings.Contains(err.Error(), "Could not write sufficient replicas"), Equals, true)
+}
+
+// Test loadConfig func
+func (s *ServerNotRequiredSuite) TestLoadConfig(c *C) {
+       // Setup a src config file
+       srcFile := setupConfigFile(c, "src-config")
+       defer os.Remove(srcFile.Name())
+       srcConfigFile := srcFile.Name()
+
+       // Setup a dst config file
+       dstFile := setupConfigFile(c, "dst-config")
+       defer os.Remove(dstFile.Name())
+       dstConfigFile := dstFile.Name()
+
+       // load configuration from those files
+       srcConfig, srcBlobSigningKey, err := loadConfig(srcConfigFile)
+       c.Check(err, IsNil)
+
+       c.Assert(srcConfig.APIHost, Equals, os.Getenv("ARVADOS_API_HOST"))
+       c.Assert(srcConfig.APIToken, Equals, arvadostest.DataManagerToken)
+       c.Assert(srcConfig.APIHostInsecure, Equals, matchTrue.MatchString(os.Getenv("ARVADOS_API_HOST_INSECURE")))
+       c.Assert(srcConfig.ExternalClient, Equals, false)
+
+       dstConfig, _, err := loadConfig(dstConfigFile)
+       c.Check(err, IsNil)
+
+       c.Assert(dstConfig.APIHost, Equals, os.Getenv("ARVADOS_API_HOST"))
+       c.Assert(dstConfig.APIToken, Equals, arvadostest.DataManagerToken)
+       c.Assert(dstConfig.APIHostInsecure, Equals, matchTrue.MatchString(os.Getenv("ARVADOS_API_HOST_INSECURE")))
+       c.Assert(dstConfig.ExternalClient, Equals, false)
+
+       c.Assert(srcBlobSigningKey, Equals, "abcdefg")
+}
+
+// Test loadConfig func without setting up the config files
+func (s *ServerNotRequiredSuite) TestLoadConfig_MissingSrcConfig(c *C) {
+       _, _, err := loadConfig("")
+       c.Assert(err.Error(), Equals, "config file not specified")
+}
+
+// Test loadConfig func - error reading config
+func (s *ServerNotRequiredSuite) TestLoadConfig_ErrorLoadingSrcConfig(c *C) {
+       _, _, err := loadConfig("no-such-config-file")
+       c.Assert(strings.Contains(err.Error(), "no such file or directory"), Equals, true)
+}
+
+func setupConfigFile(c *C, name string) *os.File {
+       // Setup a config file
+       file, err := ioutil.TempFile(os.TempDir(), name)
+       c.Check(err, IsNil)
+
+       fileContent := "ARVADOS_API_HOST=" + os.Getenv("ARVADOS_API_HOST") + "\n"
+       fileContent += "ARVADOS_API_TOKEN=" + arvadostest.DataManagerToken + "\n"
+       fileContent += "ARVADOS_API_HOST_INSECURE=" + os.Getenv("ARVADOS_API_HOST_INSECURE") + "\n"
+       fileContent += "ARVADOS_EXTERNAL_CLIENT=false\n"
+       fileContent += "ARVADOS_BLOB_SIGNING_KEY=abcdefg"
+
+       _, err = file.Write([]byte(fileContent))
+       c.Check(err, IsNil)
+
+       return file
+}
+
+func (s *DoMainTestSuite) Test_doMain_NoSrcConfig(c *C) {
+       err := doMain()
+       c.Check(err, NotNil)
+       c.Assert(err.Error(), Equals, "Error loading src configuration from file: config file not specified")
+}
+
+func (s *DoMainTestSuite) Test_doMain_SrcButNoDstConfig(c *C) {
+       srcConfig := setupConfigFile(c, "src")
+       args := []string{"-replications", "3", "-src", srcConfig.Name()}
+       os.Args = append(os.Args, args...)
+       err := doMain()
+       c.Check(err, NotNil)
+       c.Assert(err.Error(), Equals, "Error loading dst configuration from file: config file not specified")
+}
+
+func (s *DoMainTestSuite) Test_doMain_BadSrcConfig(c *C) {
+       args := []string{"-src", "abcd"}
+       os.Args = append(os.Args, args...)
+       err := doMain()
+       c.Check(err, NotNil)
+       c.Assert(strings.HasPrefix(err.Error(), "Error loading src configuration from file: Error reading config file"), Equals, true)
+}
+
+func (s *DoMainTestSuite) Test_doMain_WithReplicationsButNoSrcConfig(c *C) {
+       args := []string{"-replications", "3"}
+       os.Args = append(os.Args, args...)
+       err := doMain()
+       c.Check(err, NotNil)
+       c.Assert(err.Error(), Equals, "Error loading src configuration from file: config file not specified")
+}
+
+func (s *DoMainTestSuite) Test_doMainWithSrcAndDstConfig(c *C) {
+       srcConfig := setupConfigFile(c, "src")
+       dstConfig := setupConfigFile(c, "dst")
+       args := []string{"-src", srcConfig.Name(), "-dst", dstConfig.Name()}
+       os.Args = append(os.Args, args...)
+
+       // Start keepservers. Since we do not prune the service lists the way
+       // setupRsync does, kcSrc and kcDst will be identical and no actual
+       // copying to dst will happen, but that's ok.
+       arvadostest.StartKeep(2, false)
+       defer arvadostest.StopKeep(2)
+
+       err := doMain()
+       c.Check(err, IsNil)
+}