Merge branch 'pr/28'
author Brett Smith <brett@curoverse.com>
Tue, 20 Oct 2015 15:04:29 +0000 (11:04 -0400)
committer Brett Smith <brett@curoverse.com>
Tue, 20 Oct 2015 15:04:29 +0000 (11:04 -0400)
Closes #7324.

115 files changed:
apps/workbench/Gemfile.lock
apps/workbench/app/helpers/version_helper.rb
apps/workbench/app/views/application/_report_issue_popup.html.erb
apps/workbench/app/views/getting_started/_getting_started_popup.html.erb
apps/workbench/config/application.default.yml
apps/workbench/config/application.rb
apps/workbench/lib/app_version.rb [new file with mode: 0644]
doc/_includes/_install_ruby_and_bundler.liquid
doc/install/install-api-server.html.textile.liquid
doc/install/install-manual-prerequisites.html.textile.liquid
doc/install/install-sso.html.textile.liquid
doc/install/install-workbench-app.html.textile.liquid
doc/sdk/cli/subcommands.html.textile.liquid
doc/user/topics/arv-copy.html.textile.liquid
docker/compute/Dockerfile
docker/jobs/Dockerfile
sdk/cli/bin/arv
sdk/cli/bin/crunch-job
sdk/cli/test/test_arv-get.rb
sdk/cli/test/test_arv-keep-get.rb [new file with mode: 0644]
sdk/cli/test/test_arv-keep-put.rb [moved from sdk/cli/test/test_arv-put.rb with 99% similarity]
sdk/cwl/setup.py
sdk/go/arvadosclient/arvadosclient_test.go
sdk/go/arvadostest/run_servers.go
sdk/go/keepclient/keepclient.go
sdk/go/keepclient/keepclient_test.go
sdk/go/keepclient/perms.go [new file with mode: 0644]
sdk/go/keepclient/perms_test.go [new file with mode: 0644]
sdk/go/keepclient/support.go
sdk/go/streamer/streamer_test.go
sdk/go/streamer/transfer.go
sdk/pam/setup.py
sdk/python/arvados/arvfile.py
sdk/python/arvados/commands/put.py
sdk/python/arvados/keep.py
sdk/python/setup.py
sdk/python/tests/run_test_server.py
sdk/python/tests/test_arv_put.py
sdk/python/tests/test_arvfile.py
sdk/python/tests/test_collections.py
sdk/python/tests/test_keep_client.py
services/api/Gemfile
services/api/Gemfile.lock
services/api/app/controllers/arvados/v1/schema_controller.rb
services/api/app/models/keep_service.rb
services/api/config/application.default.yml
services/api/config/initializers/app_version.rb [new file with mode: 0644]
services/api/lib/app_version.rb [new file with mode: 0644]
services/api/lib/tasks/config_check.rake
services/api/script/crunch-dispatch.rb
services/api/script/salvage_collection.rb
services/api/test/functional/arvados/v1/schema_controller_test.rb
services/api/test/unit/app_version_test.rb [new file with mode: 0644]
services/api/test/unit/keep_service_test.rb
services/arv-git-httpd/gitolite_test.go
services/datamanager/collection/collection.go
services/datamanager/collection/collection_test.go
services/datamanager/collection/testing.go
services/datamanager/datamanager.go
services/datamanager/datamanager_test.go [new file with mode: 0644]
services/datamanager/keep/keep.go
services/datamanager/keep/keep_test.go
services/datamanager/summary/canonical_string.go
services/datamanager/summary/file.go
services/datamanager/summary/pull_list.go
services/datamanager/summary/summary.go
services/datamanager/summary/summary_test.go
services/datamanager/summary/trash_list.go
services/datamanager/summary/trash_list_test.go
services/dockercleaner/setup.py
services/fuse/setup.py
services/keepproxy/keepproxy.go
services/keepproxy/keepproxy_test.go
services/keepproxy/pkg-extras/etc/default/keepproxy [new file with mode: 0644]
services/keepproxy/pkg-extras/etc/init.d/keepproxy [new file with mode: 0755]
services/keepstore/azure_blob_volume.go [new file with mode: 0644]
services/keepstore/azure_blob_volume_test.go [new file with mode: 0644]
services/keepstore/bufferpool_test.go
services/keepstore/collision.go
services/keepstore/handler_test.go
services/keepstore/handlers.go
services/keepstore/handlers_with_generic_volume_test.go [new file with mode: 0644]
services/keepstore/keepstore.go
services/keepstore/keepstore_test.go
services/keepstore/logging_router.go
services/keepstore/mock_mutex_for_test.go
services/keepstore/perms.go
services/keepstore/perms_test.go
services/keepstore/pull_worker.go
services/keepstore/pull_worker_integration_test.go
services/keepstore/pull_worker_test.go
services/keepstore/trash_worker.go
services/keepstore/trash_worker_test.go
services/keepstore/volume.go
services/keepstore/volume_generic_test.go
services/keepstore/volume_test.go
services/keepstore/volume_unix.go
services/keepstore/volume_unix_test.go
services/keepstore/work_queue.go
services/login-sync/bin/arvados-login-sync
services/nodemanager/arvnodeman/computenode/__init__.py
services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
services/nodemanager/arvnodeman/computenode/dispatch/slurm.py
services/nodemanager/arvnodeman/computenode/driver/__init__.py
services/nodemanager/arvnodeman/computenode/driver/azure.py
services/nodemanager/arvnodeman/daemon.py
services/nodemanager/setup.py
services/nodemanager/tests/test_computenode_dispatch.py
services/nodemanager/tests/test_computenode_dispatch_slurm.py
services/nodemanager/tests/test_daemon.py
tools/keep-exercise/.gitignore [new file with mode: 0644]
tools/keep-exercise/keep-exercise.go [new file with mode: 0644]
tools/keep-rsync/.gitignore [new file with mode: 0644]
tools/keep-rsync/keep-rsync.go [new file with mode: 0644]
tools/keep-rsync/keep-rsync_test.go [new file with mode: 0644]

diff --git a/apps/workbench/Gemfile.lock b/apps/workbench/Gemfile.lock
index 20b8d6164ccca273e11756928a21c1a17851f07a..8b2118ce087d220c32edf4acd35e7a934404a946 100644 (file)
@@ -74,7 +74,7 @@ GEM
       rack (>= 1.0.0)
       rack-test (>= 0.5.4)
       xpath (~> 2.0)
-    childprocess (0.5.5)
+    childprocess (0.5.6)
       ffi (~> 1.0, >= 1.0.11)
     cliver (0.3.2)
     coffee-rails (4.1.0)
@@ -98,7 +98,7 @@ GEM
     fast_stack (0.1.0)
       rake
       rake-compiler
-    ffi (1.9.6)
+    ffi (1.9.10)
     flamegraph (0.1.0)
       fast_stack
     google-api-client (0.6.4)
@@ -139,7 +139,7 @@ GEM
       metaclass (~> 0.0.1)
     morrisjs-rails (0.5.1)
       railties (> 3.1, < 5)
-    multi_json (1.11.1)
+    multi_json (1.11.2)
     multipart-post (1.2.0)
     net-scp (1.2.1)
       net-ssh (>= 2.6.5)
@@ -192,7 +192,7 @@ GEM
     ref (1.0.5)
     ruby-debug-passenger (0.2.0)
     ruby-prof (0.15.2)
-    rubyzip (1.1.6)
+    rubyzip (1.1.7)
     rvm-capistrano (1.5.5)
       capistrano (~> 2.15.4)
     sass (3.4.9)
@@ -202,7 +202,7 @@ GEM
       sprockets (>= 2.8, < 4.0)
       sprockets-rails (>= 2.0, < 4.0)
       tilt (~> 1.1)
-    selenium-webdriver (2.44.0)
+    selenium-webdriver (2.48.1)
       childprocess (~> 0.5)
       multi_json (~> 1.0)
       rubyzip (~> 1.0)
@@ -239,7 +239,7 @@ GEM
       execjs (>= 0.3.0)
       json (>= 1.8.0)
     uuidtools (2.1.5)
-    websocket (1.2.1)
+    websocket (1.2.2)
     websocket-driver (0.5.1)
       websocket-extensions (>= 0.1.0)
     websocket-extensions (0.1.1)
@@ -294,3 +294,6 @@ DEPENDENCIES
   therubyracer
   uglifier (>= 1.0.3)
   wiselinks
+
+BUNDLED WITH
+   1.10.6
diff --git a/apps/workbench/app/helpers/version_helper.rb b/apps/workbench/app/helpers/version_helper.rb
index 6cae78f53efc495f4a54797b39dc6e6e889492a3..5c15986601e1b9b69d772d5391b88146fbec33c9 100644 (file)
@@ -1,30 +1,12 @@
 module VersionHelper
-  # api_version returns the git commit hash for the API server's
-  # current version.  It is extracted from api_version_text, which
-  # returns the source_version provided by the discovery document and
-  # may have the word "-modified" appended to it (if the API server is
-  # running from a locally modified repository).
-
-  def api_version
-    api_version_text.sub(/[^[:xdigit:]].*/, '')
-  end
-
-  def api_version_text
+  # Get the source_version given in the API server's discovery
+  # document.
+  def api_source_version
     arvados_api_client.discovery[:source_version]
   end
 
-  # wb_version and wb_version_text provide the same strings for the
-  # code version that this Workbench is currently running.
-
-  def wb_version
-    Rails.configuration.source_version
-  end
-
-  def wb_version_text
-    wb_version + (Rails.configuration.local_modified or '')
-  end
-
+  # URL for browsing source code for the given version.
   def version_link_target version
-    "https://arvados.org/projects/arvados/repository/changes?rev=#{version}"
+    "https://arvados.org/projects/arvados/repository/changes?rev=#{version.sub(/-.*/, "")}"
   end
 end
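
The rewritten version_link_target strips everything after the first hyphen, so a "-modified" suffix never reaches the revision URL. A minimal standalone sketch of that behavior in Ruby (the method body is the one added above):

    def version_link_target(version)
      "https://arvados.org/projects/arvados/repository/changes?rev=#{version.sub(/-.*/, "")}"
    end

    version_link_target("abc1234-modified")
    #=> "https://arvados.org/projects/arvados/repository/changes?rev=abc1234"
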
diff --git a/apps/workbench/app/views/application/_report_issue_popup.html.erb b/apps/workbench/app/views/application/_report_issue_popup.html.erb
index 1c964abfe0491b8ad1c77c02767aaf845927093e..1f66146e2626cbfc705a94ea510ba6c062715e84 100644 (file)
@@ -3,18 +3,15 @@
   arvados_base = Rails.configuration.arvados_v1_base
   support_email = Rails.configuration.support_email_address
 
-  api_version_link = link_to api_version_text, version_link_target(api_version)
-  wb_version_link = link_to wb_version_text, version_link_target(wb_version)
-
   additional_info = {}
   additional_info['Current location'] = params[:current_location]
   additional_info['User UUID'] = current_user.uuid if current_user
 
   additional_info_str = additional_info.map {|k,v| "#{k}=#{v}"}.join("\n")
 
-  additional_info['api_version'] = api_version_text
+  additional_info['api_source_version'] = api_source_version
   additional_info['generated_at'] = generated_at
-  additional_info['workbench_version'] = wb_version_text
+  additional_info['workbench_version'] = AppVersion.hash
   additional_info['arvados_base'] = arvados_base
   additional_info['support_email'] = support_email
   additional_info['error_message'] = params[:error_message] if params[:error_message]
         <div class="form-group">
           <label for="wb_version" class="col-sm-4 control-label"> Workbench version </label>
           <div class="col-sm-8">
-            <p class="form-control-static" name="wb_version"><%= wb_version_link %></p>
+            <p class="form-control-static" name="wb_version">
+              <%= link_to AppVersion.hash, version_link_target(AppVersion.hash) %>
+            </p>
           </div>
         </div>
 
         <div class="form-group">
           <label for="server_version" class="col-sm-4 control-label"> API version </label>
           <div class="col-sm-8">
-            <p class="form-control-static" name="server_version"><%= api_version_link %></p>
+            <p class="form-control-static" name="server_version">
+              <%= link_to api_source_version, version_link_target(api_source_version) %>
+            </p>
           </div>
         </div>
 
diff --git a/apps/workbench/app/views/getting_started/_getting_started_popup.html.erb b/apps/workbench/app/views/getting_started/_getting_started_popup.html.erb
index 0db0567ec98b22495b6e063479a60ba753b5bc83..3020a1249bca287a41103b6f5924c134b2090adc 100644 (file)
@@ -154,7 +154,7 @@ div.figure p {
             </li><li>
               <strong>Use existing pipelines</strong>: Use best-practices pipelines on your own data with the click of a button.
             </li><li>
-              <strong>Open-source</strong>: Arvados is completely open-source. Check out our <a href="http://arvados.org">developer site</a>.
+              <strong>Open source</strong>: Arvados is completely open source. Check out our <a href="http://dev.arvados.org">developer site</a>.
             </li>
           </ol>
           <p style="margin-top: 1em;">
diff --git a/apps/workbench/config/application.default.yml b/apps/workbench/config/application.default.yml
index 744c0c3ba3b7b3b8f1340df03045b554eab1ea82..00959bbb3bea30568e62ddb82c9f0d7f733fb44d 100644 (file)
@@ -11,7 +11,7 @@
 #   template_uuid: is the uuid of the template to be executed
 #   input_paths: an array of inputs for the pipeline. Use either a collection's "uuid"
 #     or a file's "uuid/file_name" path in this array. If the pipeline does not require
-#     any inputs, this can be omitted. 
+#     any inputs, this can be omitted.
 #   max_wait_seconds: max time in seconds to wait for the pipeline run to complete.
 #     Default value of 30 seconds is used when this value is not provided.
 diagnostics:
@@ -45,7 +45,6 @@ development:
   assets.debug: true
   profiling_enabled: true
   site_name: Arvados Workbench (dev)
-  local_modified: "<%= '-modified' if `git status -s` != '' %>"
 
   # API server configuration
   arvados_login_base: ~
@@ -151,7 +150,7 @@ common:
 
   # Below is a sample setting of user_profile_form_fields config parameter.
   # This configuration parameter should be set to either false (to disable) or
-  # to an array as shown below. 
+  # to an array as shown below.
   # Configure the list of input fields to be displayed in the profile page
   # using the attribute "key" for each of the input fields.
   # This sample shows configuration with one required and one optional form fields.
@@ -183,9 +182,11 @@ common:
   # the profile page.
   user_profile_form_message: Welcome to Arvados. All <span style="color:red">required fields</span> must be completed before you can proceed.
 
-  # source_version
-  source_version: "<%= `git log -n 1 --format=%h`.strip %>"
-  local_modified: false
+  # Override the automatic version string. With the default value of
+  # false, the version string is read from git-commit.version in
+  # Rails.root (included in vendor packages) or determined by invoking
+  # "git log".
+  source_version: false
 
   # report notification to and from addresses
   issue_reporter_email_from: arvados@example.com
diff --git a/apps/workbench/config/application.rb b/apps/workbench/config/application.rb
index 4ac68198e8bd6fa6404512ad5479397223de1bc8..d1c7934ab3ba16b57f61425edcc48a57b81df1e3 100644 (file)
@@ -12,6 +12,7 @@ module ArvadosWorkbench
 
     # Custom directories with classes and modules you want to be autoloadable.
     # config.autoload_paths += %W(#{config.root}/extras)
+    config.autoload_paths += %W(#{config.root}/lib)
 
     # Only load the plugins named here, in the order given (default is alphabetical).
     # :all can be used as a placeholder for all plugins not explicitly named.
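
Adding lib/ to autoload_paths is what lets Workbench code refer to the new AppVersion class without an explicit require. A brief sketch of the Rails convention being relied on:

    # With config.autoload_paths including "#{config.root}/lib", Rails maps
    # the constant AppVersion to lib/app_version.rb (underscored file name)
    # and loads that file on first reference:
    AppVersion.hash  # no `require "app_version"` needed
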
diff --git a/apps/workbench/lib/app_version.rb b/apps/workbench/lib/app_version.rb
new file mode 100644 (file)
index 0000000..48cb8f6
--- /dev/null
@@ -0,0 +1,53 @@
+# If you change this file, you'll probably also want to make the same
+# changes in services/api/lib/app_version.rb.
+
+class AppVersion
+  def self.git(*args, &block)
+    IO.popen(["git", "--git-dir", ".git"] + args, "r",
+             chdir: Rails.root.join('../..'),
+             err: "/dev/null",
+             &block)
+  end
+
+  def self.forget
+    @hash = nil
+  end
+
+  # Return abbrev commit hash for current code version: "abc1234", or
+  # "abc1234-modified" if there are uncommitted changes. If present,
+  # return contents of {root}/git-commit.version instead.
+  def self.hash
+    if (cached = Rails.configuration.source_version || @hash)
+      return cached
+    end
+
+    # Read the version from our package's git-commit.version file, if available.
+    begin
+      @hash = IO.read(Rails.root.join("git-commit.version")).strip
+    rescue Errno::ENOENT
+    end
+
+    if @hash.nil? or @hash.empty?
+      begin
+        local_modified = false
+        git("status", "--porcelain") do |git_pipe|
+          git_pipe.each_line do |_|
+            STDERR.puts _
+            local_modified = true
+            # Continue reading the pipe so git doesn't get SIGPIPE.
+          end
+        end
+        if $?.success?
+          git("log", "-n1", "--format=%H") do |git_pipe|
+            git_pipe.each_line do |line|
+              @hash = line.chomp[0...8] + (local_modified ? '-modified' : '')
+            end
+          end
+        end
+      rescue SystemCallError
+      end
+    end
+
+    @hash || "unknown"
+  end
+end
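
AppVersion.hash memoizes its result and honors the source_version override documented in application.default.yml above. A small usage sketch (hypothetical values, assuming a Rails console):

    AppVersion.hash                               #=> e.g. "abc1234" or "abc1234-modified"
    Rails.configuration.source_version = "v1.2.3" # deployer override
    AppVersion.forget                             # drop the memoized value
    AppVersion.hash                               #=> "v1.2.3"
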
diff --git a/doc/_includes/_install_ruby_and_bundler.liquid b/doc/_includes/_install_ruby_and_bundler.liquid
index 502e2bcb3a41e1bdacd8f0c55a1afde95b7a55d6..e79cffb7f86dd9adf5a622b231ceda25e057f4e4 100644 (file)
@@ -5,7 +5,6 @@ h4(#rvm). *Option 1: Install with RVM*
 <notextile>
 <pre><code><span class="userinput">sudo gpg --keyserver hkp://keys.gnupg.net --recv-keys 409B6B1796C275462A1703113804BB82D39DC0E3
 \curl -sSL https://get.rvm.io | sudo bash -s stable --ruby=2.1
-sudo adduser "$USER" rvm
 </span></code></pre></notextile>
 
 Either log out and log back in to activate RVM, or explicitly load it in all open shells like this:
@@ -46,7 +45,7 @@ Install prerequisites for Ubuntu 12.04 or 14.04:
 <pre><code><span class="userinput">sudo apt-get install \
     gawk g++ gcc make libc6-dev libreadline6-dev zlib1g-dev libssl-dev \
     libyaml-dev libsqlite3-dev sqlite3 autoconf libgdbm-dev \
-    libncurses5-dev automake libtool bison pkg-config libffi-dev
+    libncurses5-dev automake libtool bison pkg-config libffi-dev curl
 </span></code></pre></notextile>
 
 Build and install Ruby:
@@ -54,8 +53,8 @@ Build and install Ruby:
 <notextile>
 <pre><code><span class="userinput">mkdir -p ~/src
 cd ~/src
-curl http://cache.ruby-lang.org/pub/ruby/2.1/ruby-2.1.6.tar.gz | tar xz
-cd ruby-2.1.6
+curl http://cache.ruby-lang.org/pub/ruby/2.1/ruby-2.1.7.tar.gz | tar xz
+cd ruby-2.1.7
 ./configure --disable-install-rdoc
 make
 sudo make install
diff --git a/doc/install/install-api-server.html.textile.liquid b/doc/install/install-api-server.html.textile.liquid
index fb43f783f04ffc5346da7cf46224ce8856eecb02..695584fa247f2176db5d690adebf4b9002389f0d 100644 (file)
@@ -203,7 +203,7 @@ For best performance, we recommend you use Nginx as your Web server front-end, w
 
 <notextile>
 <ol>
-<li><a href="https://www.phusionpassenger.com/documentation/Users%20guide%20Nginx.html">Install Nginx and Phusion Passenger</a>.</li>
+<li><a href="https://www.phusionpassenger.com/library/walkthroughs/deploy/ruby/ownserver/nginx/oss/install_passenger_main.html">Install Nginx and Phusion Passenger</a>.</li>
 
 <li><p>Puma is already included with the API server's gems.  We recommend you run it as a service under <a href="http://smarden.org/runit/">runit</a> or a similar tool.  Here's a sample runit script for that:</p>
 
diff --git a/doc/install/install-manual-prerequisites.html.textile.liquid b/doc/install/install-manual-prerequisites.html.textile.liquid
index 52a51a191aafb90188d1906583b419f0135ca49b..a26370d21bf5f4bb877a7ece60957ff9b1eff6d1 100644 (file)
@@ -42,7 +42,7 @@ baseurl=http://rpm.arvados.org/CentOS/$releasever/os/$basearch/
 
 h3. Debian and Ubuntu
 
-Packages are available for Debian 7 ("wheezy"), Ubuntu 12.04 ("precise"), and Ubuntu 14.04 ("trusty").
+Packages are available for Debian 7 ("wheezy"), Debian 8 ("jessie"), Ubuntu 12.04 ("precise"), and Ubuntu 14.04 ("trusty").
 
 First, register the Curoverse signing key in apt's database:
 
@@ -53,6 +53,7 @@ Configure apt to retrieve packages from the Arvados package repository. This com
 table(table table-bordered table-condensed).
 |OS version|Command|
 |Debian 7 ("wheezy")|<notextile><code><span class="userinput">echo "deb http://apt.arvados.org/ wheezy main" &#x7c; sudo tee /etc/apt/sources.list.d/arvados.list</span></code></notextile>|
+|Debian 8 ("jessie")|<notextile><code><span class="userinput">echo "deb http://apt.arvados.org/ jessie main" &#x7c; sudo tee /etc/apt/sources.list.d/arvados.list</span></code></notextile>|
 |Ubuntu 12.04 ("precise")|<notextile><code><span class="userinput">echo "deb http://apt.arvados.org/ precise main" &#x7c; sudo tee /etc/apt/sources.list.d/arvados.list</span></code></notextile>|
 |Ubuntu 14.04 ("trusty")|<notextile><code><span class="userinput">echo "deb http://apt.arvados.org/ trusty main" &#x7c; sudo tee /etc/apt/sources.list.d/arvados.list</span></code></notextile>|
 
diff --git a/doc/install/install-sso.html.textile.liquid b/doc/install/install-sso.html.textile.liquid
index af999e95a922b14bbda53339b5d7eb29c20bae6b..56c7a4b337eb5eb0c2b6c8fbd2aa47675e353b7c 100644 (file)
 ---
 layout: default
 navsection: installguide
-title: Install Single Sign On (SSO) server
+title: Install the Single Sign On (SSO) server
 ...
 
-h2(#dependencies). Install dependencies
+h2(#dependencies). Install prerequisites
 
-h3(#install_git_curl). Install git and curl
-
-{% include 'install_git_curl' %}
+The Arvados package repository includes an SSO server package that can help automate much of the deployment.
 
 h3(#install_ruby_and_bundler). Install Ruby and Bundler
 
 {% include 'install_ruby_and_bundler' %}
 
-h3(#install_postgres). Install PostgreSQL
+h3(#install_web_server). Set up a Web server
+
+For best performance, we recommend you use Nginx as your Web server frontend with a Passenger backend to serve the SSO server. The Passenger team provides "Nginx + Passenger installation instructions":https://www.phusionpassenger.com/library/walkthroughs/deploy/ruby/ownserver/nginx/oss/install_passenger_main.html.
 
-{% include 'install_postgres' %}
+Follow the instructions until you see the section that says you are ready to deploy your Ruby application on the production server.
 
-h2(#install). Install SSO server
+h2(#install). Install the SSO server
 
-h3. Get SSO server code and run bundle
+On a Debian-based system, install the following package:
 
 <notextile>
-<pre><code>~$ <span class="userinput">cd $HOME</span> # (or wherever you want to install)
-~$ <span class="userinput">git clone https://github.com/curoverse/sso-devise-omniauth-provider.git</span>
-~$ <span class="userinput">cd sso-devise-omniauth-provider</span>
-~/sso-devise-omniauth-provider$ <span class="userinput">bundle install --without=development</span>
-</code></pre></notextile>
+<pre><code>~$ <span class="userinput">sudo apt-get install arvados-sso-server</span>
+</code></pre>
+</notextile>
+
+On a Red Hat-based system, install the following package:
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo yum install arvados-sso-server</span>
+</code></pre>
+</notextile>
 
-h2. Configure the SSO server
+h2(#configure). Configure the SSO server
 
-First, copy the example configuration file:
+The package has installed three configuration files in @/etc/arvados/sso@:
 
 <notextile>
-<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">cp -i config/application.yml.example config/application.yml</span>
-</code></pre></notextile>
+<pre><code>/etc/arvados/sso/application.yml
+/etc/arvados/sso/database.yml
+/etc/arvados/sso/production.rb
+</code></pre>
+</notextile>
+
+The SSO server runs from the @/var/www/arvados-sso/current/@ directory. The files @/var/www/arvados-sso/current/config/application.yml@, @/var/www/arvados-sso/current/config/database.yml@ and @/var/www/arvados-sso/current/config/environments/production.rb@ are symlinked to the configuration files in @/etc/arvados/sso/@.
 
 The SSO server reads the @config/application.yml@ file, as well as the @config/application.default.yml@ file. Values in @config/application.yml@ take precedence over the defaults that are defined in @config/application.default.yml@. The @config/application.yml.example@ file is not read by the SSO server and is provided for installation convenience only.
 
-Consult @config/application.default.yml@ for a full list of configuration options.  Local configuration goes in @config/application.yml@, do not edit @config/application.default.yml@.
+Consult @config/application.default.yml@ for a full list of configuration options.  Local configuration goes in @/etc/arvados/sso/application.yml@; do not edit @config/application.default.yml@.
 
 h3(#uuid_prefix). uuid_prefix
 
 Generate a uuid prefix for the single sign on service.  This prefix is used to identify user records as originating from this site.  It must be exactly 5 lowercase ASCII letters and/or digits.  You may use the following snippet to generate a uuid prefix:
 
 <notextile>
-<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">ruby -e 'puts "#{rand(2**64).to_s(36)[0,5]}"'</span>
+<pre><code>~$ <span class="userinput">ruby -e 'puts "#{rand(2**64).to_s(36)[0,5]}"'</span>
 abcde
 </code></pre></notextile>
 
-Edit @config/application.yml@ and set @uuid_prefix@ in the "common" section.
+Edit @/etc/arvados/sso/application.yml@ and set @uuid_prefix@ in the "common" section.
 
 h3(#secret_token). secret_token
 
 Generate a new secret token for signing cookies:
 
 <notextile>
-<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">ruby -e 'puts rand(2**400).to_s(36)'</span>
+<pre><code>~$ <span class="userinput">ruby -e 'puts rand(2**400).to_s(36)'</span>
 zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
 </code></pre></notextile>
 
-Edit @config/application.yml@ and set @secret_token@ in the "common" section.
+Edit @/etc/arvados/sso/application.yml@ and set @secret_token@ in the "common" section.
+
+There are other configuration options in @/etc/arvados/sso/application.yml@. See the "Authentication methods":install-sso.html#authentication_methods section below for more details.
 
 h2(#database). Set up the database
 
-Generate a new database password. Nobody ever needs to memorize it or type it, so make a strong one:
+If PostgreSQL was newly installed as a dependency of the @arvados-sso-server@ package, you will need to start the service.
+
+On a Debian-based system:
 
 <notextile>
-<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">ruby -e 'puts rand(2**128).to_s(36)'</span>
+<pre><code>~$ <span class="userinput">sudo service postgresql start</span>
+</code></pre>
+</notextile>
+
+On a Red Hat-based system, we also need to initialize the database system:
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo service postgresql initdb</span>
+~$ <span class="userinput">sudo service postgresql start</span>
+</code></pre>
+</notextile>
+
+{% include 'notebox_begin' %}
+
+If you are installing on CentOS 6, you will need to modify PostgreSQL's configuration to allow password authentication for local users. The default configuration allows 'ident' only. The following commands will make the configuration change and restart PostgreSQL for it to take effect.
+<br/>
+<notextile>
+<pre><code>~$ <span class="userinput">sudo sed -i -e "s/127.0.0.1\/32          ident/127.0.0.1\/32          md5/" /var/lib/pgsql/data/pg_hba.conf</span>
+~$ <span class="userinput">sudo sed -i -e "s/::1\/128               ident/::1\/128               md5/" /var/lib/pgsql/data/pg_hba.conf</span>
+~$ <span class="userinput">sudo service postgresql restart</span>
+</code></pre>
+</notextile>
+{% include 'notebox_end' %}
+
+
+Next, generate a new database password. Nobody ever needs to memorize it or type it, so make a strong one:
+
+<notextile>
+<pre><code>~$ <span class="userinput">ruby -e 'puts rand(2**128).to_s(36)'</span>
 abcdefghijklmnopqrstuvwxyz012345689
 </code></pre></notextile>
 
+Configure the SSO server to connect to your database by updating @/etc/arvados/sso/database.yml@. Replace the @xxxxxxxx@ database password placeholder with the new password you generated above. Be sure to update the @production@ section.
+
+<notextile>
+<pre><code>~$ <span class="userinput">editor /etc/arvados/sso/database.yml</span>
+</code></pre></notextile>
+
 Create a new database user with permission to create its own databases.
 
 <notextile>
-<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">sudo -u postgres createuser --createdb --encrypted -R -S --pwprompt arvados_sso</span>
+<pre><code>~$ <span class="userinput">sudo -u postgres createuser --createdb --encrypted -R -S --pwprompt arvados_sso</span>
 Enter password for new role: <span class="userinput">paste-database-password-you-generated</span>
 Enter it again: <span class="userinput">paste-database-password-you-generated</span>
 </code></pre></notextile>
 
-Configure SSO server to connect to your database by creating and updating @config/database.yml@. Replace the @xxxxxxxx@ database password placeholders with the new password you generated above.  If you are planning a production system, update the @production@ section, otherwise use @development@.
+Rails will take care of creating the database based on the information in @/etc/arvados/sso/database.yml@.
+
+Alternatively, if the database user you intend to use for the SSO server should not be allowed to create new databases, the user and the database can be created like this:
 
 <notextile>
-<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">cp -i config/database.yml.example config/database.yml</span>
-~/sso-devise-omniauth-provider$ <span class="userinput">edit config/database.yml</span>
+<pre><code>~$ <span class="userinput">sudo -u postgres createuser --encrypted -R -S --pwprompt arvados_sso</span>
+Enter password for new role: <span class="userinput">paste-database-password-you-generated</span>
+Enter it again: <span class="userinput">paste-database-password-you-generated</span>
+~$ <span class="userinput">sudo -u postgres createdb arvados_sso_production -E UTF8 -O arvados_sso -T template0</span>
 </code></pre></notextile>
 
-Create and initialize the database. If you are planning a production system, choose the @production@ rails environment, otherwise use @development@.
+h2(#reconfigure_package). Reconfigure the package
+
+Now that the @/etc/arvados/sso/application.yml@ and @/etc/arvados/sso/database.yml@ files have been updated, we need to reconfigure our installed package. Doing so will create and/or initialize the database and precompile the assets.
+
+On a Debian-based system:
 
 <notextile>
-<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">RAILS_ENV=production bundle exec rake db:setup</span>
-</code></pre></notextile>
+<pre><code>~$ <span class="userinput">sudo dpkg-reconfigure arvados-sso-server</span>
+</code></pre>
+</notextile>
 
-Alternatively, if the database user you intend to use for the SSO server is not allowed to create new databases, you can create the database first and then populate it with rake. Be sure to adjust the database name if you are using the @development@ environment. This sequence of commands is functionally equivalent to the rake db:setup command above:
+On a Red Hat-based system, we need to reinstall the package instead:
 
 <notextile>
-<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">sudo -u postgres createdb arvados_sso_production -E UTF8 -O arvados_sso -T template0</span>
-~/sso-devise-omniauth-provider$ <span class="userinput">RAILS_ENV=production bundle exec rake db:schema:load</span>
-~/sso-devise-omniauth-provider$ <span class="userinput">RAILS_ENV=production bundle exec rake db:seed</span>
-</code></pre></notextile>
+<pre><code>~$ <span class="userinput">sudo yum reinstall arvados-sso-server</span>
+</code></pre>
+</notextile>
 
 h2(#client). Create arvados-server client
 
 Use @rails console@ to create a @Client@ record that will be used by the Arvados API server.  The values of @app_id@ and @app_secret@ correspond to the values for @sso_app_id@ and @sso_app_secret@ in the "API server's SSO settings.":install-api-server.html#omniauth
 
 <notextile>
-<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">ruby -e 'puts rand(2**400).to_s(36)'</span>
+<pre><code>~$ <span class="userinput">ruby -e 'puts rand(2**400).to_s(36)'</span>
 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
-~/sso-devise-omniauth-provider$ <span class="userinput">RAILS_ENV=production bundle exec rails console</span>
+~$ <span class="userinput">RAILS_ENV=production bundle exec rails console</span>
 :001 &gt; <span class="userinput">c = Client.new</span>
 :002 &gt; <span class="userinput">c.name = "joshid"</span>
 :003 &gt; <span class="userinput">c.app_id = "arvados-server"</span>
@@ -118,15 +174,56 @@ xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
 </code></pre>
 </notextile>
 
-h2(#assets). Precompile assets
+h2(#configure_web_server). Configure your web server
 
-If you are running in the production environment, you must precompile the assets:
+Edit the http section of your Nginx configuration to run the Passenger server and act as a frontend for it. You might add a block like the following, adding SSL and logging parameters to taste:
 
 <notextile>
-<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">RAILS_ENV=production bundle exec rake assets:precompile</span>
+<pre><code>server {
+  listen 127.0.0.1:8900;
+  server_name localhost-sso;
+
+  root   /var/www/arvados-sso/current/public;
+  index  index.html;
+
+  passenger_enabled on;
+  # If you're not using RVM, comment out the line below.
+  passenger_ruby /usr/local/rvm/wrappers/default/ruby;
+}
+
+upstream sso {
+  server     127.0.0.1:8900  fail_timeout=10s;
+}
+
+proxy_http_version 1.1;
+
+server {
+  listen       <span class="userinput">[your public IP address]</span>:443 ssl;
+  server_name  auth.<span class="userinput">your.domain</span>;
+
+  ssl on;
+  ssl_certificate     <span class="userinput">/YOUR/PATH/TO/cert.pem</span>;
+  ssl_certificate_key <span class="userinput">/YOUR/PATH/TO/cert.key</span>;
+
+  index  index.html;
+
+  location / {
+    proxy_pass            http://sso;
+    proxy_redirect        off;
+    proxy_connect_timeout 90s;
+    proxy_read_timeout    300s;
+
+    proxy_set_header      X-Forwarded-Proto https;
+    proxy_set_header      Host $http_host;
+    proxy_set_header      X-Real-IP $remote_addr;
+    proxy_set_header      X-Forwarded-For $proxy_add_x_forwarded_for;
+  }
+}
 </code></pre>
 </notextile>
 
+Finally, restart Nginx, and your Arvados SSO server should be up and running. You can verify this by visiting the URL you configured in the server section above (port 443). Read on if you want to configure your Arvados SSO server to use a different authentication backend.
+
 h2(#authentication_methods). Authentication methods
 
 Authentication methods are configured in @application.yml@.  Currently three authentication methods are supported: local accounts, LDAP, and Google+.  If neither Google+ nor LDAP is enabled, the SSO server defaults to local user accounts.  Only one authentication mechanism should be in use at a time.
@@ -150,7 +247,7 @@ For more information about configuring backend support for sending email (requir
 If @allow_account_registration@ is false, you may manually create local accounts on the SSO server from the rails console:
 
 <notextile>
-<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">RAILS_ENV=production bundle exec rails console</span>
+<pre><code>~$ <span class="userinput">RAILS_ENV=production bundle exec rails console</span>
 :001 &gt; <span class="userinput">user = User.new(:email =&gt; "test@example.com")</span>
 :002 &gt; <span class="userinput">user.password = "passw0rd"</span>
 :003 &gt; <span class="userinput">user.save!</span>
@@ -210,76 +307,4 @@ In order to use Google+ authentication, you must use the <a href="https://consol
   google_oauth2_client_id: <span class="userinput">"---YOUR---CLIENT---ID---HERE--"-</span>
   google_oauth2_client_secret: <span class="userinput">"---YOUR---CLIENT---SECRET---HERE--"-</span></code></pre></notextile>
 
-h2(#start). Set up a Web server
-
-For best performance, we recommend you use Nginx as your Web server front-end, with a Passenger backend to serve the SSO server.  To do that:
-
-<notextile>
-<ol>
-<li><a href="https://www.phusionpassenger.com/documentation/Users%20guide%20Nginx.html">Install Nginx and Phusion Passenger</a>.</li>
-
-<li><p>Edit the http section of your Nginx configuration to run the Passenger server, and act as a front-end for it.  You might add a block like the following, adding SSL and logging parameters to taste:</p>
-
-<pre><code>server {
-  listen 127.0.0.1:8900;
-  server_name localhost-sso;
-
-  root   <span class="userinput">/YOUR/PATH/TO/sso-devise-omniauth-provider/public</span>;
-  index  index.html index.htm index.php;
-
-  passenger_enabled on;
-  # If you're using RVM, uncomment the line below.
-  #passenger_ruby /usr/local/rvm/wrappers/default/ruby;
-}
-
-upstream sso {
-  server     127.0.0.1:8900  fail_timeout=10s;
-}
-
-proxy_http_version 1.1;
-
-server {
-  listen       <span class="userinput">[your public IP address]</span>:443 ssl;
-  server_name  auth.<span class="userinput">your.domain</span>;
-
-  ssl on;
-  ssl_certificate     <span class="userinput">/YOUR/PATH/TO/cert.pem</span>;
-  ssl_certificate_key <span class="userinput">/YOUR/PATH/TO/cert.key</span>;
-
-  index  index.html index.htm index.php;
-
-  location / {
-    proxy_pass            http://sso;
-    proxy_redirect        off;
-    proxy_connect_timeout 90s;
-    proxy_read_timeout    300s;
-
-    proxy_set_header      X-Forwarded-Proto https;
-    proxy_set_header      Host $http_host;
-    proxy_set_header      X-Real-IP $remote_addr;
-    proxy_set_header      X-Forwarded-For $proxy_add_x_forwarded_for;
-  }
-}
-</code></pre>
-</li>
-
-<li>Restart Nginx.</li>
-
-</ol>
-</notextile>
 
-{% include 'notebox_begin' %}
-
-If you see the following warning "you may safely ignore it":https://stackoverflow.com/questions/10374871/no-secret-option-provided-to-racksessioncookie-warning:
-
-<pre>
-SECURITY WARNING: No secret option provided to Rack::Session::Cookie.
-This poses a security threat. It is strongly recommended that you
-provide a secret to prevent exploits that may be possible from crafted
-cookies. This will not be supported in future versions of Rack, and
-future versions will even invalidate your existing user cookies.
-
-Called from: /var/lib/gems/2.1.0/gems/actionpack-3.2.8/lib/action_dispatch/middleware/session/abstract_store.rb:28:in `initialize'.
-</pre>
-
-{% include 'notebox_end' %}
diff --git a/doc/install/install-workbench-app.html.textile.liquid b/doc/install/install-workbench-app.html.textile.liquid
index 52a69f502b1d6e65ad920230ab91599f86be0b19..22fc1557a8fae0336fff6f0f80e89e8f8d1b1262 100644 (file)
@@ -6,7 +6,7 @@ title: Install Workbench
 
 h2. Install prerequisites
 
-The Arvados package repository includes Workbench server package that can help automate much of the deployment.
+The Arvados package repository includes a Workbench server package that can help automate much of the deployment.
 
 h3(#install_ruby_and_bundler). Install Ruby and Bundler
 
@@ -93,7 +93,7 @@ For best performance, we recommend you use Nginx as your Web server front-end, w
 
 <notextile>
 <ol>
-<li><a href="https://www.phusionpassenger.com/documentation/Users%20guide%20Nginx.html">Install Nginx and Phusion Passenger</a>.</li>
+<li><a href="https://www.phusionpassenger.com/library/walkthroughs/deploy/ruby/ownserver/nginx/oss/install_passenger_main.html">Install Nginx and Phusion Passenger</a>.</li>
 
 <li>If you're deploying on CentOS and using the python27 Software Collection, configure Nginx to use it:
 
diff --git a/doc/sdk/cli/subcommands.html.textile.liquid b/doc/sdk/cli/subcommands.html.textile.liquid
index 018213c1a1815061f3265a85040937dcb53652a7..ca494fe17a570e0868af2157fcc92dda6b480cc2 100644 (file)
@@ -21,6 +21,20 @@ Options:
 </pre>
 </notextile>
 
+h3(#arv-get). arv get
+
+@arv get@ can be used to get a textual representation of Arvados objects from the command line. The output can be limited to a subset of the object's fields. All you need to use this command is the object's UUID.
+
+<notextile>
+<pre>
+$ <code class="userinput">arv get --help</code>
+Usage: arv [--format json|yaml] get [uuid] [fields...]
+
+Fetch the specified Arvados object, select the specified fields,
+and print a text representation.
+</pre>
+</notextile>
+
 h3(#arv-edit). arv edit
 
 @arv edit@ can be used to edit Arvados objects from the command line. Arv edit opens up the editor of your choice (set the EDITOR environment variable) with the json or yaml description of the object. Saving the file will update the Arvados object on the API server, if it passes validation.
@@ -157,7 +171,7 @@ $ <code class="userinput">arv keep --help</code>
 Usage: arv keep [method] [--parameters]
 Use 'arv keep [method] --help' to get more information about specific methods.
 
-Available methods: ls, get, put, less, check, docker
+Available methods: ls, get, put, docker
 </pre>
 </notextile>
 
diff --git a/doc/user/topics/arv-copy.html.textile.liquid b/doc/user/topics/arv-copy.html.textile.liquid
index 07e60486275104700bf82d60bc07f405b3326ae1..1ec80a619b7368ecc0b1b44ddaa74c001374ac92 100644 (file)
@@ -11,7 +11,7 @@ This tutorial describes how to copy Arvados objects from one cluster to another
 
 h2. arv-copy
 
-@arv-copy@ allows users to copy collections, pipeline templates, and pipeline instances from one cluster to another. By default, @arv-copy@ will recursively go through a template or instance and copy all dependencies associated with the object.
+@arv-copy@ allows users to copy collections and pipeline templates from one cluster to another. By default, @arv-copy@ will recursively go through a template and copy all dependencies associated with the object.
 
 For example, let's copy from our <a href="https://cloud.curoverse.com/">beta cloud instance *qr1hi*</a> to *dst_cluster*. The names *qr1hi* and *dst_cluster* are interchangeable with any cluster name. You can find the cluster name from the prefix of the uuid of the object you want to copy. For example, in *qr1hi*-4zz18-tci4vn4fa95w0zx, the cluster name is qr1hi.
 
@@ -52,29 +52,29 @@ For example, this will copy the collection to project dst_cluster-j7d0g-a894213u
 </code></pre>
 </notextile>
 
-h3. How to copy a pipeline template or pipeline instance
+h3. How to copy a pipeline template
 
 {% include 'arv_copy_expectations' %}
 
-We will use the uuid @qr1hi-d1hrv-nao0ohw8y7dpf84@ as an example pipeline instance.
+We will use the uuid @qr1hi-p5p6p-9pkaxt6qjnkxhhu@ as an example pipeline template.
 
 <notextile>
-<pre><code>~$ <span class="userinput">arv-copy --src qr1hi --dst dst_cluster --dst-git-repo $USER/tutorial qr1hi-d1hrv-nao0ohw8y7dpf84</span>
+<pre><code>~$ <span class="userinput">arv-copy --src qr1hi --dst dst_cluster --dst-git-repo $USER/tutorial qr1hi-p5p6p-9pkaxt6qjnkxhhu</span>
 To git@git.dst_cluster.arvadosapi.com:$USER/tutorial.git
  * [new branch] git_git_qr1hi_arvadosapi_com_arvados_git_ac21f0d45a76294aaca0c0c0fdf06eb72d03368d -> git_git_qr1hi_arvadosapi_com_arvados_git_ac21f0d45a76294aaca0c0c0fdf06eb72d03368d
-arvados.arv-copy[19694] INFO: Success: created copy with uuid dst_cluster-d1hrv-rym2h5ub9m8ofwj
+arvados.arv-copy[19694] INFO: Success: created copy with uuid dst_cluster-p5p6p-rym2h5ub9m8ofwj
 </code></pre>
 </notextile>
 
 New branches in the destination git repo will be created for each branch used in the pipeline template. For example, if your source branch was named ac21f0d45a76294aaca0c0c0fdf06eb72d03368d, your new branch will be named @git_git_qr1hi_arvadosapi_com_reponame_git_ac21f0d45a76294aaca0c0c0fdf06eb72d03368d@. 
 
-By default, if you copy a pipeline instance recursively, you will find that the template as well as all the dependencies are in your home project.
+By default, if you copy a pipeline template recursively, you will find that the template as well as all the dependencies are in your home project.
 
 If you would like to copy the object without dependencies, you can use the @--no-recursive@ flag.
 
 For example, we can copy the same object using this flag.
 
 <notextile>
-<pre><code>~$ <span class="userinput">arv-copy --src qr1hi --dst dst_cluster --dst-git-repo $USER/tutorial --no-recursive qr1hi-d1hrv-nao0ohw8y7dpf84</span>
+<pre><code>~$ <span class="userinput">arv-copy --src qr1hi --dst dst_cluster --dst-git-repo $USER/tutorial --no-recursive qr1hi-p5p6p-9pkaxt6qjnkxhhu</span>
 </code></pre>
 </notextile>
diff --git a/docker/compute/Dockerfile b/docker/compute/Dockerfile
index f2f48da79453f8568442be224bded0210a91a677..948f6c73d4e486973e733c3ea004de3726b4e063 100644 (file)
@@ -4,7 +4,9 @@ FROM arvados/slurm
 MAINTAINER Ward Vandewege <ward@curoverse.com>
 
 RUN apt-get update -q
-RUN apt-get install -qy supervisor python-pip python-pyvcf python-gflags python-google-api-python-client python-virtualenv libattr1-dev libfuse-dev python-dev python-llfuse fuse crunchstat python-arvados-fuse cron dnsmasq
+## 20150915 nico -- fuse.postinst has sporadic failures, splitting this up to see if it helps
+RUN apt-get install -qy fuse
+RUN apt-get install -qy supervisor python-pip python-pyvcf python-gflags python-google-api-python-client python-virtualenv libattr1-dev libfuse-dev python-dev python-llfuse crunchstat python-arvados-fuse cron dnsmasq
 
 ADD fuse.conf /etc/fuse.conf
 RUN chmod 644 /etc/fuse.conf
diff --git a/docker/jobs/Dockerfile b/docker/jobs/Dockerfile
index 41e4aea1ddcf0f4b69a6735b72fcbecf7b942a72..0d7295873f723e637cf76413e01c16c6a2be5d95 100644 (file)
@@ -8,7 +8,7 @@ ADD apt.arvados.org.list /etc/apt/sources.list.d/
 RUN apt-key adv --keyserver pool.sks-keyservers.net --recv 1078ECD7
 RUN apt-get update -q
 
-RUN apt-get install -qy git python-minimal python-virtualenv python-arvados-python-client
+RUN apt-get install -qy git python-pip python-virtualenv python-arvados-python-client python-dev libcurl4-gnutls-dev
 
 RUN gpg --keyserver pool.sks-keyservers.net --recv-keys D39DC0E3
 
diff --git a/sdk/cli/bin/arv b/sdk/cli/bin/arv
index b757855b1f22653eef4a6faaa96cc2c684214e86..185a5b0673f1dc1c5afc5ed6530386d8255daaf4 100755 (executable)
@@ -5,6 +5,7 @@
 # Ward Vandewege <ward@curoverse.com>
 
 require 'fileutils'
+require 'shellwords'
 
 if RUBY_VERSION < '1.9.3' then
   abort <<-EOS
@@ -85,7 +86,15 @@ def init_config
 end
 
 
-subcommands = %w(copy create edit keep pipeline run tag ws)
+subcommands = %w(copy create edit get keep pipeline run tag ws)
+
+def exec_bin bin, opts
+  bin_path = `which #{bin.shellescape}`.strip
+  if bin_path.empty?
+    raise "#{bin}: command not found"
+  end
+  exec bin_path, *opts
+end
 
 def check_subcommands client, arvados, subcommand, global_opts, remaining_opts
   case subcommand
@@ -93,28 +102,27 @@ def check_subcommands client, arvados, subcommand, global_opts, remaining_opts
     arv_create client, arvados, global_opts, remaining_opts
   when 'edit'
     arv_edit client, arvados, global_opts, remaining_opts
+  when 'get'
+    arv_get client, arvados, global_opts, remaining_opts
   when 'copy', 'tag', 'ws', 'run'
-    exec `which arv-#{subcommand}`.strip, *remaining_opts
+    exec_bin "arv-#{subcommand}", remaining_opts
   when 'keep'
     @sub = remaining_opts.shift
     if ['get', 'put', 'ls', 'normalize'].index @sub then
       # Native Arvados
-      exec `which arv-#{@sub}`.strip, *remaining_opts
-    elsif ['less', 'check'].index @sub then
-      # wh* shims
-      exec `which wh#{@sub}`.strip, *remaining_opts
+      exec_bin "arv-#{@sub}", remaining_opts
     elsif @sub == 'docker'
-      exec `which arv-keepdocker`.strip, *remaining_opts
+      exec_bin "arv-keepdocker", remaining_opts
     else
       puts "Usage: arv keep [method] [--parameters]\n"
       puts "Use 'arv keep [method] --help' to get more information about specific methods.\n\n"
-      puts "Available methods: ls, get, put, less, check, docker"
+      puts "Available methods: ls, get, put, docker"
     end
     abort
   when 'pipeline'
     sub = remaining_opts.shift
     if sub == 'run'
-      exec `which arv-run-pipeline-instance`.strip, *remaining_opts
+      exec_bin "arv-run-pipeline-instance", remaining_opts
     else
       puts "Usage: arv pipeline [method] [--parameters]\n"
       puts "Use 'arv pipeline [method] --help' to get more information about specific methods.\n\n"
@@ -150,14 +158,7 @@ end
 
 def edit_and_commit_object initial_obj, tmp_stem, global_opts, &block
 
-  content = case global_opts[:format]
-            when 'json'
-              Oj.dump(initial_obj, :indent => 1)
-            when 'yaml'
-              initial_obj.to_yaml
-            else
-              abort "Unrecognized format #{global_opts[:format]}"
-            end
+  content = get_obj_content initial_obj, global_opts
 
   tmp_file = Tempfile.new([tmp_stem, ".#{global_opts[:format]}"])
   tmp_file.write(content)
@@ -182,6 +183,8 @@ def edit_and_commit_object initial_obj, tmp_stem, global_opts, &block
                    Oj.load(newcontent)
                  when 'yaml'
                    YAML.load(newcontent)
+                 else
+                   abort "Unrecognized format #{global_opts[:format]}"
                  end
 
         yield newobj
@@ -246,20 +249,7 @@ def check_response result
   results
 end
 
-def arv_edit client, arvados, global_opts, remaining_opts
-  uuid = remaining_opts.shift
-  if uuid.nil? or uuid == "-h" or uuid == "--help"
-    puts head_banner
-    puts "Usage: arv edit [uuid] [fields...]\n\n"
-    puts "Fetch the specified Arvados object, select the specified fields, \n"
-    puts "open an interactive text editor on a text representation (json or\n"
-    puts "yaml, use --format) and then update the object.  Will use 'nano'\n"
-    puts "by default, customize with the EDITOR or VISUAL environment variable.\n"
-    exit 255
-  end
-
-  # determine controller
-
+def lookup_uuid_rsc arvados, uuid
   m = /([a-z0-9]{5})-([a-z0-9]{5})-([a-z0-9]{15})/.match uuid
   if !m
     if /^[a-f0-9]{32}/.match uuid
@@ -282,6 +272,11 @@ def arv_edit client, arvados, global_opts, remaining_opts
     abort "Could not determine resource type #{m[2]}"
   end
 
+  return rsc
+end
+
+def fetch_rsc_obj client, arvados, rsc, uuid, remaining_opts
+
   begin
     result = client.execute(:api_method => eval('arvados.' + rsc + '.get'),
                             :parameters => {"uuid" => uuid},
@@ -289,15 +284,45 @@ def arv_edit client, arvados, global_opts, remaining_opts
                             :headers => {
                               authorization: 'OAuth2 '+ENV['ARVADOS_API_TOKEN']
                             })
-    oldobj = check_response result
+    obj = check_response result
   rescue => e
     abort "Server error: #{e}"
   end
 
   if remaining_opts.length > 0
-    oldobj.select! { |k, v| remaining_opts.include? k }
+    obj.select! { |k, v| remaining_opts.include? k }
+  end
+
+  return obj
+end
+
+def get_obj_content obj, global_opts
+  content = case global_opts[:format]
+            when 'json'
+              Oj.dump(obj, :indent => 1)
+            when 'yaml'
+              obj.to_yaml
+            else
+              abort "Unrecognized format #{global_opts[:format]}"
+            end
+  return content
+end
+
+def arv_edit client, arvados, global_opts, remaining_opts
+  uuid = remaining_opts.shift
+  if uuid.nil? or uuid == "-h" or uuid == "--help"
+    puts head_banner
+    puts "Usage: arv edit [uuid] [fields...]\n\n"
+    puts "Fetch the specified Arvados object, select the specified fields, \n"
+    puts "open an interactive text editor on a text representation (json or\n"
+    puts "yaml, use --format) and then update the object.  Will use 'nano'\n"
+    puts "by default, customize with the EDITOR or VISUAL environment variable.\n"
+    exit 255
   end
 
+  rsc = lookup_uuid_rsc arvados, uuid
+  oldobj = fetch_rsc_obj client, arvados, rsc, uuid, remaining_opts
+
   edit_and_commit_object oldobj, uuid, global_opts do |newobj|
     newobj.select! {|k| newobj[k] != oldobj[k]}
     if !newobj.empty?
@@ -318,6 +343,24 @@ def arv_edit client, arvados, global_opts, remaining_opts
   exit 0
 end
 
+def arv_get client, arvados, global_opts, remaining_opts
+  uuid = remaining_opts.shift
+  if uuid.nil? or uuid == "-h" or uuid == "--help"
+    puts head_banner
+    puts "Usage: arv [--format json|yaml] get [uuid] [fields...]\n\n"
+    puts "Fetch the specified Arvados object, select the specified fields,\n"
+    puts "and print a text representation.\n"
+    exit 255
+  end
+
+  rsc = lookup_uuid_rsc arvados, uuid
+  obj = fetch_rsc_obj client, arvados, rsc, uuid, remaining_opts
+  content = get_obj_content obj, global_opts
+
+  puts content
+  exit 0
+end
+
 def arv_create client, arvados, global_opts, remaining_opts
   types = resource_types(arvados.discovery_document)
   create_opts = Trollop::options do
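
The new exec_bin helper guards against a failure mode in the old pattern: exec-ing the raw output of `which` meant calling exec("") and dying with a cryptic Errno::ENOENT whenever the binary was missing. A standalone sketch of the guard (the missing binary name is hypothetical):

    require 'shellwords'

    # exec_bin as added above: fail with a readable message when the
    # requested binary is not on PATH.
    def exec_bin(bin, opts)
      bin_path = `which #{bin.shellescape}`.strip
      raise "#{bin}: command not found" if bin_path.empty?
      exec bin_path, *opts
    end

    begin
      exec_bin("arv-no-such-tool", ["--help"])
    rescue RuntimeError => e
      warn e.message  #=> "arv-no-such-tool: command not found"
    end
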
diff --git a/sdk/cli/bin/crunch-job b/sdk/cli/bin/crunch-job
index fc5005ab9cf089a94f0fd5d1f74ff5cfd7220f04..5539012c49e19e2b9afbeba26850aabbec9405d1 100755 (executable)
@@ -390,12 +390,12 @@ if (!defined $no_clear_tmp) {
   my $cleanpid = fork();
   if ($cleanpid == 0)
   {
-    # Find FUSE mounts that look like Keep mounts (the mount path has the
-    # word "keep") and unmount them.  Then clean up work directories.
-    # TODO: When #5036 is done and widely deployed, we can get rid of the
-    # regular expression and just unmount everything with type fuse.keep.
+    # Find FUSE mounts under $CRUNCH_TMP and unmount them.
+    # Then clean up work directories.
+    # TODO: When #5036 is done and widely deployed, we can limit mount's
+    # -t option to simply fuse.keep.
     srun (["srun", "--nodelist=$nodelist", "-D", $ENV{'TMPDIR'}],
-          ['bash', '-ec', '-o', 'pipefail', 'mount -t fuse,fuse.keep | awk \'($3 ~ /\ykeep\y/){print $3}\' | xargs -r -n 1 fusermount -u -z; sleep 1; rm -rf $JOB_WORK $CRUNCH_INSTALL $CRUNCH_TMP/task $CRUNCH_TMP/src* $CRUNCH_TMP/*.cid']);
+          ['bash', '-ec', '-o', 'pipefail', 'mount -t fuse,fuse.keep | awk "(index(\$3, \"$CRUNCH_TMP\") == 1){print \$3}" | xargs -r -n 1 fusermount -u -z; sleep 1; rm -rf $JOB_WORK $CRUNCH_INSTALL $CRUNCH_TMP/task $CRUNCH_TMP/src* $CRUNCH_TMP/*.cid']);
     exit (1);
   }
   while (1)
@@ -980,7 +980,7 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
     my $gotsome
        = readfrompipes ()
        + reapchildren ();
-    if (!$gotsome)
+    if (!$gotsome || ($latest_refresh + 2 < scalar time))
     {
       check_refresh_wanted();
       check_squeue();
@@ -2205,11 +2205,12 @@ if (-d $sdk_root) {
 my $python_dir = "$install_dir/python";
 if ((-d $python_dir) and can_run("python2.7")) {
   open(my $egg_info_pipe, "-|",
-       "python2.7 \Q$python_dir/setup.py\E --quiet egg_info 2>&1 >/dev/null");
+       "python2.7 \Q$python_dir/setup.py\E egg_info 2>&1 >/dev/null");
   my @egg_info_errors = <$egg_info_pipe>;
   close($egg_info_pipe);
+
   if ($?) {
-    if (@egg_info_errors and ($egg_info_errors[-1] =~ /\bgit\b/)) {
+    if (@egg_info_errors and (($egg_info_errors[-1] =~ /\bgit\b/) or ($egg_info_errors[-1] =~ /\[Errno 2\]/))) {
       # egg_info apparently failed because it couldn't ask git for a build tag.
       # Specify no build tag.
       open(my $pysdk_cfg, ">>", "$python_dir/setup.cfg");
@@ -2218,7 +2219,7 @@ if ((-d $python_dir) and can_run("python2.7")) {
     } else {
       my $egg_info_exit = $? >> 8;
       foreach my $errline (@egg_info_errors) {
-        print STDERR_ORIG $errline;
+        warn $errline;
       }
       warn "python setup.py egg_info failed: exit $egg_info_exit";
       exit ($egg_info_exit || 1);
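
The main-loop change above (!$gotsome || ($latest_refresh + 2 < scalar time)) applies a standard event-loop pattern: run housekeeping not only when idle but whenever a deadline has passed, so a steady stream of pipe output can no longer starve check_refresh_wanted and check_squeue. A rough Ruby rendering of the pattern (all names are hypothetical stubs):

    def do_work; rand(2); end              # stub for readfrompipes + reapchildren
    def housekeeping; puts "refresh"; end  # stub for the refresh/squeue checks

    latest_refresh = Time.now.to_i
    10.times do
      got_some = do_work
      if got_some.zero? || latest_refresh + 2 < Time.now.to_i
        housekeeping
        latest_refresh = Time.now.to_i
      end
      sleep 0.3
    end
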
diff --git a/sdk/cli/test/test_arv-get.rb b/sdk/cli/test/test_arv-get.rb
index 5e58014cbfa10d3b9b67a8b7cddca8b8676f646c..2e2bba562f1e7dedc09fd3f4491da424cb7a5850 100644 (file)
 require 'minitest/autorun'
-require 'digest/md5'
+require 'json'
+require 'yaml'
 
+# Black box tests for 'arv get' command.
 class TestArvGet < Minitest::Test
-  def setup
-    begin
-      Dir.mkdir './tmp'
-    rescue Errno::EEXIST
-    end
-    @@foo_manifest_locator ||= `echo -n foo | ./bin/arv-put --filename foo --no-progress -`.strip
-    @@baz_locator ||= `echo -n baz | ./bin/arv-put --as-raw --no-progress -`.strip
-    @@multilevel_manifest_locator ||= `echo ./foo/bar #{@@baz_locator} 0:3:baz | ./bin/arv-put --as-raw --no-progress -`.strip
-  end
+  # UUID for an Arvados object that does not exist
+  NON_EXISTENT_OBJECT_UUID = "zzzzz-zzzzz-zzzzzzzzzzzzzzz"
+  # Name of field of Arvados object that can store any (textual) value
+  STORED_VALUE_FIELD_NAME = "name"
+  # Name of UUID field of Arvados object
+  UUID_FIELD_NAME = "uuid"
+  # Name of an invalid field of Arvados object
+  INVALID_FIELD_NAME = "invalid"
 
-  def test_no_args
+  # Tests that a valid Arvados object can be retrieved in a supported format
+  # using: `arv get [uuid]`. Given all other `arv foo` commands return JSON
+  # when no format is specified, JSON should be expected in this case.
+  def test_get_valid_object_no_format_specified
+    stored_value = __method__.to_s
+    uuid = create_arv_object_with_value(stored_value)
     out, err = capture_subprocess_io do
-      assert_arv_get false
+      assert(arv_get_default(uuid))
     end
-    assert_equal '', out
-    assert_match /^usage:/, err
+    assert_empty(err, "Error text not expected: '#{err}'")
+    arv_object = parse_json_arv_object(out)
+    assert(has_field_with_value(arv_object, STORED_VALUE_FIELD_NAME, stored_value))
   end
 
-  def test_help
+  # Tests that a valid Arvados object can be retrieved in JSON format using:
+  # `arv get [uuid] --format json`.
+  def test_get_valid_object_json_format_specified
+    stored_value = __method__.to_s
+    uuid = create_arv_object_with_value(stored_value)
     out, err = capture_subprocess_io do
-      assert_arv_get '-h'
+      assert(arv_get_json(uuid))
     end
-    $stderr.write err
-    assert_equal '', err
-    assert_match /^usage:/, out
+    assert_empty(err, "Error text not expected: '#{err}'")
+    arv_object = parse_json_arv_object(out)
+    assert(has_field_with_value(arv_object, STORED_VALUE_FIELD_NAME, stored_value))
   end
 
-  def test_file_to_dev_stdout
-    test_file_to_stdout('/dev/stdout')
-  end
-
-  def test_file_to_stdout(specify_stdout_as='-')
+  # Tests that a valid Arvados object can be retrieved in YAML format using:
+  # `arv get [uuid] --format yaml`.
+  def test_get_valid_object_yaml_format_specified
+    stored_value = __method__.to_s
+    uuid = create_arv_object_with_value(stored_value)
     out, err = capture_subprocess_io do
-      assert_arv_get @@foo_manifest_locator + '/foo', specify_stdout_as
+      assert(arv_get_yaml(uuid))
     end
-    assert_equal '', err
-    assert_equal 'foo', out
+    assert_empty(err, "Error text not expected: '#{err}'")
+    arv_object = parse_yaml_arv_object(out)
+    assert(has_field_with_value(arv_object, STORED_VALUE_FIELD_NAME, stored_value))
   end
 
-  def test_file_to_file
-    remove_tmp_foo
+  # Tests that a subset of all fields of a valid Arvados object can be retrieved
+  # using: `arv get [uuid] [fields...]`.
+  def test_get_valid_object_with_valid_fields
+    stored_value = __method__.to_s
+    uuid = create_arv_object_with_value(stored_value)
     out, err = capture_subprocess_io do
-      assert_arv_get @@foo_manifest_locator + '/foo', 'tmp/foo'
+      assert(arv_get_json(uuid, STORED_VALUE_FIELD_NAME, UUID_FIELD_NAME))
     end
-    assert_equal '', err
-    assert_equal '', out
-    assert_equal 'foo', IO.read('tmp/foo')
+    assert_empty(err, "Error text not expected: '#{err}'")
+    arv_object = parse_json_arv_object(out)
+    assert(has_field_with_value(arv_object, STORED_VALUE_FIELD_NAME, stored_value))
+    assert(has_field_with_value(arv_object, UUID_FIELD_NAME, uuid))
   end
 
-  def test_file_to_file_no_overwrite_file
-    File.open './tmp/foo', 'wb' do |f|
-      f.write 'baz'
-    end
+  # Tests that the valid field is retrieved when both a valid and invalid field
+  # are requested from a valid Arvados object, using:
+  # `arv get [uuid] [fields...]`.
+  def test_get_valid_object_with_both_valid_and_invalid_fields
+    stored_value = __method__.to_s
+    uuid = create_arv_object_with_value(stored_value)
     out, err = capture_subprocess_io do
-      assert_arv_get false, @@foo_manifest_locator + '/foo', 'tmp/foo'
+      assert(arv_get_json(uuid, STORED_VALUE_FIELD_NAME, INVALID_FIELD_NAME))
     end
-    assert_match /Local file tmp\/foo already exists/, err
-    assert_equal '', out
-    assert_equal 'baz', IO.read('tmp/foo')
+    assert_empty(err, "Error text not expected: '#{err}'")
+    arv_object = parse_json_arv_object(out)
+    assert(has_field_with_value(arv_object, STORED_VALUE_FIELD_NAME, stored_value))
+    refute(has_field_with_value(arv_object, INVALID_FIELD_NAME, stored_value))
   end
 
-  def test_file_to_file_no_overwrite_file_in_dir
-    File.open './tmp/foo', 'wb' do |f|
-      f.write 'baz'
-    end
+  # Tests that no fields are retrieved when no valid fields are requested from
+  # a valid Arvados object, using: `arv get [uuid] [fields...]`.
+  def test_get_valid_object_with_no_valid_fields
+    stored_value = __method__.to_s
+    uuid = create_arv_object_with_value(stored_value)
     out, err = capture_subprocess_io do
-      assert_arv_get false, @@foo_manifest_locator + '/', 'tmp/'
+      assert(arv_get_json(uuid, INVALID_FIELD_NAME))
     end
-    assert_match /Local file tmp\/foo already exists/, err
-    assert_equal '', out
-    assert_equal 'baz', IO.read('tmp/foo')
+    assert_empty(err, "Error text not expected: '#{err}'")
+    arv_object = parse_json_arv_object(out)
+    assert_equal(0, arv_object.length)
   end
 
-  def test_file_to_file_force_overwrite
-    File.open './tmp/foo', 'wb' do |f|
-      f.write 'baz'
-    end
-    assert_equal 'baz', IO.read('tmp/foo')
+  # Tests that an invalid (non-existent) Arvados object is not retrieved
+  # using: `arv get [non-existent-uuid]`.
+  def test_get_invalid_object
     out, err = capture_subprocess_io do
-      assert_arv_get '-f', @@foo_manifest_locator + '/', 'tmp/'
+      refute(arv_get_json(NON_EXISTENT_OBJECT_UUID))
     end
-    assert_match '', err
-    assert_equal '', out
-    assert_equal 'foo', IO.read('tmp/foo')
+    refute_empty(err, "Expected error feedback on request for invalid object")
+    assert_empty(out)
   end
 
-  def test_file_to_file_skip_existing
-    File.open './tmp/foo', 'wb' do |f|
-      f.write 'baz'
-    end
-    assert_equal 'baz', IO.read('tmp/foo')
+  # Tests that help text exists using: `arv get --help`.
+  def test_help_exists
     out, err = capture_subprocess_io do
-      assert_arv_get '--skip-existing', @@foo_manifest_locator + '/', 'tmp/'
+      # XXX: Asserting a zero exit code here fails: `arv get --help` exits 255.
+      #      That probably should be 0, which is standard elsewhere, but 255 is
+      #      in line with other `arv` commands (e.g. `arv edit`), so the exit
+      #      status is deliberately not checked.
+      arv_get_default("--help")
     end
-    assert_match '', err
-    assert_equal '', out
-    assert_equal 'baz', IO.read('tmp/foo')
+    assert_empty(err, "Error text not expected: '#{err}'")
+    refute_empty(out, "Help text should be given")
   end
 
-  def test_file_to_dir
-    remove_tmp_foo
-    out, err = capture_subprocess_io do
-      assert_arv_get @@foo_manifest_locator + '/foo', 'tmp/'
-    end
-    assert_equal '', err
-    assert_equal '', out
-    assert_equal 'foo', IO.read('tmp/foo')
-  end
-
-  def test_dir_to_file
-    out, err = capture_subprocess_io do
-      assert_arv_get false, @@foo_manifest_locator + '/', 'tmp/foo'
-    end
-    assert_equal '', out
-    assert_match /^usage:/, err
-  end
-
-  def test_dir_to_empty_string
-    out, err = capture_subprocess_io do
-      assert_arv_get false, @@foo_manifest_locator + '/', ''
-    end
-    assert_equal '', out
-    assert_match /^usage:/, err
-  end
-
-  def test_nonexistent_block
-    out, err = capture_subprocess_io do
-      assert_arv_get false, 'e796ab2294f3e48ec709ffa8d6daf58c'
-    end
-    assert_equal '', out
-    assert_match /Error:/, err
-  end
-
-  def test_nonexistent_manifest
-    out, err = capture_subprocess_io do
-      assert_arv_get false, 'acbd18db4cc2f85cedef654fccc4a4d8/', 'tmp/'
-    end
-    assert_equal '', out
-    assert_match /Error:/, err
-  end
-
-  def test_manifest_root_to_dir
-    remove_tmp_foo
-    out, err = capture_subprocess_io do
-      assert_arv_get '-r', @@foo_manifest_locator + '/', 'tmp/'
-    end
-    assert_equal '', err
-    assert_equal '', out
-    assert_equal 'foo', IO.read('tmp/foo')
-  end
-
-  def test_manifest_root_to_dir_noslash
-    remove_tmp_foo
-    out, err = capture_subprocess_io do
-      assert_arv_get '-r', @@foo_manifest_locator + '/', 'tmp'
-    end
-    assert_equal '', err
-    assert_equal '', out
-    assert_equal 'foo', IO.read('tmp/foo')
-  end
-
-  def test_display_md5sum
-    remove_tmp_foo
-    out, err = capture_subprocess_io do
-      assert_arv_get '-r', '--md5sum', @@foo_manifest_locator + '/', 'tmp/'
-    end
-    assert_equal "#{Digest::MD5.hexdigest('foo')}  ./foo\n", err
-    assert_equal '', out
-    assert_equal 'foo', IO.read('tmp/foo')
+  protected
+  # Runs 'arv get <varargs>' with the given arguments. Returns whether the
+  # exit status was 0 (i.e. success). Use $? to obtain details on failure.
+  def arv_get_default(*args)
+    return system("arv", "get", *args)
   end
 
-  def test_md5sum_nowrite
-    remove_tmp_foo
-    out, err = capture_subprocess_io do
-      assert_arv_get '-n', '--md5sum', @@foo_manifest_locator + '/', 'tmp/'
-    end
-    assert_equal "#{Digest::MD5.hexdigest('foo')}  ./foo\n", err
-    assert_equal '', out
-    assert_equal false, File.exists?('tmp/foo')
+  # Runs 'arv --format json get <varargs>' with the given arguments. Returns
+  # whether the exit status was 0 (i.e. success). Use $? to obtain details on
+  # failure.
+  def arv_get_json(*args)
+    return system("arv", "--format", "json", "get", *args)
   end
 
-  def test_sha1_nowrite
-    remove_tmp_foo
-    out, err = capture_subprocess_io do
-      assert_arv_get '-n', '-r', '--hash', 'sha1', @@foo_manifest_locator+'/', 'tmp/'
-    end
-    assert_equal "#{Digest::SHA1.hexdigest('foo')}  ./foo\n", err
-    assert_equal '', out
-    assert_equal false, File.exists?('tmp/foo')
+  # Runs 'arv --format yaml get <varargs>' with the given arguments. Returns
+  # whether the exit status was 0 (i.e. success). Use $? to obtain details on
+  # failure.
+  def arv_get_yaml(*args)
+    return system("arv", "--format", "yaml", "get", *args)
   end
 
-  def test_block_to_file
-    remove_tmp_foo
+  # Creates an Arvados object that stores the given value. Returns the UUID of
+  # the created object.
+  def create_arv_object_with_value(value)
     out, err = capture_subprocess_io do
-      assert_arv_get @@foo_manifest_locator, 'tmp/foo'
+      system("arv", "tag", "add", value, "--object", "testing")
+      assert $?.success?, "Command failure running `arv tag`: #{$?}"
     end
     assert_equal '', err
-    assert_equal '', out
-
-    digest = Digest::MD5.hexdigest('foo')
-    !(IO.read('tmp/foo')).gsub!( /^(. #{digest}+3)(.*)( 0:3:foo)$/).nil?
+    assert_operator 0, :<, out.strip.length
+    out.strip
   end
 
-  def test_create_directory_tree
-    `rm -rf ./tmp/arv-get-test/`
-    Dir.mkdir './tmp/arv-get-test'
-    out, err = capture_subprocess_io do
-      assert_arv_get @@multilevel_manifest_locator + '/', 'tmp/arv-get-test/'
+  # Parses the given JSON representation of an Arvados object, returning
+  # an equivalent Ruby representation (a hash map).
+  def parse_json_arv_object(arvObjectAsJson)
+    begin
+      parsed = JSON.parse(arvObjectAsJson)
+      assert(parsed.instance_of?(Hash))
+      return parsed
+    rescue JSON::ParserError => e
+      raise "Invalid JSON representation of Arvados object.\n" \
+            "Parse error: '#{e}'\n" \
+            "JSON: '#{arvObjectAsJson}'\n"
     end
-    assert_equal '', err
-    assert_equal '', out
-    assert_equal 'baz', IO.read('tmp/arv-get-test/foo/bar/baz')
   end
 
-  def test_create_partial_directory_tree
-    `rm -rf ./tmp/arv-get-test/`
-    Dir.mkdir './tmp/arv-get-test'
-    out, err = capture_subprocess_io do
-      assert_arv_get(@@multilevel_manifest_locator + '/foo/',
-                     'tmp/arv-get-test/')
+  # Parses the given YAML representation of an Arvados object, returning
+  # an equivalent Ruby representation (a hash map).
+  def parse_yaml_arv_object(arvObjectAsYaml)
+    begin
+      parsed = YAML.load(arvObjectAsYaml)
+      assert(parsed.instance_of?(Hash))
+      return parsed
+    rescue
+      raise "Invalid YAML representation of Arvados object.\n" \
+            "YAML: '#{arvObjectAsYaml}'\n"
     end
-    assert_equal '', err
-    assert_equal '', out
-    assert_equal 'baz', IO.read('tmp/arv-get-test/bar/baz')
   end
 
-  protected
-  def assert_arv_get(*args)
-    expect = case args.first
-             when true, false
-               args.shift
-             else
-               true
-             end
-    assert_equal(expect,
-                 system(['./bin/arv-get', 'arv-get'], *args),
-                 "`arv-get #{args.join ' '}` " +
-                 "should exit #{if expect then 0 else 'non-zero' end}")
-  end
-
-  def remove_tmp_foo
-    begin
-      File.unlink('tmp/foo')
-    rescue Errno::ENOENT
+  # Checks whether the given Arvados object has the given expected value for the
+  # specified field.
+  def has_field_with_value(arvObjectAsHash, fieldName, expectedValue)
+    if !arvObjectAsHash.has_key?(fieldName)
+      return false
     end
+    return (arvObjectAsHash[fieldName] == expectedValue)
   end
 end
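
For orientation, a minimal sketch (hypothetical stored value) of the round trip
these tests exercise, using the helpers defined above:

    uuid = create_arv_object_with_value("example")   # via `arv tag add ...`
    out, _err = capture_subprocess_io { arv_get_json(uuid, "name", "uuid") }
    obj = parse_json_arv_object(out)                 # => {"name"=>"example", "uuid"=>uuid, ...}
    has_field_with_value(obj, "name", "example")     # => true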
diff --git a/sdk/cli/test/test_arv-keep-get.rb b/sdk/cli/test/test_arv-keep-get.rb
new file mode 100644 (file)
index 0000000..0e578b8
--- /dev/null
@@ -0,0 +1,251 @@
+require 'minitest/autorun'
+require 'digest/md5'
+
+class TestArvKeepGet < Minitest::Test
+  def setup
+    begin
+      Dir.mkdir './tmp'
+    rescue Errno::EEXIST
+    end
+    @@foo_manifest_locator ||= `echo -n foo | ./bin/arv-put --filename foo --no-progress -`.strip
+    @@baz_locator ||= `echo -n baz | ./bin/arv-put --as-raw --no-progress -`.strip
+    @@multilevel_manifest_locator ||= `echo ./foo/bar #{@@baz_locator} 0:3:baz | ./bin/arv-put --as-raw --no-progress -`.strip
+  end
+
+  def test_no_args
+    out, err = capture_subprocess_io do
+      assert_arv_get false
+    end
+    assert_equal '', out
+    assert_match /^usage:/, err
+  end
+
+  def test_help
+    out, err = capture_subprocess_io do
+      assert_arv_get '-h'
+    end
+    $stderr.write err
+    assert_equal '', err
+    assert_match /^usage:/, out
+  end
+
+  def test_file_to_dev_stdout
+    test_file_to_stdout('/dev/stdout')
+  end
+
+  def test_file_to_stdout(specify_stdout_as='-')
+    out, err = capture_subprocess_io do
+      assert_arv_get @@foo_manifest_locator + '/foo', specify_stdout_as
+    end
+    assert_equal '', err
+    assert_equal 'foo', out
+  end
+
+  def test_file_to_file
+    remove_tmp_foo
+    out, err = capture_subprocess_io do
+      assert_arv_get @@foo_manifest_locator + '/foo', 'tmp/foo'
+    end
+    assert_equal '', err
+    assert_equal '', out
+    assert_equal 'foo', IO.read('tmp/foo')
+  end
+
+  def test_file_to_file_no_overwrite_file
+    File.open './tmp/foo', 'wb' do |f|
+      f.write 'baz'
+    end
+    out, err = capture_subprocess_io do
+      assert_arv_get false, @@foo_manifest_locator + '/foo', 'tmp/foo'
+    end
+    assert_match /Local file tmp\/foo already exists/, err
+    assert_equal '', out
+    assert_equal 'baz', IO.read('tmp/foo')
+  end
+
+  def test_file_to_file_no_overwrite_file_in_dir
+    File.open './tmp/foo', 'wb' do |f|
+      f.write 'baz'
+    end
+    out, err = capture_subprocess_io do
+      assert_arv_get false, @@foo_manifest_locator + '/', 'tmp/'
+    end
+    assert_match /Local file tmp\/foo already exists/, err
+    assert_equal '', out
+    assert_equal 'baz', IO.read('tmp/foo')
+  end
+
+  def test_file_to_file_force_overwrite
+    File.open './tmp/foo', 'wb' do |f|
+      f.write 'baz'
+    end
+    assert_equal 'baz', IO.read('tmp/foo')
+    out, err = capture_subprocess_io do
+      assert_arv_get '-f', @@foo_manifest_locator + '/', 'tmp/'
+    end
+    assert_match '', err
+    assert_equal '', out
+    assert_equal 'foo', IO.read('tmp/foo')
+  end
+
+  def test_file_to_file_skip_existing
+    File.open './tmp/foo', 'wb' do |f|
+      f.write 'baz'
+    end
+    assert_equal 'baz', IO.read('tmp/foo')
+    out, err = capture_subprocess_io do
+      assert_arv_get '--skip-existing', @@foo_manifest_locator + '/', 'tmp/'
+    end
+    assert_match '', err
+    assert_equal '', out
+    assert_equal 'baz', IO.read('tmp/foo')
+  end
+
+  def test_file_to_dir
+    remove_tmp_foo
+    out, err = capture_subprocess_io do
+      assert_arv_get @@foo_manifest_locator + '/foo', 'tmp/'
+    end
+    assert_equal '', err
+    assert_equal '', out
+    assert_equal 'foo', IO.read('tmp/foo')
+  end
+
+  def test_dir_to_file
+    out, err = capture_subprocess_io do
+      assert_arv_get false, @@foo_manifest_locator + '/', 'tmp/foo'
+    end
+    assert_equal '', out
+    assert_match /^usage:/, err
+  end
+
+  def test_dir_to_empty_string
+    out, err = capture_subprocess_io do
+      assert_arv_get false, @@foo_manifest_locator + '/', ''
+    end
+    assert_equal '', out
+    assert_match /^usage:/, err
+  end
+
+  def test_nonexistent_block
+    out, err = capture_subprocess_io do
+      assert_arv_get false, 'e796ab2294f3e48ec709ffa8d6daf58c'
+    end
+    assert_equal '', out
+    assert_match /Error:/, err
+  end
+
+  def test_nonexistent_manifest
+    out, err = capture_subprocess_io do
+      assert_arv_get false, 'acbd18db4cc2f85cedef654fccc4a4d8/', 'tmp/'
+    end
+    assert_equal '', out
+    assert_match /Error:/, err
+  end
+
+  def test_manifest_root_to_dir
+    remove_tmp_foo
+    out, err = capture_subprocess_io do
+      assert_arv_get '-r', @@foo_manifest_locator + '/', 'tmp/'
+    end
+    assert_equal '', err
+    assert_equal '', out
+    assert_equal 'foo', IO.read('tmp/foo')
+  end
+
+  def test_manifest_root_to_dir_noslash
+    remove_tmp_foo
+    out, err = capture_subprocess_io do
+      assert_arv_get '-r', @@foo_manifest_locator + '/', 'tmp'
+    end
+    assert_equal '', err
+    assert_equal '', out
+    assert_equal 'foo', IO.read('tmp/foo')
+  end
+
+  def test_display_md5sum
+    remove_tmp_foo
+    out, err = capture_subprocess_io do
+      assert_arv_get '-r', '--md5sum', @@foo_manifest_locator + '/', 'tmp/'
+    end
+    assert_equal "#{Digest::MD5.hexdigest('foo')}  ./foo\n", err
+    assert_equal '', out
+    assert_equal 'foo', IO.read('tmp/foo')
+  end
+
+  def test_md5sum_nowrite
+    remove_tmp_foo
+    out, err = capture_subprocess_io do
+      assert_arv_get '-n', '--md5sum', @@foo_manifest_locator + '/', 'tmp/'
+    end
+    assert_equal "#{Digest::MD5.hexdigest('foo')}  ./foo\n", err
+    assert_equal '', out
+    assert_equal false, File.exists?('tmp/foo')
+  end
+
+  def test_sha1_nowrite
+    remove_tmp_foo
+    out, err = capture_subprocess_io do
+      assert_arv_get '-n', '-r', '--hash', 'sha1', @@foo_manifest_locator+'/', 'tmp/'
+    end
+    assert_equal "#{Digest::SHA1.hexdigest('foo')}  ./foo\n", err
+    assert_equal '', out
+    assert_equal false, File.exists?('tmp/foo')
+  end
+
+  def test_block_to_file
+    remove_tmp_foo
+    out, err = capture_subprocess_io do
+      assert_arv_get @@foo_manifest_locator, 'tmp/foo'
+    end
+    assert_equal '', err
+    assert_equal '', out
+
+    digest = Digest::MD5.hexdigest('foo')
+    assert_match(/^\. #{digest}\+3.* 0:3:foo$/, IO.read('tmp/foo'))
+  end
+
+  def test_create_directory_tree
+    `rm -rf ./tmp/arv-get-test/`
+    Dir.mkdir './tmp/arv-get-test'
+    out, err = capture_subprocess_io do
+      assert_arv_get @@multilevel_manifest_locator + '/', 'tmp/arv-get-test/'
+    end
+    assert_equal '', err
+    assert_equal '', out
+    assert_equal 'baz', IO.read('tmp/arv-get-test/foo/bar/baz')
+  end
+
+  def test_create_partial_directory_tree
+    `rm -rf ./tmp/arv-get-test/`
+    Dir.mkdir './tmp/arv-get-test'
+    out, err = capture_subprocess_io do
+      assert_arv_get(@@multilevel_manifest_locator + '/foo/',
+                     'tmp/arv-get-test/')
+    end
+    assert_equal '', err
+    assert_equal '', out
+    assert_equal 'baz', IO.read('tmp/arv-get-test/bar/baz')
+  end
+
+  protected
+  def assert_arv_get(*args)
+    expect = case args.first
+             when true, false
+               args.shift
+             else
+               true
+             end
+    assert_equal(expect,
+                 system(['./bin/arv-get', 'arv-get'], *args),
+                 "`arv-get #{args.join ' '}` " +
+                 "should exit #{if expect then 0 else 'non-zero' end}")
+  end
+
+  def remove_tmp_foo
+    begin
+      File.unlink('tmp/foo')
+    rescue Errno::ENOENT
+    end
+  end
+end
similarity index 99%
rename from sdk/cli/test/test_arv-put.rb
rename to sdk/cli/test/test_arv-keep-put.rb
index 2f20e18440a2ff61dde6b748d3b327587530b142..fefbc2729875e70cb890f69d56fe1d7f1c614b8d 100644 (file)
@@ -1,7 +1,7 @@
 require 'minitest/autorun'
 require 'digest/md5'
 
-class TestArvPut < Minitest::Test
+class TestArvKeepPut < Minitest::Test
   def setup
     begin Dir.mkdir './tmp' rescue Errno::EEXIST end
     begin Dir.mkdir './tmp/empty_dir' rescue Errno::EEXIST end
index 2fd03f754aed6b62ffe5a5e9a2f0fbee35300909..4812252e0c1ec49c612668b69f0fbcb4ca3c6b26 100644 (file)
@@ -29,7 +29,7 @@ setup(name='arvados-cwl-runner',
           'bin/cwl-runner'
       ],
       install_requires=[
-          'cwltool',
+          'cwltool==1.0.20150722144138',
           'arvados-python-client'
       ],
       zip_safe=True,
index d35f6dacb72b632e18718b503d0a2f4ff55e7a17..2c508dcb4a1100cf4609fc3ff3387ac089c6ab26 100644 (file)
@@ -21,7 +21,7 @@ type ServerRequiredSuite struct{}
 
 func (s *ServerRequiredSuite) SetUpSuite(c *C) {
        arvadostest.StartAPI()
-       arvadostest.StartKeep()
+       arvadostest.StartKeep(2, false)
 }
 
 func (s *ServerRequiredSuite) SetUpTest(c *C) {
index cad16917dba286504f6693cac3a3fbd4d05a741e..27c552a4e104094ca2ed15991e310a3b7e9cd65e 100644 (file)
@@ -9,6 +9,7 @@ import (
        "log"
        "os"
        "os/exec"
+       "strconv"
        "strings"
 )
 
@@ -98,12 +99,21 @@ func StopAPI() {
        exec.Command("python", "run_test_server.py", "stop").Run()
 }
 
-func StartKeep() {
+// StartKeep starts the given number of keep servers,
+// optionally with -enforce-permissions enabled.
+// Use numKeepServers = 2 and enforcePermissions = false under all normal circumstances.
+func StartKeep(numKeepServers int, enforcePermissions bool) {
        cwd, _ := os.Getwd()
        defer os.Chdir(cwd)
        chdirToPythonTests()
 
-       cmd := exec.Command("python", "run_test_server.py", "start_keep")
+       cmdArgs := []string{"run_test_server.py", "start_keep", "--num-keep-servers", strconv.Itoa(numKeepServers)}
+       if enforcePermissions {
+               cmdArgs = append(cmdArgs, "--keep-enforce-permissions")
+       }
+
+       cmd := exec.Command("python", cmdArgs...)
+
        stderr, err := cmd.StderrPipe()
        if err != nil {
                log.Fatalf("Setting up stderr pipe: %s", err)
@@ -114,10 +124,13 @@ func StartKeep() {
        }
 }
 
-func StopKeep() {
+// StopKeep stops keep servers that were started with StartKeep.
+// numKeepServers should be the same value that was passed to StartKeep,
+// which is 2 under all normal circumstances.
+func StopKeep(numKeepServers int) {
        cwd, _ := os.Getwd()
        defer os.Chdir(cwd)
        chdirToPythonTests()
 
-       exec.Command("python", "run_test_server.py", "stop_keep").Run()
+       exec.Command("python", "run_test_server.py", "stop_keep", "--num-keep-servers", strconv.Itoa(numKeepServers))
 }
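
For reference, a minimal sketch of the new signatures in use (this mirrors the
call sites updated elsewhere in this commit; the suite type is assumed):

    func (s *ServerRequiredSuite) SetUpSuite(c *C) {
            arvadostest.StartAPI()
            arvadostest.StartKeep(2, false) // two keep servers, permissions not enforced
    }

    func (s *ServerRequiredSuite) TearDownSuite(c *C) {
            arvadostest.StopKeep(2) // same count that was passed to StartKeep
            arvadostest.StopAPI()
    }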
index f82e5c7c594062f23da7ab42db3c4971738d5597..67c304deaf3ae54b2668cb8c2f2856e909da8c5a 100644 (file)
@@ -2,6 +2,7 @@
 package keepclient
 
 import (
+       "bytes"
        "crypto/md5"
        "crypto/tls"
        "errors"
@@ -12,7 +13,6 @@ import (
        "io/ioutil"
        "log"
        "net/http"
-       "os"
        "regexp"
        "strconv"
        "strings"
@@ -29,34 +29,58 @@ var MissingArvadosApiHost = errors.New("Missing required environment variable AR
 var MissingArvadosApiToken = errors.New("Missing required environment variable ARVADOS_API_TOKEN")
 var InvalidLocatorError = errors.New("Invalid locator")
 
+// ErrNoSuchKeepServer is returned when GetIndex is invoked with a UUID that matches no known keep server
+var ErrNoSuchKeepServer = errors.New("No keep server matching the given UUID is found")
+
+// ErrIncompleteIndex is returned when the Index response does not end with a blank line
+var ErrIncompleteIndex = errors.New("Got incomplete index")
+
 const X_Keep_Desired_Replicas = "X-Keep-Desired-Replicas"
 const X_Keep_Replicas_Stored = "X-Keep-Replicas-Stored"
 
 // Information about Arvados and Keep servers.
 type KeepClient struct {
-       Arvados       *arvadosclient.ArvadosClient
-       Want_replicas int
-       Using_proxy   bool
-       localRoots    *map[string]string
+       Arvados            *arvadosclient.ArvadosClient
+       Want_replicas      int
+       Using_proxy        bool
+       localRoots         *map[string]string
        writableLocalRoots *map[string]string
-       gatewayRoots  *map[string]string
-       lock          sync.RWMutex
-       Client        *http.Client
+       gatewayRoots       *map[string]string
+       lock               sync.RWMutex
+       Client             *http.Client
+       Retries            int
+
+       // set to 1 if all writable services are of disk type, otherwise 0
+       replicasPerService int
 }
 
-// Create a new KeepClient.  This will contact the API server to discover Keep
-// servers.
+// MakeKeepClient creates a new KeepClient by contacting the API server to discover Keep servers.
 func MakeKeepClient(arv *arvadosclient.ArvadosClient) (*KeepClient, error) {
-       var matchTrue = regexp.MustCompile("^(?i:1|yes|true)$")
-       insecure := matchTrue.MatchString(os.Getenv("ARVADOS_API_HOST_INSECURE"))
+       kc := New(arv)
+       return kc, kc.DiscoverKeepServers()
+}
+
+// New creates a new KeepClient struct. It does not discover keep servers;
+// that is the caller's responsibility.
+func New(arv *arvadosclient.ArvadosClient) *KeepClient {
+       defaultReplicationLevel := 2
+       value, err := arv.Discovery("defaultCollectionReplication")
+       if err == nil {
+               v, ok := value.(float64)
+               if ok && v > 0 {
+                       defaultReplicationLevel = int(v)
+               }
+       }
+
        kc := &KeepClient{
                Arvados:       arv,
-               Want_replicas: 2,
+               Want_replicas: defaultReplicationLevel,
                Using_proxy:   false,
                Client: &http.Client{Transport: &http.Transport{
-                       TLSClientConfig: &tls.Config{InsecureSkipVerify: insecure}}},
+                       TLSClientConfig: &tls.Config{InsecureSkipVerify: arv.ApiInsecure}}},
+               Retries: 2,
        }
-       return kc, kc.DiscoverKeepServers()
+       return kc
 }
 
 // Put a block given the block hash, a reader, and the number of bytes
@@ -117,46 +141,80 @@ func (kc *KeepClient) PutR(r io.Reader) (locator string, replicas int, err error
        }
 }
 
-// Get() retrieves a block, given a locator. Returns a reader, the
-// expected data length, the URL the block is being fetched from, and
-// an error.
-//
-// If the block checksum does not match, the final Read() on the
-// reader returned by this method will return a BadChecksum error
-// instead of EOF.
-func (kc *KeepClient) Get(locator string) (io.ReadCloser, int64, string, error) {
+func (kc *KeepClient) getOrHead(method string, locator string) (io.ReadCloser, int64, string, error) {
        var errs []string
-       for _, host := range kc.getSortedRoots(locator) {
-               url := host + "/" + locator
-               req, err := http.NewRequest("GET", url, nil)
-               if err != nil {
-                       continue
-               }
-               req.Header.Add("Authorization", fmt.Sprintf("OAuth2 %s", kc.Arvados.ApiToken))
-               resp, err := kc.Client.Do(req)
-               if err != nil || resp.StatusCode != http.StatusOK {
-                       if resp != nil {
+
+       tries_remaining := 1 + kc.Retries
+       serversToTry := kc.getSortedRoots(locator)
+       var retryList []string
+
+       for tries_remaining > 0 {
+               tries_remaining -= 1
+               retryList = nil
+
+               for _, host := range serversToTry {
+                       url := host + "/" + locator
+
+                       req, err := http.NewRequest(method, url, nil)
+                       if err != nil {
+                               errs = append(errs, fmt.Sprintf("%s: %v", url, err))
+                               continue
+                       }
+                       req.Header.Add("Authorization", fmt.Sprintf("OAuth2 %s", kc.Arvados.ApiToken))
+                       resp, err := kc.Client.Do(req)
+                       if err != nil {
+                               // Probably a network error, may be transient,
+                               // can try again.
+                               errs = append(errs, fmt.Sprintf("%s: %v", url, err))
+                               retryList = append(retryList, host)
+                       } else if resp.StatusCode != http.StatusOK {
                                var respbody []byte
-                               if resp.Body != nil {
-                                       respbody, _ = ioutil.ReadAll(&io.LimitedReader{resp.Body, 4096})
+                               respbody, _ = ioutil.ReadAll(&io.LimitedReader{resp.Body, 4096})
+                               resp.Body.Close()
+                               errs = append(errs, fmt.Sprintf("%s: HTTP %d %q",
+                                       url, resp.StatusCode, bytes.TrimSpace(respbody)))
+
+                               if resp.StatusCode == 408 ||
+                                       resp.StatusCode == 429 ||
+                                       resp.StatusCode >= 500 {
+                                       // Timeout, too many requests, or other
+                                       // server side failure, transient
+                                       // error, can try again.
+                                       retryList = append(retryList, host)
                                }
-                               errs = append(errs, fmt.Sprintf("%s: %d %s",
-                                       url, resp.StatusCode, strings.TrimSpace(string(respbody))))
                        } else {
-                               errs = append(errs, fmt.Sprintf("%s: %v", url, err))
+                               // Success.
+                               if method == "GET" {
+                                       return HashCheckingReader{
+                                               Reader: resp.Body,
+                                               Hash:   md5.New(),
+                                               Check:  locator[0:32],
+                                       }, resp.ContentLength, url, nil
+                               } else {
+                                       resp.Body.Close()
+                                       return nil, resp.ContentLength, url, nil
+                               }
                        }
-                       continue
+
                }
-               return HashCheckingReader{
-                       Reader: resp.Body,
-                       Hash:   md5.New(),
-                       Check:  locator[0:32],
-               }, resp.ContentLength, url, nil
+               serversToTry = retryList
        }
-       log.Printf("DEBUG: GET %s failed: %v", locator, errs)
+       log.Printf("DEBUG: %s %s failed: %v", method, locator, errs)
+
        return nil, 0, "", BlockNotFound
 }
 
+// Get() retrieves a block, given a locator. Returns a reader, the
+// expected data length, the URL the block is being fetched from, and
+// an error.
+//
+// If the block checksum does not match, the final Read() on the
+// reader returned by this method will return a BadChecksum error
+// instead of EOF.
+func (kc *KeepClient) Get(locator string) (io.ReadCloser, int64, string, error) {
+       return kc.getOrHead("GET", locator)
+}
+
 // Ask() verifies that a block with the given hash is available and
 // readable, according to at least one Keep service. Unlike Get, it
 // does not retrieve the data or verify that the data content matches
@@ -165,18 +223,60 @@ func (kc *KeepClient) Get(locator string) (io.ReadCloser, int64, string, error)
 // Returns the data size (content length) reported by the Keep service
 // and the URI reporting the data size.
 func (kc *KeepClient) Ask(locator string) (int64, string, error) {
-       for _, host := range kc.getSortedRoots(locator) {
-               url := host + "/" + locator
-               req, err := http.NewRequest("HEAD", url, nil)
-               if err != nil {
-                       continue
-               }
-               req.Header.Add("Authorization", fmt.Sprintf("OAuth2 %s", kc.Arvados.ApiToken))
-               if resp, err := kc.Client.Do(req); err == nil && resp.StatusCode == http.StatusOK {
-                       return resp.ContentLength, url, nil
-               }
+       _, size, url, err := kc.getOrHead("HEAD", locator)
+       return size, url, err
+}
+
+// GetIndex retrieves a list of blocks stored on the given server whose hashes
+// begin with the given prefix. GetIndex returns an error (and a nil reader)
+// if the complete index cannot be retrieved.
+//
+// This is meant to be used only by system components and admin tools.
+// It will return an error unless the client is using a "data manager token"
+// recognized by the Keep services.
+func (kc *KeepClient) GetIndex(keepServiceUUID, prefix string) (io.Reader, error) {
+       url := kc.LocalRoots()[keepServiceUUID]
+       if url == "" {
+               return nil, ErrNoSuchKeepServer
+       }
+
+       url += "/index"
+       if prefix != "" {
+               url += "/" + prefix
+       }
+
+       req, err := http.NewRequest("GET", url, nil)
+       if err != nil {
+               return nil, err
+       }
+
+       req.Header.Add("Authorization", fmt.Sprintf("OAuth2 %s", kc.Arvados.ApiToken))
+       resp, err := kc.Client.Do(req)
+       if err != nil {
+               return nil, err
        }
-       return 0, "", BlockNotFound
+
+       defer resp.Body.Close()
+
+       if resp.StatusCode != http.StatusOK {
+               return nil, fmt.Errorf("Got http status code: %d", resp.StatusCode)
+       }
+
+       var respBody []byte
+       respBody, err = ioutil.ReadAll(resp.Body)
+       if err != nil {
+               return nil, err
+       }
+
+       // Got index; verify that it is complete
+       // The response should be "\n" if no locators matched the prefix
+       // Else, it should be a list of locators followed by a blank line
+       if !bytes.Equal(respBody, []byte("\n")) && !bytes.HasSuffix(respBody, []byte("\n\n")) {
+               return nil, ErrIncompleteIndex
+       }
+
+       // Got a complete index; strip the trailing newline and return the rest
+       return bytes.NewReader(respBody[0 : len(respBody)-1]), nil
 }
 
 // LocalRoots() returns the map of local (i.e., disk and proxy) Keep
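
A minimal sketch (client setup and service UUID assumed) of the new retry knob
and GetIndex API; note that GetIndex requires a data manager token:

    arv, _ := arvadosclient.MakeArvadosClient()
    kc, _ := keepclient.MakeKeepClient(&arv)
    kc.Retries = 3 // up to 3 extra passes over the server list on transient errors

    // List blocks whose hashes begin with "abc" on one keep service.
    reader, err := kc.GetIndex("zzzzz-bi6l4-keepdisk0000000", "abc")
    if err != nil {
            log.Fatal(err)
    }
    index, _ := ioutil.ReadAll(reader)
    fmt.Printf("%s", index)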
index c1f6a3e6f9a2614fc362985be67c86aeff355624..aaba0695f0d0751c370fbd42741c3f6e77b9fbce 100644 (file)
@@ -45,14 +45,14 @@ func (s *ServerRequiredSuite) SetUpSuite(c *C) {
                return
        }
        arvadostest.StartAPI()
-       arvadostest.StartKeep()
+       arvadostest.StartKeep(2, false)
 }
 
 func (s *ServerRequiredSuite) TearDownSuite(c *C) {
        if *no_server {
                return
        }
-       arvadostest.StopKeep()
+       arvadostest.StopKeep(2)
        arvadostest.StopAPI()
 }
 
@@ -69,6 +69,22 @@ func (s *ServerRequiredSuite) TestMakeKeepClient(c *C) {
        }
 }
 
+func (s *ServerRequiredSuite) TestDefaultReplications(c *C) {
+       arv, err := arvadosclient.MakeArvadosClient()
+       c.Assert(err, Equals, nil)
+
+       kc, err := MakeKeepClient(&arv)
+       c.Assert(kc.Want_replicas, Equals, 2)
+
+       arv.DiscoveryDoc["defaultCollectionReplication"] = 3.0
+       kc, err = MakeKeepClient(&arv)
+       c.Assert(kc.Want_replicas, Equals, 3)
+
+       arv.DiscoveryDoc["defaultCollectionReplication"] = 1.0
+       kc, err = MakeKeepClient(&arv)
+       c.Assert(kc.Want_replicas, Equals, 1)
+}
+
 type StubPutHandler struct {
        c              *C
        expectPath     string
@@ -184,6 +200,31 @@ func (fh FailHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
        fh.handled <- fmt.Sprintf("http://%s", req.Host)
 }
 
+type FailThenSucceedHandler struct {
+       handled        chan string
+       count          int
+       successhandler StubGetHandler
+}
+
+func (fh *FailThenSucceedHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
+       if fh.count == 0 {
+               resp.WriteHeader(500)
+               fh.count += 1
+               fh.handled <- fmt.Sprintf("http://%s", req.Host)
+       } else {
+               fh.successhandler.ServeHTTP(resp, req)
+       }
+}
+
+type Error404Handler struct {
+       handled chan string
+}
+
+func (fh Error404Handler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
+       resp.WriteHeader(404)
+       fh.handled <- fmt.Sprintf("http://%s", req.Host)
+}
+
 func (s *StandaloneSuite) TestFailedUploadToStubKeepServer(c *C) {
        log.Printf("TestFailedUploadToStubKeepServer")
 
@@ -464,7 +505,7 @@ func (s *StandaloneSuite) TestGet(c *C) {
        arv, err := arvadosclient.MakeArvadosClient()
        kc, _ := MakeKeepClient(&arv)
        arv.ApiToken = "abc123"
-       kc.SetServiceRoots(map[string]string{"x": ks.url}, map[string]string{ks.url: ""}, nil)
+       kc.SetServiceRoots(map[string]string{"x": ks.url}, nil, nil)
 
        r, n, url2, err := kc.Get(hash)
        defer r.Close()
@@ -479,6 +520,26 @@ func (s *StandaloneSuite) TestGet(c *C) {
        log.Printf("TestGet done")
 }
 
+func (s *StandaloneSuite) TestGet404(c *C) {
+       hash := fmt.Sprintf("%x", md5.Sum([]byte("foo")))
+
+       st := Error404Handler{make(chan string, 1)}
+
+       ks := RunFakeKeepServer(st)
+       defer ks.listener.Close()
+
+       arv, err := arvadosclient.MakeArvadosClient()
+       kc, _ := MakeKeepClient(&arv)
+       arv.ApiToken = "abc123"
+       kc.SetServiceRoots(map[string]string{"x": ks.url}, nil, nil)
+
+       r, n, url2, err := kc.Get(hash)
+       c.Check(err, Equals, BlockNotFound)
+       c.Check(n, Equals, int64(0))
+       c.Check(url2, Equals, "")
+       c.Check(r, Equals, nil)
+}
+
 func (s *StandaloneSuite) TestGetFail(c *C) {
        hash := fmt.Sprintf("%x", md5.Sum([]byte("foo")))
 
@@ -490,7 +551,52 @@ func (s *StandaloneSuite) TestGetFail(c *C) {
        arv, err := arvadosclient.MakeArvadosClient()
        kc, _ := MakeKeepClient(&arv)
        arv.ApiToken = "abc123"
-       kc.SetServiceRoots(map[string]string{"x": ks.url}, map[string]string{ks.url: ""}, nil)
+       kc.SetServiceRoots(map[string]string{"x": ks.url}, nil, nil)
+
+       r, n, url2, err := kc.Get(hash)
+       c.Check(err, Equals, BlockNotFound)
+       c.Check(n, Equals, int64(0))
+       c.Check(url2, Equals, "")
+       c.Check(r, Equals, nil)
+}
+
+func (s *StandaloneSuite) TestGetFailRetry(c *C) {
+       hash := fmt.Sprintf("%x", md5.Sum([]byte("foo")))
+
+       st := &FailThenSucceedHandler{make(chan string, 1), 0,
+               StubGetHandler{
+                       c,
+                       hash,
+                       "abc123",
+                       http.StatusOK,
+                       []byte("foo")}}
+
+       ks := RunFakeKeepServer(st)
+       defer ks.listener.Close()
+
+       arv, err := arvadosclient.MakeArvadosClient()
+       kc, _ := MakeKeepClient(&arv)
+       arv.ApiToken = "abc123"
+       kc.SetServiceRoots(map[string]string{"x": ks.url}, nil, nil)
+
+       r, n, url2, err := kc.Get(hash)
+       defer r.Close()
+       c.Check(err, Equals, nil)
+       c.Check(n, Equals, int64(3))
+       c.Check(url2, Equals, fmt.Sprintf("%s/%s", ks.url, hash))
+
+       content, err2 := ioutil.ReadAll(r)
+       c.Check(err2, Equals, nil)
+       c.Check(content, DeepEquals, []byte("foo"))
+}
+
+func (s *StandaloneSuite) TestGetNetError(c *C) {
+       hash := fmt.Sprintf("%x", md5.Sum([]byte("foo")))
+
+       arv, err := arvadosclient.MakeArvadosClient()
+       kc, _ := MakeKeepClient(&arv)
+       arv.ApiToken = "abc123"
+       kc.SetServiceRoots(map[string]string{"x": "http://localhost:62222"}, nil, nil)
 
        r, n, url2, err := kc.Get(hash)
        c.Check(err, Equals, BlockNotFound)
@@ -525,7 +631,7 @@ func (s *StandaloneSuite) TestGetWithServiceHint(c *C) {
        arv.ApiToken = "abc123"
        kc.SetServiceRoots(
                map[string]string{"x": ks0.url},
-               map[string]string{"x": ks0.url},
+               nil,
                map[string]string{uuid: ks.url})
 
        r, n, uri, err := kc.Get(hash + "+K@" + uuid)
@@ -572,11 +678,7 @@ func (s *StandaloneSuite) TestGetWithLocalServiceHint(c *C) {
                        "zzzzz-bi6l4-xxxxxxxxxxxxxxx": ks0.url,
                        "zzzzz-bi6l4-wwwwwwwwwwwwwww": ks0.url,
                        uuid: ks.url},
-               map[string]string{
-                       "zzzzz-bi6l4-yyyyyyyyyyyyyyy": ks0.url,
-                       "zzzzz-bi6l4-xxxxxxxxxxxxxxx": ks0.url,
-                       "zzzzz-bi6l4-wwwwwwwwwwwwwww": ks0.url,
-                       uuid: ks.url},
+               nil,
                map[string]string{
                        "zzzzz-bi6l4-yyyyyyyyyyyyyyy": ks0.url,
                        "zzzzz-bi6l4-xxxxxxxxxxxxxxx": ks0.url,
@@ -619,7 +721,7 @@ func (s *StandaloneSuite) TestGetWithServiceHintFailoverToLocals(c *C) {
        arv.ApiToken = "abc123"
        kc.SetServiceRoots(
                map[string]string{"zzzzz-bi6l4-keepdisk0000000": ksLocal.url},
-               map[string]string{"zzzzz-bi6l4-keepdisk0000000": ksLocal.url},
+               nil,
                map[string]string{uuid: ksGateway.url})
 
        r, n, uri, err := kc.Get(hash + "+K@" + uuid)
@@ -654,7 +756,7 @@ func (s *StandaloneSuite) TestChecksum(c *C) {
        arv, err := arvadosclient.MakeArvadosClient()
        kc, _ := MakeKeepClient(&arv)
        arv.ApiToken = "abc123"
-       kc.SetServiceRoots(map[string]string{"x": ks.url}, map[string]string{ks.url: ""}, nil)
+       kc.SetServiceRoots(map[string]string{"x": ks.url}, nil, nil)
 
        r, n, _, err := kc.Get(barhash)
        _, err = ioutil.ReadAll(r)
@@ -675,7 +777,7 @@ func (s *StandaloneSuite) TestGetWithFailures(c *C) {
        content := []byte("waz")
        hash := fmt.Sprintf("%x", md5.Sum(content))
 
-       fh := FailHandler{
+       fh := Error404Handler{
                make(chan string, 4)}
 
        st := StubGetHandler{
@@ -743,7 +845,7 @@ func (s *ServerRequiredSuite) TestPutGetHead(c *C) {
        }
        {
                hash2, replicas, err := kc.PutB(content)
-               c.Check(hash2, Equals, fmt.Sprintf("%s+%d", hash, len(content)))
+               c.Check(hash2, Matches, fmt.Sprintf(`%s\+%d\b.*`, hash, len(content)))
                c.Check(replicas, Equals, 2)
                c.Check(err, Equals, nil)
        }
@@ -948,3 +1050,139 @@ func (s *StandaloneSuite) TestPutBWithNoWritableLocalRoots(c *C) {
        c.Check(err, Equals, InsufficientReplicasError)
        c.Check(replicas, Equals, 0)
 }
+
+type StubGetIndexHandler struct {
+       c              *C
+       expectPath     string
+       expectAPIToken string
+       httpStatus     int
+       body           []byte
+}
+
+func (h StubGetIndexHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
+       h.c.Check(req.URL.Path, Equals, h.expectPath)
+       h.c.Check(req.Header.Get("Authorization"), Equals, fmt.Sprintf("OAuth2 %s", h.expectAPIToken))
+       // Headers must be set before WriteHeader is called.
+       resp.Header().Set("Content-Length", fmt.Sprintf("%d", len(h.body)))
+       resp.WriteHeader(h.httpStatus)
+       resp.Write(h.body)
+}
+
+func (s *StandaloneSuite) TestGetIndexWithNoPrefix(c *C) {
+       hash := fmt.Sprintf("%x", md5.Sum([]byte("foo")))
+
+       st := StubGetIndexHandler{
+               c,
+               "/index",
+               "abc123",
+               http.StatusOK,
+               []byte(hash + "+3 1443559274\n\n")}
+
+       ks := RunFakeKeepServer(st)
+       defer ks.listener.Close()
+
+       arv, err := arvadosclient.MakeArvadosClient()
+       kc, _ := MakeKeepClient(&arv)
+       arv.ApiToken = "abc123"
+       kc.SetServiceRoots(map[string]string{"x": ks.url}, nil, nil)
+
+       r, err := kc.GetIndex("x", "")
+       c.Check(err, Equals, nil)
+
+       content, err2 := ioutil.ReadAll(r)
+       c.Check(err2, Equals, nil)
+       c.Check(content, DeepEquals, st.body[0:len(st.body)-1])
+}
+
+func (s *StandaloneSuite) TestGetIndexWithPrefix(c *C) {
+       hash := fmt.Sprintf("%x", md5.Sum([]byte("foo")))
+
+       st := StubGetIndexHandler{
+               c,
+               "/index/" + hash[0:3],
+               "abc123",
+               http.StatusOK,
+               []byte(hash + "+3 1443559274\n\n")}
+
+       ks := RunFakeKeepServer(st)
+       defer ks.listener.Close()
+
+       arv, err := arvadosclient.MakeArvadosClient()
+       kc, _ := MakeKeepClient(&arv)
+       arv.ApiToken = "abc123"
+       kc.SetServiceRoots(map[string]string{"x": ks.url}, nil, nil)
+
+       r, err := kc.GetIndex("x", hash[0:3])
+       c.Check(err, Equals, nil)
+
+       content, err2 := ioutil.ReadAll(r)
+       c.Check(err2, Equals, nil)
+       c.Check(content, DeepEquals, st.body[0:len(st.body)-1])
+}
+
+func (s *StandaloneSuite) TestGetIndexIncomplete(c *C) {
+       hash := fmt.Sprintf("%x", md5.Sum([]byte("foo")))
+
+       st := StubGetIndexHandler{
+               c,
+               "/index/" + hash[0:3],
+               "abc123",
+               http.StatusOK,
+               []byte(hash)}
+
+       ks := RunFakeKeepServer(st)
+       defer ks.listener.Close()
+
+       arv, err := arvadosclient.MakeArvadosClient()
+       kc, _ := MakeKeepClient(&arv)
+       arv.ApiToken = "abc123"
+       kc.SetServiceRoots(map[string]string{"x": ks.url}, nil, nil)
+
+       _, err = kc.GetIndex("x", hash[0:3])
+       c.Check(err, Equals, ErrIncompleteIndex)
+}
+
+func (s *StandaloneSuite) TestGetIndexWithNoSuchServer(c *C) {
+       hash := fmt.Sprintf("%x", md5.Sum([]byte("foo")))
+
+       st := StubGetIndexHandler{
+               c,
+               "/index/" + hash[0:3],
+               "abc123",
+               http.StatusOK,
+               []byte(hash)}
+
+       ks := RunFakeKeepServer(st)
+       defer ks.listener.Close()
+
+       arv, err := arvadosclient.MakeArvadosClient()
+       kc, _ := MakeKeepClient(&arv)
+       arv.ApiToken = "abc123"
+       kc.SetServiceRoots(map[string]string{"x": ks.url}, nil, nil)
+
+       _, err = kc.GetIndex("y", hash[0:3])
+       c.Check(err, Equals, ErrNoSuchKeepServer)
+}
+
+func (s *StandaloneSuite) TestGetIndexWithNoSuchPrefix(c *C) {
+       st := StubGetIndexHandler{
+               c,
+               "/index/abcd",
+               "abc123",
+               http.StatusOK,
+               []byte("\n")}
+
+       ks := RunFakeKeepServer(st)
+       defer ks.listener.Close()
+
+       arv, err := arvadosclient.MakeArvadosClient()
+       kc, _ := MakeKeepClient(&arv)
+       arv.ApiToken = "abc123"
+       kc.SetServiceRoots(map[string]string{"x": ks.url}, nil, nil)
+
+       r, err := kc.GetIndex("x", "abcd")
+       c.Check(err, Equals, nil)
+
+       content, err2 := ioutil.ReadAll(r)
+       c.Check(err2, Equals, nil)
+       c.Check(content, DeepEquals, st.body[0:len(st.body)-1])
+}
diff --git a/sdk/go/keepclient/perms.go b/sdk/go/keepclient/perms.go
new file mode 100644 (file)
index 0000000..12105c6
--- /dev/null
@@ -0,0 +1,99 @@
+// Generate and verify permission signatures for Keep locators.
+//
+// See https://dev.arvados.org/projects/arvados/wiki/Keep_locator_format
+
+package keepclient
+
+import (
+       "crypto/hmac"
+       "crypto/sha1"
+       "errors"
+       "fmt"
+       "regexp"
+       "strconv"
+       "strings"
+       "time"
+)
+
+var (
+       // ErrSignatureExpired - a signature was rejected because the
+       // expiry time has passed.
+       ErrSignatureExpired = errors.New("Signature expired")
+       // ErrSignatureInvalid - a signature was rejected because it
+       // was badly formatted or did not match the given secret key.
+       ErrSignatureInvalid = errors.New("Invalid signature")
+       // ErrSignatureMissing - the given locator does not have a
+       // signature hint.
+       ErrSignatureMissing = errors.New("Missing signature")
+)
+
+// makePermSignature generates a SHA-1 HMAC digest for the given blob,
+// token, expiry, and site secret.
+func makePermSignature(blobHash, apiToken, expiry string, permissionSecret []byte) string {
+       hmac := hmac.New(sha1.New, permissionSecret)
+       hmac.Write([]byte(blobHash))
+       hmac.Write([]byte("@"))
+       hmac.Write([]byte(apiToken))
+       hmac.Write([]byte("@"))
+       hmac.Write([]byte(expiry))
+       digest := hmac.Sum(nil)
+       return fmt.Sprintf("%x", digest)
+}
+
+// SignLocator returns blobLocator with a permission signature
+// added. If either permissionSecret or apiToken is empty, blobLocator
+// is returned untouched.
+//
+// This function is intended to be used by system components and admin
+// utilities: userland programs do not know the permissionSecret.
+func SignLocator(blobLocator, apiToken string, expiry time.Time, permissionSecret []byte) string {
+       if len(permissionSecret) == 0 || apiToken == "" {
+               return blobLocator
+       }
+       // Strip off all hints: only the hash is used to sign.
+       blobHash := strings.Split(blobLocator, "+")[0]
+       timestampHex := fmt.Sprintf("%08x", expiry.Unix())
+       return blobLocator +
+               "+A" + makePermSignature(blobHash, apiToken, timestampHex, permissionSecret) +
+               "@" + timestampHex
+}
+
+var signedLocatorRe = regexp.MustCompile(`^([[:xdigit:]]{32}).*\+A([[:xdigit:]]{40})@([[:xdigit:]]{8})`)
+
+// VerifySignature returns nil if the signature on the signedLocator
+// can be verified using the given apiToken. Otherwise it returns
+// ErrSignatureExpired (if the signature's expiry time has passed,
+// which is something the client could have figured out
+// independently), ErrSignatureMissing (if there is no signature hint
+// at all), or ErrSignatureInvalid (if the signature is present but
+// badly formatted or incorrect).
+//
+// This function is intended to be used by system components and admin
+// utilities: userland programs do not know the permissionSecret.
+func VerifySignature(signedLocator, apiToken string, permissionSecret []byte) error {
+       matches := signedLocatorRe.FindStringSubmatch(signedLocator)
+       if matches == nil {
+               return ErrSignatureMissing
+       }
+       blobHash := matches[1]
+       signatureHex := matches[2]
+       expiryHex := matches[3]
+       if expiryTime, err := parseHexTimestamp(expiryHex); err != nil {
+               return ErrSignatureInvalid
+       } else if expiryTime.Before(time.Now()) {
+               return ErrSignatureExpired
+       }
+       if signatureHex != makePermSignature(blobHash, apiToken, expiryHex, permissionSecret) {
+               return ErrSignatureInvalid
+       }
+       return nil
+}
+
+func parseHexTimestamp(timestampHex string) (ts time.Time, err error) {
+       if tsInt, e := strconv.ParseInt(timestampHex, 16, 0); e == nil {
+               ts = time.Unix(tsInt, 0)
+       } else {
+               err = e
+       }
+       return ts, err
+}
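
An illustrative round trip with these helpers (apiToken and permissionSecret
are assumed to be in scope):

    expiry := time.Now().Add(24 * time.Hour)
    signed := SignLocator("acbd18db4cc2f85cedef654fccc4a4d8+3", apiToken, expiry, permissionSecret)
    // signed now looks like "acbd...+3+A<40 hex digits>@<8 hex digits>"
    if err := VerifySignature(signed, apiToken, permissionSecret); err != nil {
            log.Fatalf("signature did not verify: %v", err)
    }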
diff --git a/sdk/go/keepclient/perms_test.go b/sdk/go/keepclient/perms_test.go
new file mode 100644 (file)
index 0000000..1380795
--- /dev/null
@@ -0,0 +1,98 @@
+package keepclient
+
+import (
+       "testing"
+       "time"
+)
+
+const (
+       knownHash    = "acbd18db4cc2f85cedef654fccc4a4d8"
+       knownLocator = knownHash + "+3"
+       knownToken   = "hocfupkn2pjhrpgp2vxv8rsku7tvtx49arbc9s4bvu7p7wxqvk"
+       knownKey     = "13u9fkuccnboeewr0ne3mvapk28epf68a3bhj9q8sb4l6e4e5mkk" +
+               "p6nhj2mmpscgu1zze5h5enydxfe3j215024u16ij4hjaiqs5u4pzsl3nczmaoxnc" +
+               "ljkm4875xqn4xv058koz3vkptmzhyheiy6wzevzjmdvxhvcqsvr5abhl15c2d4o4" +
+               "jhl0s91lojy1mtrzqqvprqcverls0xvy9vai9t1l1lvvazpuadafm71jl4mrwq2y" +
+               "gokee3eamvjy8qq1fvy238838enjmy5wzy2md7yvsitp5vztft6j4q866efym7e6" +
+               "vu5wm9fpnwjyxfldw3vbo01mgjs75rgo7qioh8z8ij7jpyp8508okhgbbex3ceei" +
+               "786u5rw2a9gx743dj3fgq2irk"
+       knownSignature     = "257f3f5f5f0a4e4626a18fc74bd42ec34dcb228a"
+       knownTimestamp     = "7fffffff"
+       knownSigHint       = "+A" + knownSignature + "@" + knownTimestamp
+       knownSignedLocator = knownLocator + knownSigHint
+)
+
+func TestSignLocator(t *testing.T) {
+       if ts, err := parseHexTimestamp(knownTimestamp); err != nil {
+               t.Errorf("bad knownTimestamp %s", knownTimestamp)
+       } else {
+               if knownSignedLocator != SignLocator(knownLocator, knownToken, ts, []byte(knownKey)) {
+                       t.Fail()
+               }
+       }
+}
+
+func TestVerifySignature(t *testing.T) {
+       if VerifySignature(knownSignedLocator, knownToken, []byte(knownKey)) != nil {
+               t.Fail()
+       }
+}
+
+func TestVerifySignatureExtraHints(t *testing.T) {
+       if VerifySignature(knownLocator+"+K@xyzzy"+knownSigHint, knownToken, []byte(knownKey)) != nil {
+               t.Fatal("Verify cannot handle hint before permission signature")
+       }
+
+       if VerifySignature(knownLocator+knownSigHint+"+Zfoo", knownToken, []byte(knownKey)) != nil {
+               t.Fatal("Verify cannot handle hint after permission signature")
+       }
+
+       if VerifySignature(knownLocator+"+K@xyzzy"+knownSigHint+"+Zfoo", knownToken, []byte(knownKey)) != nil {
+               t.Fatal("Verify cannot handle hints around permission signature")
+       }
+}
+
+// The size hint on the locator string should not affect signature validation.
+func TestVerifySignatureWrongSize(t *testing.T) {
+       if VerifySignature(knownHash+"+999999"+knownSigHint, knownToken, []byte(knownKey)) != nil {
+               t.Fatal("Verify cannot handle incorrect size hint")
+       }
+
+       if VerifySignature(knownHash+knownSigHint, knownToken, []byte(knownKey)) != nil {
+               t.Fatal("Verify cannot handle missing size hint")
+       }
+}
+
+func TestVerifySignatureBadSig(t *testing.T) {
+       badLocator := knownLocator + "+Aaaaaaaaaaaaaaaa@" + knownTimestamp
+       if VerifySignature(badLocator, knownToken, []byte(knownKey)) != ErrSignatureMissing {
+               t.Fail()
+       }
+}
+
+func TestVerifySignatureBadTimestamp(t *testing.T) {
+       badLocator := knownLocator + "+A" + knownSignature + "@OOOOOOOl"
+       if VerifySignature(badLocator, knownToken, []byte(knownKey)) != ErrSignatureMissing {
+               t.Fail()
+       }
+}
+
+func TestVerifySignatureBadSecret(t *testing.T) {
+       if VerifySignature(knownSignedLocator, knownToken, []byte("00000000000000000000")) != ErrSignatureInvalid {
+               t.Fail()
+       }
+}
+
+func TestVerifySignatureBadToken(t *testing.T) {
+       if VerifySignature(knownSignedLocator, "00000000", []byte(knownKey)) != ErrSignatureInvalid {
+               t.Fail()
+       }
+}
+
+func TestVerifySignatureExpired(t *testing.T) {
+       yesterday := time.Now().AddDate(0, 0, -1)
+       expiredLocator := SignLocator(knownHash, knownToken, yesterday, []byte(knownKey))
+       if VerifySignature(expiredLocator, knownToken, []byte(knownKey)) != ErrSignatureExpired {
+               t.Fail()
+       }
+}
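
A design note on the tests above: a malformed or truncated signature hint fails
the signedLocatorRe match (which requires exactly 40 hex digits followed by an
8-hex-digit timestamp), so VerifySignature reports ErrSignatureMissing rather
than ErrSignatureInvalid; only a well-formed hint whose HMAC comparison fails
yields ErrSignatureInvalid.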
index b467d06b21ed91ecaa06b1537ae30a6cddcb6ce2..0791d3cf856ee7d5d1268338eafa883fe9bcbb18 100644 (file)
@@ -2,6 +2,7 @@ package keepclient
 
 import (
        "crypto/md5"
+       "encoding/json"
        "errors"
        "fmt"
        "git.curoverse.com/arvados.git/sdk/go/streamer"
@@ -14,7 +15,7 @@ import (
        "time"
 )
 
-type keepDisk struct {
+type keepService struct {
        Uuid     string `json:"uuid"`
        Hostname string `json:"service_host"`
        Port     int    `json:"service_port"`
@@ -23,6 +24,7 @@ type keepDisk struct {
        ReadOnly bool   `json:"read_only"`
 }
 
+// Md5String returns the hex-encoded MD5 digest of the given string
 func Md5String(s string) string {
        return fmt.Sprintf("%x", md5.Sum([]byte(s)))
 }
@@ -49,12 +51,11 @@ func (this *KeepClient) setClientSettingsProxy() {
                        TLSHandshakeTimeout: 10 * time.Second,
                }
        }
-
 }
 
 // Set timeouts apply when connecting to keepstore services directly (assumed
 // to be on the local network).
-func (this *KeepClient) setClientSettingsStore() {
+func (this *KeepClient) setClientSettingsDisk() {
        if this.Client.Timeout == 0 {
                // Maximum time to wait for a complete response
                this.Client.Timeout = 20 * time.Second
@@ -76,26 +77,48 @@ func (this *KeepClient) setClientSettingsStore() {
        }
 }
 
+type svcList struct {
+       Items []keepService `json:"items"`
+}
+
+// DiscoverKeepServers gets the list of available keep services from the API server
 func (this *KeepClient) DiscoverKeepServers() error {
-       type svcList struct {
-               Items []keepDisk `json:"items"`
+       var list svcList
+
+       // Get keep services from api server
+       err := this.Arvados.Call("GET", "keep_services", "", "accessible", nil, &list)
+       if err != nil {
+               return err
        }
-       var m svcList
 
-       err := this.Arvados.Call("GET", "keep_services", "", "accessible", nil, &m)
+       return this.loadKeepServers(list)
+}
 
-       if err != nil {
-               if err := this.Arvados.List("keep_disks", nil, &m); err != nil {
-                       return err
-               }
+// LoadKeepServicesFromJSON loads the list of available keep services from the given JSON
+func (this *KeepClient) LoadKeepServicesFromJSON(services string) error {
+       var list svcList
+
+       // Load keep services from given json
+       dec := json.NewDecoder(strings.NewReader(services))
+       if err := dec.Decode(&list); err != nil {
+               return err
        }
 
+       return this.loadKeepServers(list)
+}
+
+// loadKeepServers populates the localRoots, writableLocalRoots and gatewayRoots maps from the given service list
+func (this *KeepClient) loadKeepServers(list svcList) error {
        listed := make(map[string]bool)
        localRoots := make(map[string]string)
        gatewayRoots := make(map[string]string)
        writableLocalRoots := make(map[string]string)
 
-       for _, service := range m.Items {
+       // replicasPerService is 1 for disks; unknown or unlimited otherwise
+       this.replicasPerService = 1
+       this.Using_proxy = false
+
+       for _, service := range list.Items {
                scheme := "http"
                if service.SSL {
                        scheme = "https"
@@ -108,16 +131,16 @@ func (this *KeepClient) DiscoverKeepServers() error {
                }
                listed[url] = true
 
-               switch service.SvcType {
-               case "disk":
-                       localRoots[service.Uuid] = url
-               case "proxy":
-                       localRoots[service.Uuid] = url
+               localRoots[service.Uuid] = url
+               if service.SvcType == "proxy" {
                        this.Using_proxy = true
                }
 
                if service.ReadOnly == false {
                        writableLocalRoots[service.Uuid] = url
+                       if service.SvcType != "disk" {
+                               this.replicasPerService = 0
+                       }
                }
 
                // Gateway services are only used when specified by
@@ -131,7 +154,7 @@ func (this *KeepClient) DiscoverKeepServers() error {
        if this.Using_proxy {
                this.setClientSettingsProxy()
        } else {
-               this.setClientSettingsStore()
+               this.setClientSettingsDisk()
        }
 
        this.SetServiceRoots(localRoots, writableLocalRoots, gatewayRoots)
@@ -174,10 +197,7 @@ func (this KeepClient) uploadToKeepServer(host string, hash string, body io.Read
 
        req.Header.Add("Authorization", fmt.Sprintf("OAuth2 %s", this.Arvados.ApiToken))
        req.Header.Add("Content-Type", "application/octet-stream")
-
-       if this.Using_proxy {
-               req.Header.Add(X_Keep_Desired_Replicas, fmt.Sprint(this.Want_replicas))
-       }
+       req.Header.Add(X_Keep_Desired_Replicas, fmt.Sprint(this.Want_replicas))
 
        var resp *http.Response
        if resp, err = this.Client.Do(req); err != nil {
@@ -228,13 +248,29 @@ func (this KeepClient) putReplicas(
 
        // Used to communicate status from the upload goroutines
        upload_status := make(chan uploadStatus)
-       defer close(upload_status)
+       defer func() {
+               // Wait for any abandoned uploads (e.g., we started
+               // two uploads and the first replied with replicas=2)
+               // to finish before closing the status channel.
+               go func() {
+                       for active > 0 {
+                               <-upload_status
+                       }
+                       close(upload_status)
+               }()
+       }()
 
        // Desired number of replicas
        remaining_replicas := this.Want_replicas
 
+       replicasPerThread := this.replicasPerService
+       if replicasPerThread < 1 {
+               // unlimited or unknown
+               replicasPerThread = remaining_replicas
+       }
+
        for remaining_replicas > 0 {
-               for active < remaining_replicas {
+               for active*replicasPerThread < remaining_replicas {
                        // Start some upload requests
                        if next_server < len(sv) {
                                log.Printf("[%v] Begin upload %s to %s", requestId, hash, sv[next_server])
index 853d7d30354daddb86e602c4b580141fc18e1bdf..80aeb268975d8acbc7f7d1c2e771c17e7adfa719 100644 (file)
@@ -251,6 +251,7 @@ func (s *StandaloneSuite) TestTransferShortBuffer(c *C) {
 
        n, err := sr.Read(out)
        c.Check(n, Equals, 100)
+       c.Check(err, IsNil)
 
        n, err = sr.Read(out)
        c.Check(n, Equals, 0)
index a4a194f69bcc8fbdbf43650caa881325bde5dc24..3f5f9344c521f3021e2f6fd35f025e4b9c7fb95e 100644 (file)
@@ -249,9 +249,7 @@ func (this *AsyncStream) transfer(source_reader io.Reader) {
                                        }
                                }
                        } else {
-                               if reader_status == io.EOF {
-                                       // no more reads expected, so this is ok
-                               } else {
+                               if reader_status == nil {
                                        // slices channel closed without signaling EOF
                                        reader_status = io.ErrUnexpectedEOF
                                }
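The transfer.go change above tightens end-of-stream handling: a nil reader status at channel close now maps to io.ErrUnexpectedEOF, while a previously recorded io.EOF still counts as a clean end. A hedged sketch of that rule as a standalone function (names are illustrative):

    package main

    import (
            "fmt"
            "io"
    )

    // finalStatus mirrors the branch above: when the slices channel closes,
    // a nil reader status means the producer stopped without signaling EOF.
    func finalStatus(readerStatus error) error {
            if readerStatus == nil {
                    return io.ErrUnexpectedEOF
            }
            if readerStatus == io.EOF {
                    return nil // expected end of stream
            }
            return readerStatus // a genuine read error
    }

    func main() {
            fmt.Println(finalStatus(nil))    // unexpected EOF
            fmt.Println(finalStatus(io.EOF)) // <nil>
    }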
index 49153884955465e4a1ac7d94f2c18f0e2f2a40e6..d261cebf410086694a0d2a0d1931726c6eca1631 100755 (executable)
@@ -40,7 +40,7 @@ setup(name='arvados-pam',
           'arvados-python-client>=0.1.20150801000000',
       ],
       test_suite='tests',
-      tests_require=['mock>=1.0', 'python-pam'],
+      tests_require=['pbr<1.7.0', 'mock>=1.0', 'python-pam'],
       zip_safe=False,
       cmdclass={'egg_info': tagger},
       )
index 106f7a70eeac29d060bb5bcfc3d457db7c8879a4..0326608a0bb6e3270341f1ad1b01a3e7926958ac 100644 (file)
@@ -320,7 +320,7 @@ class _BufferBlock(object):
     @synchronized
     def set_state(self, nextstate, val=None):
         if (self._state, nextstate) not in self.STATE_TRANSITIONS:
-            raise StateChangeError("Invalid state change from %s to %s" % (self.state, nextstate), self.state, nextstate)
+            raise StateChangeError("Invalid state change from %s to %s" % (self._state, nextstate), self._state, nextstate)
         self._state = nextstate
 
         if self._state == _BufferBlock.PENDING:
@@ -491,7 +491,7 @@ class _BlockManager(object):
             for i in xrange(0, self.num_put_threads):
                 thread = threading.Thread(target=self._commit_bufferblock_worker)
                 self._put_threads.append(thread)
-                thread.daemon = False
+                thread.daemon = True
                 thread.start()
 
     def _block_prefetch_worker(self):
@@ -564,11 +564,17 @@ class _BlockManager(object):
             # Mark the block as PENDING so to disallow any more appends.
             block.set_state(_BufferBlock.PENDING)
         except StateChangeError as e:
-            if e.state == _BufferBlock.PENDING and sync:
-                block.wait_for_commit.wait()
-                if block.state() == _BufferBlock.ERROR:
-                    raise block.error
-            return
+            if e.state == _BufferBlock.PENDING:
+                if sync:
+                    block.wait_for_commit.wait()
+                else:
+                    return
+            if block.state() == _BufferBlock.COMMITTED:
+                return
+            elif block.state() == _BufferBlock.ERROR:
+                raise block.error
+            else:
+                raise
 
         if sync:
             try:
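The arvfile.py hunks fix two related problems: StateChangeError now reports the block's actual current state (self._state, not the state method object), and commit_bufferblock tolerates a block that is already PENDING by waiting (when sync) or returning, then re-raising only if the final state is ERROR. A rough Go transliteration of the transition-table guard, just to illustrate the pattern (state names follow the Python code; the rest is hypothetical):

    package main

    import "fmt"

    type state string

    // Allowed transitions, analogous to _BufferBlock.STATE_TRANSITIONS.
    var transitions = map[[2]state]bool{
            {"WRITABLE", "PENDING"}:  true,
            {"PENDING", "COMMITTED"}: true,
            {"PENDING", "ERROR"}:     true,
    }

    type stateChangeError struct{ from, to state }

    func (e stateChangeError) Error() string {
            // Report the block's *current* state -- the detail fixed above.
            return fmt.Sprintf("invalid state change from %s to %s", e.from, e.to)
    }

    func setState(cur *state, next state) error {
            if !transitions[[2]state{*cur, next}] {
                    return stateChangeError{*cur, next}
            }
            *cur = next
            return nil
    }

    func main() {
            st := state("WRITABLE")
            fmt.Println(setState(&st, "PENDING"))  // <nil>
            fmt.Println(setState(&st, "WRITABLE")) // invalid state change ...
    }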
index 7ca6e7ca234f9b7ef2f1bc4dfc6ef84910c9c1e0..6fa26c672d26962a3e9cd01e140907eecec783e4 100644 (file)
@@ -282,7 +282,9 @@ class ArvPutCollectionWriter(arvados.ResumableCollectionWriter):
                                     replication=replication)
         except (TypeError, ValueError,
                 arvados.errors.StaleWriterStateError) as error:
-            return cls(cache, reporter, bytes_expected, num_retries=num_retries)
+            return cls(cache, reporter, bytes_expected,
+                       num_retries=num_retries,
+                       replication=replication)
         else:
             return writer
 
index 63b99daedd3d3931ac3822da62bff1d556d0806e..c55b8161560a3c2b4b2ad18226fe68af4bb2cc46 100644 (file)
@@ -241,19 +241,34 @@ class KeepClient(object):
         Should be used in a "with" block.
         """
         def __init__(self, todo):
+            self._started = 0
             self._todo = todo
             self._done = 0
             self._response = None
+            self._start_lock = threading.Condition()
             self._todo_lock = threading.Semaphore(todo)
             self._done_lock = threading.Lock()
+            self._local = threading.local()
 
         def __enter__(self):
+            self._start_lock.acquire()
+            if getattr(self._local, 'sequence', None) is not None:
+                # If the calling thread has used set_sequence(N), then
+                # we wait here until N other threads have started.
+                while self._started < self._local.sequence:
+                    self._start_lock.wait()
+            self._started += 1
+            self._start_lock.notifyAll()
             self._todo_lock.acquire()
+            self._start_lock.release()
             return self
 
         def __exit__(self, type, value, traceback):
             self._todo_lock.release()
 
+        def set_sequence(self, sequence):
+            self._local.sequence = sequence
+
         def shall_i_proceed(self):
             """
             Return true if the current thread should do stuff. Return
@@ -517,7 +532,11 @@ class KeepClient(object):
             return self._success
 
         def run(self):
-            with self.args['thread_limiter'] as limiter:
+            limiter = self.args['thread_limiter']
+            sequence = self.args['thread_sequence']
+            if sequence is not None:
+                limiter.set_sequence(sequence)
+            with limiter:
                 if not limiter.shall_i_proceed():
                     # My turn arrived, but the job has been done without
                     # me.
@@ -651,6 +670,7 @@ class KeepClient(object):
                 self._writable_services = self._keep_services
                 self.using_proxy = True
                 self._static_services_list = True
+                self.max_replicas_per_service = 1
             else:
                 # It's important to avoid instantiating an API client
                 # unless we actually need one, for testing's sake.
@@ -663,6 +683,7 @@ class KeepClient(object):
                 self._writable_services = None
                 self.using_proxy = None
                 self._static_services_list = False
+                self.max_replicas_per_service = 1
 
     def current_timeout(self, attempt_number):
         """Return the appropriate timeout to use for this client.
@@ -720,6 +741,11 @@ class KeepClient(object):
 
             self.using_proxy = any(ks.get('service_type') == 'proxy'
                                    for ks in self._keep_services)
+            # For disk-type services, max_replicas_per_service is 1.
+            # It is unknown or unlimited for non-disk services.
+            for ks in accessible:
+                if ('disk' != ks.get('service_type')) and (not ks.get('read_only')):
+                    self.max_replicas_per_service = None
 
     def _service_weight(self, data_hash, service_uuid):
         """Compute the weight of a Keep service endpoint for a data
@@ -738,7 +764,6 @@ class KeepClient(object):
         self.build_services_list(force_rebuild)
 
         sorted_roots = []
-
         # Use the services indicated by the given +K@... remote
         # service hints, if any are present and can be resolved to a
         # URI.
@@ -938,16 +963,16 @@ class KeepClient(object):
         locator = KeepLocator(loc_s)
 
         headers = {}
-        if self.using_proxy:
-            # Tell the proxy how many copies we want it to store
-            headers['X-Keep-Desired-Replication'] = str(copies)
+        # Tell the proxy (if any) how many copies we want it to store.
+        headers['X-Keep-Desired-Replication'] = str(copies)
         roots_map = {}
-        thread_limiter = KeepClient.ThreadLimiter(copies)
+        thread_limiter = KeepClient.ThreadLimiter(1 if self.max_replicas_per_service is None else copies)
         loop = retry.RetryLoop(num_retries, self._check_loop_result,
                                backoff_start=2)
+        thread_sequence = 0
         for tries_left in loop:
             try:
-                local_roots = self.map_new_services(
+                sorted_roots = self.map_new_services(
                     roots_map, locator,
                     force_rebuild=(tries_left < num_retries), need_writable=True, **headers)
             except Exception as error:
@@ -955,7 +980,8 @@ class KeepClient(object):
                 continue
 
             threads = []
-            for service_root, ks in roots_map.iteritems():
+            for service_root, ks in [(root, roots_map[root])
+                                     for root in sorted_roots]:
                 if ks.finished():
                     continue
                 t = KeepClient.KeepWriterThread(
@@ -964,9 +990,11 @@ class KeepClient(object):
                     data_hash=data_hash,
                     service_root=service_root,
                     thread_limiter=thread_limiter,
-                    timeout=self.current_timeout(num_retries-tries_left))
+                    timeout=self.current_timeout(num_retries-tries_left),
+                    thread_sequence=thread_sequence)
                 t.start()
                 threads.append(t)
+                thread_sequence += 1
             for t in threads:
                 t.join()
             loop.save_result((thread_limiter.done() >= copies, len(threads)))
@@ -979,7 +1007,7 @@ class KeepClient(object):
                     data_hash, loop.last_result()))
         else:
             service_errors = ((key, roots_map[key].last_result()['error'])
-                              for key in local_roots
+                              for key in sorted_roots
                               if roots_map[key].last_result()['error'])
             raise arvados.errors.KeepWriteError(
                 "failed to write {} (wanted {} copies but wrote {})".format(
index 5c0b09d73096874c0d9e86907cf0fde873dadaa7..23a3793e426bb37d87998d2b1ed651e2331d38ed 100644 (file)
@@ -44,7 +44,7 @@ setup(name='arvados-python-client',
           'ws4py'
       ],
       test_suite='tests',
-      tests_require=['mock>=1.0', 'PyYAML'],
+      tests_require=['pbr<1.7.0', 'mock>=1.0', 'PyYAML'],
       zip_safe=False,
       cmdclass={'egg_info': tagger},
       )
index ca13fc7f193263b75cec187e870d6cfa261b5b73..d325b4eb6ecb086d15effa34bc26db3e95c9ad15 100644 (file)
@@ -307,8 +307,9 @@ def _start_keep(n, keep_args):
     for arg, val in keep_args.iteritems():
         keep_cmd.append("{}={}".format(arg, val))
 
+    logf = open(os.path.join(TEST_TMPDIR, 'keep{}.log'.format(n)), 'a+')
     kp0 = subprocess.Popen(
-        keep_cmd, stdin=open('/dev/null'), stdout=sys.stderr)
+        keep_cmd, stdin=open('/dev/null'), stdout=logf, stderr=logf, close_fds=True)
     with open(_pidfile('keep{}'.format(n)), 'w') as f:
         f.write(str(kp0.pid))
 
@@ -319,28 +320,34 @@ def _start_keep(n, keep_args):
 
     return port
 
-def run_keep(blob_signing_key=None, enforce_permissions=False):
-    stop_keep()
+def run_keep(blob_signing_key=None, enforce_permissions=False, num_servers=2):
+    stop_keep(num_servers)
 
     keep_args = {}
-    if blob_signing_key:
-        with open(os.path.join(TEST_TMPDIR, "keep.blob_signing_key"), "w") as f:
-            keep_args['--permission-key-file'] = f.name
-            f.write(blob_signing_key)
+    if not blob_signing_key:
+        blob_signing_key = 'zfhgfenhffzltr9dixws36j1yhksjoll2grmku38mi7yxd66h5j4q9w4jzanezacp8s6q0ro3hxakfye02152hncy6zml2ed0uc'
+    with open(os.path.join(TEST_TMPDIR, "keep.blob_signing_key"), "w") as f:
+        keep_args['-blob-signing-key-file'] = f.name
+        f.write(blob_signing_key)
     if enforce_permissions:
-        keep_args['--enforce-permissions'] = 'true'
+        keep_args['-enforce-permissions'] = 'true'
+    with open(os.path.join(TEST_TMPDIR, "keep.data-manager-token-file"), "w") as f:
+        keep_args['-data-manager-token-file'] = f.name
+        f.write(os.environ['ARVADOS_API_TOKEN'])
+    keep_args['-never-delete'] = 'false'
 
     api = arvados.api(
         version='v1',
         host=os.environ['ARVADOS_API_HOST'],
         token=os.environ['ARVADOS_API_TOKEN'],
         insecure=True)
+
     for d in api.keep_services().list().execute()['items']:
         api.keep_services().delete(uuid=d['uuid']).execute()
     for d in api.keep_disks().list().execute()['items']:
         api.keep_disks().delete(uuid=d['uuid']).execute()
 
-    for d in range(0, 2):
+    for d in range(0, num_servers):
         port = _start_keep(d, keep_args)
         svc = api.keep_services().create(body={'keep_service': {
             'uuid': 'zzzzz-bi6l4-keepdisk{:07d}'.format(d),
@@ -362,9 +369,9 @@ def _stop_keep(n):
     if os.path.exists(os.path.join(TEST_TMPDIR, "keep.blob_signing_key")):
         os.remove(os.path.join(TEST_TMPDIR, "keep.blob_signing_key"))
 
-def stop_keep():
-    _stop_keep(0)
-    _stop_keep(1)
+def stop_keep(num_servers=2):
+    for n in range(0, num_servers):
+        _stop_keep(n)
 
 def run_keep_proxy():
     if 'ARVADOS_TEST_PROXY_SERVICES' in os.environ:
@@ -598,6 +605,9 @@ if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument('action', type=str, help="one of {}".format(actions))
     parser.add_argument('--auth', type=str, metavar='FIXTURE_NAME', help='Print authorization info for given api_client_authorizations fixture')
+    parser.add_argument('--num-keep-servers', metavar='int', type=int, default=2, help="Number of keep servers desired")
+    parser.add_argument('--keep-enforce-permissions', action="store_true", help="Enforce keep permissions")
+
     args = parser.parse_args()
 
     if args.action not in actions:
@@ -617,7 +627,7 @@ if __name__ == "__main__":
     elif args.action == 'stop':
         stop(force=('ARVADOS_TEST_API_HOST' not in os.environ))
     elif args.action == 'start_keep':
-        run_keep()
+        run_keep(enforce_permissions=args.keep_enforce_permissions, num_servers=args.num_keep_servers)
     elif args.action == 'stop_keep':
         stop_keep()
     elif args.action == 'start_keep_proxy':
index f4bd8b692bc2520cdda8a679abbb3c2d18ff85dd..896b880778a1b0965429420af8a6f349048ef5c9 100644 (file)
@@ -2,6 +2,7 @@
 # -*- coding: utf-8 -*-
 
 import apiclient
+import mock
 import os
 import pwd
 import re
@@ -335,10 +336,10 @@ class ArvadosPutTest(run_test_server.TestCaseWithServers, ArvadosBaseTestCase):
         self.main_stderr = StringIO()
         return arv_put.main(args, self.main_stdout, self.main_stderr)
 
-    def call_main_on_test_file(self):
+    def call_main_on_test_file(self, args=[]):
         with self.make_test_file() as testfile:
             path = testfile.name
-            self.call_main_with_args(['--stream', '--no-progress', path])
+            self.call_main_with_args(['--stream', '--no-progress'] + args + [path])
         self.assertTrue(
             os.path.exists(os.path.join(os.environ['KEEP_LOCAL_STORE'],
                                         '098f6bcd4621d373cade4e832627b4f6')),
@@ -381,6 +382,18 @@ class ArvadosPutTest(run_test_server.TestCaseWithServers, ArvadosBaseTestCase):
             arv_put.ResumeCache.CACHE_DIR = orig_cachedir
             os.chmod(cachedir, 0o700)
 
+    def test_put_block_replication(self):
+        with mock.patch('arvados.collection.KeepClient.local_store_put') as put_mock, \
+             mock.patch('arvados.commands.put.ResumeCache.load') as cache_mock:
+            cache_mock.side_effect = ValueError
+            put_mock.return_value = 'acbd18db4cc2f85cedef654fccc4a4d8+3'
+            self.call_main_on_test_file(['--replication', '1'])
+            self.call_main_on_test_file(['--replication', '4'])
+            self.call_main_on_test_file(['--replication', '5'])
+            self.assertEqual(
+                [x[-1].get('copies') for x in put_mock.call_args_list],
+                [1, 4, 5])
+
     def test_normalize(self):
         testfile1 = self.make_test_file()
         testfile2 = self.make_test_file()
index 99be4c2e401d0a34ad5b0b4b62fb0074899de2ed..330dd448278259aef3a686d652c81f72cdf6405e 100644 (file)
@@ -7,6 +7,7 @@ import mock
 import os
 import unittest
 import hashlib
+import time
 
 import arvados
 from arvados._ranges import Range
@@ -626,6 +627,20 @@ class BlockManagerTest(unittest.TestCase):
             self.assertEqual(bufferblock.state(), arvados.arvfile._BufferBlock.COMMITTED)
             self.assertIsNone(bufferblock.buffer_view)
 
+    def test_bufferblock_commit_pending(self):
+        # Test for bug #7225
+        mockkeep = mock.MagicMock()
+        mockkeep.put.side_effect = lambda x: time.sleep(1)
+        with arvados.arvfile._BlockManager(mockkeep) as blockmanager:
+            bufferblock = blockmanager.alloc_bufferblock()
+            bufferblock.append("foo")
+
+            blockmanager.commit_bufferblock(bufferblock, False)
+            self.assertEqual(bufferblock.state(), arvados.arvfile._BufferBlock.PENDING)
+
+            blockmanager.commit_bufferblock(bufferblock, True)
+            self.assertEqual(bufferblock.state(), arvados.arvfile._BufferBlock.COMMITTED)
+
 
     def test_bufferblock_commit_with_error(self):
         mockkeep = mock.MagicMock()
index 13fc88def303c28d4161e3e4e3d080b9cb17cce6..ac7dd1b9f678ab6391ad71b13201374d127aaba3 100644 (file)
@@ -1144,7 +1144,7 @@ class CollectionCreateUpdateTest(run_test_server.TestCaseWithServers):
         c2.save()
 
         c1.update()
-        self.assertRegexpMatches(c1.manifest_text(), r"\. e65075d550f9b5bf9992fa1d71a131be\+3 7ac66c0f148de9519b8bd264312c4d64\+7\+A[a-f0-9]{40}@[a-f0-9]{8} 0:3:count\.txt 3:7:count\.txt~\d\d\d\d\d\d\d\d-\d\d\d\d\d\d~conflict~$")
+        self.assertRegexpMatches(c1.manifest_text(), r"\. e65075d550f9b5bf9992fa1d71a131be\+3\S* 7ac66c0f148de9519b8bd264312c4d64\+7\S* 0:3:count\.txt 3:7:count\.txt~\d\d\d\d\d\d\d\d-\d\d\d\d\d\d~conflict~$")
 
 
 if __name__ == '__main__':
index c44379bac79465417e9a7d128d1aa47f13d6a6fa..a73ca84211f4da9781ef169b5ed548babd85da93 100644 (file)
@@ -332,39 +332,150 @@ class KeepClientServiceTestCase(unittest.TestCase, tutil.ApiClientMock):
                 mock.responses[0].getopt(pycurl.TIMEOUT_MS),
                 int(arvados.KeepClient.DEFAULT_PROXY_TIMEOUT[1]*1000))
 
-    def test_probe_order_reference_set(self):
+    def check_no_services_error(self, verb, exc_class):
+        api_client = mock.MagicMock(name='api_client')
+        api_client.keep_services().accessible().execute.side_effect = (
+            arvados.errors.ApiError)
+        keep_client = arvados.KeepClient(api_client=api_client)
+        with self.assertRaises(exc_class) as err_check:
+            getattr(keep_client, verb)('d41d8cd98f00b204e9800998ecf8427e+0')
+        self.assertEqual(0, len(err_check.exception.request_errors()))
+
+    def test_get_error_with_no_services(self):
+        self.check_no_services_error('get', arvados.errors.KeepReadError)
+
+    def test_put_error_with_no_services(self):
+        self.check_no_services_error('put', arvados.errors.KeepWriteError)
+
+    def check_errors_from_last_retry(self, verb, exc_class):
+        api_client = self.mock_keep_services(count=2)
+        req_mock = tutil.mock_keep_responses(
+            "retry error reporting test", 500, 500, 403, 403)
+        with req_mock, tutil.skip_sleep, \
+                self.assertRaises(exc_class) as err_check:
+            keep_client = arvados.KeepClient(api_client=api_client)
+            getattr(keep_client, verb)('d41d8cd98f00b204e9800998ecf8427e+0',
+                                       num_retries=3)
+        self.assertEqual([403, 403], [
+                getattr(error, 'status_code', None)
+                for error in err_check.exception.request_errors().itervalues()])
+
+    def test_get_error_reflects_last_retry(self):
+        self.check_errors_from_last_retry('get', arvados.errors.KeepReadError)
+
+    def test_put_error_reflects_last_retry(self):
+        self.check_errors_from_last_retry('put', arvados.errors.KeepWriteError)
+
+    def test_put_error_does_not_include_successful_puts(self):
+        data = 'partial failure test'
+        data_loc = '{}+{}'.format(hashlib.md5(data).hexdigest(), len(data))
+        api_client = self.mock_keep_services(count=3)
+        with tutil.mock_keep_responses(data_loc, 200, 500, 500) as req_mock, \
+                self.assertRaises(arvados.errors.KeepWriteError) as exc_check:
+            keep_client = arvados.KeepClient(api_client=api_client)
+            keep_client.put(data)
+        self.assertEqual(2, len(exc_check.exception.request_errors()))
+
+    def test_proxy_put_with_no_writable_services(self):
+        data = 'test with no writable services'
+        data_loc = '{}+{}'.format(hashlib.md5(data).hexdigest(), len(data))
+        api_client = self.mock_keep_services(service_type='proxy', read_only=True, count=1)
+        with tutil.mock_keep_responses(data_loc, 200, 500, 500) as req_mock, \
+                self.assertRaises(arvados.errors.KeepWriteError) as exc_check:
+            keep_client = arvados.KeepClient(api_client=api_client)
+            keep_client.put(data)
+        self.assertIn("no Keep services available", str(exc_check.exception))
+        self.assertEqual(0, len(exc_check.exception.request_errors()))
+
+
+@tutil.skip_sleep
+class KeepClientRendezvousTestCase(unittest.TestCase, tutil.ApiClientMock):
+
+    def setUp(self):
         # expected_order[i] is the probe order for
         # hash=md5(sprintf("%064x",i)) where there are 16 services
         # with uuid sprintf("anything-%015x",j) with j in 0..15. E.g.,
         # the first probe for the block consisting of 64 "0"
         # characters is the service whose uuid is
         # "zzzzz-bi6l4-000000000000003", so expected_order[0][0]=='3'.
-        expected_order = [
+        self.services = 16
+        self.expected_order = [
             list('3eab2d5fc9681074'),
             list('097dba52e648f1c3'),
             list('c5b4e023f8a7d691'),
             list('9d81c02e76a3bf54'),
             ]
-        hashes = [
-            hashlib.md5("{:064x}".format(x)).hexdigest()
-            for x in range(len(expected_order))]
-        api_client = self.mock_keep_services(count=16)
-        keep_client = arvados.KeepClient(api_client=api_client)
-        for i, hash in enumerate(hashes):
-            roots = keep_client.weighted_service_roots(arvados.KeepLocator(hash))
+        self.blocks = [
+            "{:064x}".format(x)
+            for x in range(len(self.expected_order))]
+        self.hashes = [
+            hashlib.md5(self.blocks[x]).hexdigest()
+            for x in range(len(self.expected_order))]
+        self.api_client = self.mock_keep_services(count=self.services)
+        self.keep_client = arvados.KeepClient(api_client=self.api_client)
+
+    def test_weighted_service_roots_against_reference_set(self):
+        # Confirm weighted_service_roots() returns the correct order
+        for i, hash in enumerate(self.hashes):
+            roots = self.keep_client.weighted_service_roots(arvados.KeepLocator(hash))
             got_order = [
                 re.search(r'//\[?keep0x([0-9a-f]+)', root).group(1)
                 for root in roots]
-            self.assertEqual(expected_order[i], got_order)
+            self.assertEqual(self.expected_order[i], got_order)
+
+    def test_get_probe_order_against_reference_set(self):
+        self._test_probe_order_against_reference_set(
+            lambda i: self.keep_client.get(self.hashes[i], num_retries=1))
+
+    def test_put_probe_order_against_reference_set(self):
+        # copies=1 prevents the test from being sensitive to races
+        # between writer threads.
+        self._test_probe_order_against_reference_set(
+            lambda i: self.keep_client.put(self.blocks[i], num_retries=1, copies=1))
+
+    def _test_probe_order_against_reference_set(self, op):
+        for i in range(len(self.blocks)):
+            with tutil.mock_keep_responses('', *[500 for _ in range(self.services*2)]) as mock, \
+                 self.assertRaises(arvados.errors.KeepRequestError):
+                op(i)
+            got_order = [
+                re.search(r'//\[?keep0x([0-9a-f]+)', resp.getopt(pycurl.URL)).group(1)
+                for resp in mock.responses]
+            self.assertEqual(self.expected_order[i]*2, got_order)
+
+    def test_put_probe_order_multiple_copies(self):
+        for copies in range(2, 4):
+            for i in range(len(self.blocks)):
+                with tutil.mock_keep_responses('', *[500 for _ in range(self.services*3)]) as mock, \
+                     self.assertRaises(arvados.errors.KeepWriteError):
+                    self.keep_client.put(self.blocks[i], num_retries=2, copies=copies)
+                got_order = [
+                    re.search(r'//\[?keep0x([0-9a-f]+)', resp.getopt(pycurl.URL)).group(1)
+                    for resp in mock.responses]
+                for pos, expected in enumerate(self.expected_order[i]*3):
+                    # With C threads racing to make requests, the
+                    # position of a given server in the sequence of
+                    # HTTP requests (got_order) should be within C-1
+                    # positions of that server's position in the
+                    # reference probe sequence (expected_order).
+                    close_enough = False
+                    for diff in range(1-copies, copies):
+                        if 0 <= pos+diff < len(got_order):
+                            if expected == got_order[pos+diff]:
+                                close_enough = True
+                    self.assertEqual(
+                        True, close_enough,
+                        "With copies={}, got {}, expected {}".format(
+                            copies, repr(got_order), repr(self.expected_order[i]*3)))
 
     def test_probe_waste_adding_one_server(self):
         hashes = [
             hashlib.md5("{:064x}".format(x)).hexdigest() for x in range(100)]
         initial_services = 12
-        api_client = self.mock_keep_services(count=initial_services)
-        keep_client = arvados.KeepClient(api_client=api_client)
+        self.api_client = self.mock_keep_services(count=initial_services)
+        self.keep_client = arvados.KeepClient(api_client=self.api_client)
         probes_before = [
-            keep_client.weighted_service_roots(arvados.KeepLocator(hash)) for hash in hashes]
+            self.keep_client.weighted_service_roots(arvados.KeepLocator(hash)) for hash in hashes]
         for added_services in range(1, 12):
             api_client = self.mock_keep_services(count=initial_services+added_services)
             keep_client = arvados.KeepClient(api_client=api_client)
@@ -402,7 +513,7 @@ class KeepClientServiceTestCase(unittest.TestCase, tutil.ApiClientMock):
             data = hashlib.md5(data).hexdigest() + '+1234'
         # Arbitrary port number:
         aport = random.randint(1024,65535)
-        api_client = self.mock_keep_services(service_port=aport, count=16)
+        api_client = self.mock_keep_services(service_port=aport, count=self.services)
         keep_client = arvados.KeepClient(api_client=api_client)
         with mock.patch('pycurl.Curl') as curl_mock, \
              self.assertRaises(exc_class) as err_check:
@@ -419,60 +530,6 @@ class KeepClientServiceTestCase(unittest.TestCase, tutil.ApiClientMock):
     def test_put_error_shows_probe_order(self):
         self.check_64_zeros_error_order('put', arvados.errors.KeepWriteError)
 
-    def check_no_services_error(self, verb, exc_class):
-        api_client = mock.MagicMock(name='api_client')
-        api_client.keep_services().accessible().execute.side_effect = (
-            arvados.errors.ApiError)
-        keep_client = arvados.KeepClient(api_client=api_client)
-        with self.assertRaises(exc_class) as err_check:
-            getattr(keep_client, verb)('d41d8cd98f00b204e9800998ecf8427e+0')
-        self.assertEqual(0, len(err_check.exception.request_errors()))
-
-    def test_get_error_with_no_services(self):
-        self.check_no_services_error('get', arvados.errors.KeepReadError)
-
-    def test_put_error_with_no_services(self):
-        self.check_no_services_error('put', arvados.errors.KeepWriteError)
-
-    def check_errors_from_last_retry(self, verb, exc_class):
-        api_client = self.mock_keep_services(count=2)
-        req_mock = tutil.mock_keep_responses(
-            "retry error reporting test", 500, 500, 403, 403)
-        with req_mock, tutil.skip_sleep, \
-                self.assertRaises(exc_class) as err_check:
-            keep_client = arvados.KeepClient(api_client=api_client)
-            getattr(keep_client, verb)('d41d8cd98f00b204e9800998ecf8427e+0',
-                                       num_retries=3)
-        self.assertEqual([403, 403], [
-                getattr(error, 'status_code', None)
-                for error in err_check.exception.request_errors().itervalues()])
-
-    def test_get_error_reflects_last_retry(self):
-        self.check_errors_from_last_retry('get', arvados.errors.KeepReadError)
-
-    def test_put_error_reflects_last_retry(self):
-        self.check_errors_from_last_retry('put', arvados.errors.KeepWriteError)
-
-    def test_put_error_does_not_include_successful_puts(self):
-        data = 'partial failure test'
-        data_loc = '{}+{}'.format(hashlib.md5(data).hexdigest(), len(data))
-        api_client = self.mock_keep_services(count=3)
-        with tutil.mock_keep_responses(data_loc, 200, 500, 500) as req_mock, \
-                self.assertRaises(arvados.errors.KeepWriteError) as exc_check:
-            keep_client = arvados.KeepClient(api_client=api_client)
-            keep_client.put(data)
-        self.assertEqual(2, len(exc_check.exception.request_errors()))
-
-    def test_proxy_put_with_no_writable_services(self):
-        data = 'test with no writable services'
-        data_loc = '{}+{}'.format(hashlib.md5(data).hexdigest(), len(data))
-        api_client = self.mock_keep_services(service_type='proxy', read_only=True, count=1)
-        with tutil.mock_keep_responses(data_loc, 200, 500, 500) as req_mock, \
-                self.assertRaises(arvados.errors.KeepWriteError) as exc_check:
-          keep_client = arvados.KeepClient(api_client=api_client)
-          keep_client.put(data)
-        self.assertEqual(True, ("no Keep services available" in str(exc_check.exception)))
-        self.assertEqual(0, len(exc_check.exception.request_errors()))
 
 class KeepClientTimeout(unittest.TestCase, tutil.ApiClientMock):
     DATA = 'x' * 2**10
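The close_enough check in test_put_probe_order_multiple_copies encodes the tolerance argument from its comment: with C writer threads racing, a given server's request can land at most C-1 positions away from its slot in the reference probe order. A compact Go sketch of that check (illustrative only):

    package main

    import "fmt"

    // closeEnough reports whether expected appears within copies-1
    // positions of pos in got, mirroring the tolerance loop above.
    func closeEnough(pos, copies int, expected string, got []string) bool {
            for diff := 1 - copies; diff < copies; diff++ {
                    if p := pos + diff; 0 <= p && p < len(got) && got[p] == expected {
                            return true
                    }
            }
            return false
    }

    func main() {
            expected := []string{"a", "b", "c", "d"}
            got := []string{"b", "a", "c", "d"} // two threads raced on a and b
            ok := true
            for pos, want := range expected {
                    ok = ok && closeEnough(pos, 2, want, got)
            }
            fmt.Println(ok) // true
    }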
index 8f1b687a9cad9a01e593b08f1e34cee2d4db8e53..1193e915363e80e7ccc70a0ce16c731dc3ebcaeb 100644 (file)
@@ -74,7 +74,7 @@ gem 'faye-websocket'
 gem 'themes_for_rails'
 
 gem 'arvados', '>= 0.1.20150615153458'
-gem 'arvados-cli', '>= 0.1.20150128223752'
+gem 'arvados-cli', '>= 0.1.20150605170031'
 
 # pg_power lets us use partial indexes in schema.rb in Rails 3
 gem 'pg_power'
index d671182a57c749b65bd9f7c93d2db215bc762f16..be4d4606ab21599e6e1c744d9f1ad40dd25887db 100644 (file)
@@ -41,10 +41,10 @@ GEM
       google-api-client (~> 0.6.3, >= 0.6.3)
       json (~> 1.7, >= 1.7.7)
       jwt (>= 0.1.5, < 1.0.0)
-    arvados-cli (0.1.20150205181653)
+    arvados-cli (0.1.20150930141818)
       activesupport (~> 3.2, >= 3.2.13)
       andand (~> 1.3, >= 1.3.3)
-      arvados (~> 0.1, >= 0.1.20150615153458)
+      arvados (~> 0.1, >= 0.1.20150128223554)
       curb (~> 0.8)
       google-api-client (~> 0.6.3, >= 0.6.3)
       json (~> 1.7, >= 1.7.7)
@@ -69,7 +69,7 @@ GEM
       coffee-script-source
       execjs
     coffee-script-source (1.7.0)
-    curb (0.8.6)
+    curb (0.8.8)
     daemon_controller (1.2.0)
     database_cleaner (1.2.0)
     erubis (2.7.0)
@@ -228,7 +228,7 @@ DEPENDENCIES
   acts_as_api
   andand
   arvados (>= 0.1.20150615153458)
-  arvados-cli (>= 0.1.20150128223752)
+  arvados-cli (>= 0.1.20150605170031)
   coffee-rails (~> 3.2.0)
   database_cleaner
   factory_girl_rails
index 62d5e59c8d142ce5116da263c9314def02b670d1..ba0f90f90c45d95925df0ae7892815d0b5c490b6 100644 (file)
@@ -22,7 +22,7 @@ class Arvados::V1::SchemaController < ApplicationController
         name: "arvados",
         version: "v1",
         revision: "20131114",
-        source_version: (Rails.application.config.source_version ? Rails.application.config.source_version : "No version information available") + (Rails.application.config.local_modified ? Rails.application.config.local_modified.to_s : ''),
+        source_version: AppVersion.hash,
         generatedAt: db_current_time.iso8601,
         title: "Arvados API",
         description: "The API to interact with Arvados.",
index 6854ed2625218c462f786ab960268a8be7708910..58055297a1cf9945f36d441562d3818997b4da3a 100644 (file)
@@ -13,4 +13,13 @@ class KeepService < ArvadosModel
   api_accessible :superuser, :extend => :user do |t|
   end
 
+  protected
+
+  def permission_to_create
+    current_user.andand.is_admin
+  end
+
+  def permission_to_update
+    current_user.andand.is_admin
+  end
 end
index 51bc4f98f3b89f47e9dbb0f669a5a458f0fc6a55..8777f28a86000ee9ccef86f15bd75e8160c2c3bd 100644 (file)
@@ -326,9 +326,11 @@ common:
 
   default_openid_prefix: https://www.google.com/accounts/o8/id
 
-  # source_version
-  source_version: "<%= `git log -n 1 --format=%h`.strip %>"
-  local_modified: false
+  # Override the automatic version string. With the default value of
+  # false, the version string is read from git-commit.version in
+  # Rails.root (included in vendor packages) or determined by invoking
+  # "git log".
+  source_version: false
 
 
 development:
@@ -345,7 +347,6 @@ development:
   active_record.auto_explain_threshold_in_seconds: 0.5
   assets.compress: false
   assets.debug: true
-  local_modified: "<%= '-modified' if `git status -s` != '' %>"
 
 production:
   force_ssl: true
diff --git a/services/api/config/initializers/app_version.rb b/services/api/config/initializers/app_version.rb
new file mode 100644 (file)
index 0000000..c904856
--- /dev/null
@@ -0,0 +1 @@
+require 'app_version'
diff --git a/services/api/lib/app_version.rb b/services/api/lib/app_version.rb
new file mode 100644 (file)
index 0000000..769f4e5
--- /dev/null
@@ -0,0 +1,52 @@
+# If you change this file, you'll probably also want to make the same
+# changes in apps/workbench/lib/app_version.rb.
+
+class AppVersion
+  def self.git(*args, &block)
+    IO.popen(["git", "--git-dir", ".git"] + args, "r",
+             chdir: Rails.root.join('../..'),
+             err: "/dev/null",
+             &block)
+  end
+
+  def self.forget
+    @hash = nil
+  end
+
+  # Return abbrev commit hash for current code version: "abc1234", or
+  # "abc1234-modified" if there are uncommitted changes. If present,
+  # return contents of {root}/git-commit.version instead.
+  def self.hash
+    if (cached = Rails.configuration.source_version || @hash)
+      return cached
+    end
+
+    # Read the version from our package's git-commit.version file, if available.
+    begin
+      @hash = IO.read(Rails.root.join("git-commit.version")).strip
+    rescue Errno::ENOENT
+    end
+
+    if @hash.nil? or @hash.empty?
+      begin
+        local_modified = false
+        git("status", "--porcelain") do |git_pipe|
+          git_pipe.each_line do |_|
+            local_modified = true
+            # Continue reading the pipe so git doesn't get SIGPIPE.
+          end
+        end
+        if $?.success?
+          git("log", "-n1", "--format=%H") do |git_pipe|
+            git_pipe.each_line do |line|
+              @hash = line.chomp[0...8] + (local_modified ? '-modified' : '')
+            end
+          end
+        end
+      rescue SystemCallError
+      end
+    end
+
+    @hash || "unknown"
+  end
+end
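AppVersion.hash resolves the version string in a fixed order: the source_version config override, then a packaged git-commit.version file, then "git log" (with a -modified suffix when "git status" shows local changes), and finally "unknown". A hedged Go rendering of the same fallback chain (the -modified suffix is omitted for brevity; the file name follows the Ruby code above):

    package main

    import (
            "fmt"
            "os"
            "os/exec"
            "strings"
    )

    func versionHash(configured string) string {
            if configured != "" {
                    return configured // config override wins
            }
            // Packaged version file, if present.
            if b, err := os.ReadFile("git-commit.version"); err == nil {
                    if v := strings.TrimSpace(string(b)); v != "" {
                            return v
                    }
            }
            // Fall back to asking git for the current commit.
            if out, err := exec.Command("git", "log", "-n1", "--format=%H").Output(); err == nil {
                    if h := strings.TrimSpace(string(out)); len(h) >= 8 {
                            return h[:8]
                    }
            }
            return "unknown"
    }

    func main() {
            fmt.Println(versionHash(""))
    }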
index 1b386556e62ca1363e0c31bad1645236687602b0..b7316a5e19304c1af4b892ea2513393d01406850 100644 (file)
@@ -1,12 +1,13 @@
 namespace :config do
   desc 'Ensure site configuration has all required settings'
   task check: :environment do
+    $stderr.puts "%-32s %s" % ["AppVersion (discovered)", AppVersion.hash]
     $application_config.sort.each do |k, v|
       if ENV.has_key?('QUIET') then
         # Make sure we still check for the variable to exist
         eval("Rails.configuration.#{k}")
       else
-        if /(password|secret)/.match(k) then
+        if /(password|secret|signing_key)/.match(k) then
           # Make sure we still check for the variable to exist, but don't print the value
           eval("Rails.configuration.#{k}")
           $stderr.puts "%-32s %s" % [k, '*********']
index 27cb82115b2f7993bce20b7a589bdb62eccad619..4a1fdbce758d7b552f529419f7c37f970299d298 100755 (executable)
@@ -1,5 +1,9 @@
 #!/usr/bin/env ruby
 
+# We want files written by crunch-dispatch to be writable by other processes
+# with the same GID, see bug #7228
+File.umask(0002)
+
 require 'shellwords'
 include Process
 
@@ -747,6 +751,7 @@ class Dispatcher
 
   def run
     act_as_system_user
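+    # Warm up the permission cache before entering the dispatch loop
+    # (assumed intent of the group_permissions call below).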
+    User.first.group_permissions
     $stderr.puts "dispatch: ready"
     while !$signal[:term] or @running.size > 0
       read_pipes
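File.umask(0002) makes every file crunch-dispatch creates group-writable: a create mode of 0666 masked by 0002 yields 0664. A small Unix-only Go demonstration of the same arithmetic (the path is illustrative):

    package main

    import (
            "fmt"
            "os"
            "syscall"
    )

    func main() {
            syscall.Umask(0002) // same policy as the crunch-dispatch change
            f, err := os.Create("/tmp/crunch-umask-demo")
            if err != nil {
                    panic(err)
            }
            defer f.Close()
            info, _ := f.Stat()
            fmt.Printf("%o\n", info.Mode().Perm()) // 664 = 0666 &^ 0002
    }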
index b70807bd38acbb484da6e8f89f0c9525b4491993..7ef98bb7c8571ec8a6f64f4eb4a8906d25c52361 100755 (executable)
@@ -12,6 +12,7 @@
 
 require 'trollop'
 require './lib/salvage_collection'
+include SalvageCollection
 
 opts = Trollop::options do
   banner ''
index 520e36ec373e36a2865a4db4bd45da3f241b66fb..f651d81eb3dd13a049896e0e9cdccb87eda6d548 100644 (file)
@@ -2,6 +2,13 @@ require 'test_helper'
 
 class Arvados::V1::SchemaControllerTest < ActionController::TestCase
 
+  setup do forget end
+  teardown do forget end
+  def forget
+    Rails.cache.delete 'arvados_v1_rest_discovery'
+    AppVersion.forget
+  end
+
   test "should get fresh discovery document" do
     MAX_SCHEMA_AGE = 60
     get :index
@@ -20,4 +27,19 @@ class Arvados::V1::SchemaControllerTest < ActionController::TestCase
     assert_includes discovery_doc, 'defaultTrashLifetime'
     assert_equal discovery_doc['defaultTrashLifetime'], Rails.application.config.default_trash_lifetime
   end
+
+  test "discovery document has source_version" do
+    get :index
+    assert_response :success
+    discovery_doc = JSON.parse(@response.body)
+    assert_match /^[0-9a-f]+(-modified)?$/, discovery_doc['source_version']
+  end
+
+  test "discovery document overrides source_version with config" do
+    Rails.configuration.source_version = 'aaa888fff'
+    get :index
+    assert_response :success
+    discovery_doc = JSON.parse(@response.body)
+    assert_equal 'aaa888fff', discovery_doc['source_version']
+  end
 end
diff --git a/services/api/test/unit/app_version_test.rb b/services/api/test/unit/app_version_test.rb
new file mode 100644 (file)
index 0000000..3e9b167
--- /dev/null
@@ -0,0 +1,43 @@
+require 'test_helper'
+
+class AppVersionTest < ActiveSupport::TestCase
+
+  setup do AppVersion.forget end
+
+  teardown do AppVersion.forget end
+
+  test 'invoke git processes only on first call' do
+    AppVersion.expects(:git).
+      with("status", "--porcelain").once.
+      yields " M services/api/README\n"
+    AppVersion.expects(:git).
+      with("log", "-n1", "--format=%H").once.
+      yields "da39a3ee5e6b4b0d3255bfef95601890afd80709\n"
+
+    (0..4).each do
+      v = AppVersion.hash
+      assert_equal 'da39a3ee-modified', v
+    end
+  end
+
+  test 'override with configuration' do
+    Rails.configuration.source_version = 'foobar'
+    assert_equal 'foobar', AppVersion.hash
+    Rails.configuration.source_version = false
+    assert_not_equal 'foobar', AppVersion.hash
+  end
+
+  test 'override with file' do
+    path = Rails.root.join 'git-commit.version'
+    assert(!File.exists?(path),
+           "Packaged version file found in source tree: #{path}")
+    begin
+      File.open(path, 'w') do |f|
+        f.write "0.1.abc123\n"
+      end
+      assert_equal "0.1.abc123", AppVersion.hash
+    ensure
+      File.unlink path
+    end
+  end
+end
index 72c4f8ed0266361348a3cc113bbb8f7ce502452d..8ca8c523d2f0cfb3c844ebdebf2efc68d6608296 100644 (file)
@@ -1,7 +1,33 @@
 require 'test_helper'
 
 class KeepServiceTest < ActiveSupport::TestCase
-  # test "the truth" do
-  #   assert true
-  # end
+  test "non-admins cannot create services" do
+    set_user_from_auth :active
+    ks = KeepService.new
+    assert_not_allowed do
+      ks.save
+    end
+  end
+
+  test "non-admins cannot update services" do
+    set_user_from_auth :active
+    ks = keep_services(:proxy)
+    ks.service_port = 64434
+    assert_not_allowed do
+      ks.save
+    end
+  end
+
+  test "admins can create services" do
+    set_user_from_auth :admin
+    ks = KeepService.new
+    assert(ks.save, "saving new service failed")
+  end
+
+  test "admins can update services" do
+    set_user_from_auth :admin
+    ks = keep_services(:proxy)
+    ks.service_port = 64434
+    assert(ks.save, "saving updated service failed")
+  end
 end
index 84587302c7ac3a4a13c832c68fda36608c49a434..aa710262b499b9c0a5e5168a8a69fc039ddeca36 100644 (file)
@@ -70,7 +70,7 @@ func (s *GitoliteSuite) TestFetchUnreadable(c *check.C) {
 }
 
 func (s *GitoliteSuite) TestPush(c *check.C) {
-       err := s.RunGit(c, activeToken, "push", "active/foo.git")
+       err := s.RunGit(c, activeToken, "push", "active/foo.git", "master:gitolite-push")
        c.Check(err, check.Equals, nil)
 
        // Check that the commit hash appears in the gitolite log, as
@@ -88,6 +88,6 @@ func (s *GitoliteSuite) TestPush(c *check.C) {
 }
 
 func (s *GitoliteSuite) TestPushUnwritable(c *check.C) {
-       err := s.RunGit(c, spectatorToken, "push", "active/foo.git")
+       err := s.RunGit(c, spectatorToken, "push", "active/foo.git", "master:gitolite-push-fail")
        c.Check(err, check.ErrorMatches, `.*HTTP code = 403.*`)
 }
index 5519ad8670ec93611740268d34f845e5c104fffe..ca03627405e0742ad419b1d1dd6daf64fba7a341 100644 (file)
@@ -24,38 +24,43 @@ var (
        maxManifestSize   uint64
 )
 
+// Collection is an in-memory representation of a collection record.
 type Collection struct {
-       Uuid              string
-       OwnerUuid         string
+       UUID              string
+       OwnerUUID         string
        ReplicationLevel  int
        BlockDigestToSize map[blockdigest.BlockDigest]int
        TotalSize         int
 }
 
+// ReadCollections holds information about collections read from the API server.
 type ReadCollections struct {
        ReadAllCollections        bool
-       UuidToCollection          map[string]Collection
+       UUIDToCollection          map[string]Collection
        OwnerToCollectionSize     map[string]int
        BlockToDesiredReplication map[blockdigest.DigestWithSize]int
-       CollectionUuidToIndex     map[string]int
-       CollectionIndexToUuid     []string
+       CollectionUUIDToIndex     map[string]int
+       CollectionIndexToUUID     []string
        BlockToCollectionIndices  map[blockdigest.DigestWithSize][]int
 }
 
+// GetCollectionsParams holds the parameters for GetCollections.
 type GetCollectionsParams struct {
        Client    arvadosclient.ArvadosClient
        Logger    *logger.Logger
        BatchSize int
 }
 
+// SdkCollectionInfo holds collection information returned by the API server.
 type SdkCollectionInfo struct {
-       Uuid         string    `json:"uuid"`
-       OwnerUuid    string    `json:"owner_uuid"`
+       UUID         string    `json:"uuid"`
+       OwnerUUID    string    `json:"owner_uuid"`
        Redundancy   int       `json:"redundancy"`
        ModifiedAt   time.Time `json:"modified_at"`
        ManifestText string    `json:"manifest_text"`
 }
 
+// SdkCollectionList holds one page of results from a collections list API call.
 type SdkCollectionList struct {
        ItemsAvailable int                 `json:"items_available"`
        Items          []SdkCollectionInfo `json:"items"`
@@ -68,7 +73,7 @@ func init() {
                "File to write the heap profiles to. Leave blank to skip profiling.")
 }
 
-// Write the heap profile to a file for later review.
+// WriteHeapProfile writes the heap profile to a file for later review.
 // Since a file is expected to only contain a single heap profile this
 // function overwrites the previously written profile, so it is safe
 // to call multiple times in a single run.
@@ -77,27 +82,28 @@ func init() {
 func WriteHeapProfile() {
        if heapProfileFilename != "" {
 
-               heap_profile, err := os.Create(heapProfileFilename)
+               heapProfile, err := os.Create(heapProfileFilename)
                if err != nil {
                        log.Fatal(err)
                }
 
-               defer heap_profile.Close()
+               defer heapProfile.Close()
 
-               err = pprof.WriteHeapProfile(heap_profile)
+               err = pprof.WriteHeapProfile(heapProfile)
                if err != nil {
                        log.Fatal(err)
                }
        }
 }
 
+// GetCollectionsAndSummarize gets collections from the API server and summarizes them.
 func GetCollectionsAndSummarize(params GetCollectionsParams) (results ReadCollections) {
        results = GetCollections(params)
        results.Summarize(params.Logger)
 
        log.Printf("Uuid to Size used: %v", results.OwnerToCollectionSize)
        log.Printf("Read and processed %d collections",
-               len(results.UuidToCollection))
+               len(results.UUIDToCollection))
 
        // TODO(misha): Add a "readonly" flag. If we're in readonly mode,
        // lots of behaviors can become warnings (and obviously we can't
@@ -109,6 +115,7 @@ func GetCollectionsAndSummarize(params GetCollectionsParams) (results ReadCollec
        return
 }
 
+// GetCollections gets collections from the API server.
 func GetCollections(params GetCollectionsParams) (results ReadCollections) {
        if &params.Client == nil {
                log.Fatalf("params.Client passed to GetCollections() should " +
@@ -157,7 +164,7 @@ func GetCollections(params GetCollectionsParams) (results ReadCollections) {
        // that we don't have to grow the map in most cases.
        maxExpectedCollections := int(
                float64(initialNumberOfCollectionsAvailable) * 1.01)
-       results.UuidToCollection = make(map[string]Collection, maxExpectedCollections)
+       results.UUIDToCollection = make(map[string]Collection, maxExpectedCollections)
 
        if params.Logger != nil {
                params.Logger.Update(func(p map[string]interface{}, e map[string]interface{}) {
@@ -191,11 +198,11 @@ func GetCollections(params GetCollectionsParams) (results ReadCollections) {
                        ProcessCollections(params.Logger,
                                collections.Items,
                                defaultReplicationLevel,
-                               results.UuidToCollection).Format(time.RFC3339)
+                               results.UUIDToCollection).Format(time.RFC3339)
 
                // update counts
                previousTotalCollections = totalCollections
-               totalCollections = len(results.UuidToCollection)
+               totalCollections = len(results.UUIDToCollection)
 
                log.Printf("%d collections read, %d new in last batch, "+
                        "%s latest modified date, %.0f %d %d avg,max,total manifest size",
@@ -229,13 +236,14 @@ func StrCopy(s string) string {
        return string([]byte(s))
 }
 
+// ProcessCollections processes collections read from the API server and records them in UUIDToCollection.
 func ProcessCollections(arvLogger *logger.Logger,
        receivedCollections []SdkCollectionInfo,
        defaultReplicationLevel int,
-       uuidToCollection map[string]Collection) (latestModificationDate time.Time) {
+       UUIDToCollection map[string]Collection) (latestModificationDate time.Time) {
        for _, sdkCollection := range receivedCollections {
-               collection := Collection{Uuid: StrCopy(sdkCollection.Uuid),
-                       OwnerUuid:         StrCopy(sdkCollection.OwnerUuid),
+               collection := Collection{UUID: StrCopy(sdkCollection.UUID),
+                       OwnerUUID:         StrCopy(sdkCollection.OwnerUUID),
                        ReplicationLevel:  sdkCollection.Redundancy,
                        BlockDigestToSize: make(map[blockdigest.BlockDigest]int)}
 
@@ -260,7 +268,7 @@ func ProcessCollections(arvLogger *logger.Logger,
                manifest := manifest.Manifest{sdkCollection.ManifestText}
                manifestSize := uint64(len(sdkCollection.ManifestText))
 
-               if _, alreadySeen := uuidToCollection[collection.Uuid]; !alreadySeen {
+               if _, alreadySeen := UUIDToCollection[collection.UUID]; !alreadySeen {
                        totalManifestSize += manifestSize
                }
                if manifestSize > maxManifestSize {
@@ -269,11 +277,11 @@ func ProcessCollections(arvLogger *logger.Logger,
 
                blockChannel := manifest.BlockIterWithDuplicates()
                for block := range blockChannel {
-                       if stored_size, stored := collection.BlockDigestToSize[block.Digest]; stored && stored_size != block.Size {
+                       if storedSize, stored := collection.BlockDigestToSize[block.Digest]; stored && storedSize != block.Size {
                                message := fmt.Sprintf(
                                        "Collection %s contains multiple sizes (%d and %d) for block %s",
-                                       collection.Uuid,
-                                       stored_size,
+                                       collection.UUID,
+                                       storedSize,
                                        block.Size,
                                        block.Digest)
                                loggerutil.FatalWithMessage(arvLogger, message)
@@ -284,7 +292,7 @@ func ProcessCollections(arvLogger *logger.Logger,
                for _, size := range collection.BlockDigestToSize {
                        collection.TotalSize += size
                }
-               uuidToCollection[collection.Uuid] = collection
+               UUIDToCollection[collection.UUID] = collection
 
                // Clear out all the manifest strings that we don't need anymore.
                // These hopefully form the bulk of our memory usage.
@@ -295,22 +303,23 @@ func ProcessCollections(arvLogger *logger.Logger,
        return
 }
 
+// Summarize builds indices and summary statistics for the collections read.
 func (readCollections *ReadCollections) Summarize(arvLogger *logger.Logger) {
        readCollections.OwnerToCollectionSize = make(map[string]int)
        readCollections.BlockToDesiredReplication = make(map[blockdigest.DigestWithSize]int)
-       numCollections := len(readCollections.UuidToCollection)
-       readCollections.CollectionUuidToIndex = make(map[string]int, numCollections)
-       readCollections.CollectionIndexToUuid = make([]string, 0, numCollections)
+       numCollections := len(readCollections.UUIDToCollection)
+       readCollections.CollectionUUIDToIndex = make(map[string]int, numCollections)
+       readCollections.CollectionIndexToUUID = make([]string, 0, numCollections)
        readCollections.BlockToCollectionIndices = make(map[blockdigest.DigestWithSize][]int)
 
-       for _, coll := range readCollections.UuidToCollection {
-               collectionIndex := len(readCollections.CollectionIndexToUuid)
-               readCollections.CollectionIndexToUuid =
-                       append(readCollections.CollectionIndexToUuid, coll.Uuid)
-               readCollections.CollectionUuidToIndex[coll.Uuid] = collectionIndex
+       for _, coll := range readCollections.UUIDToCollection {
+               collectionIndex := len(readCollections.CollectionIndexToUUID)
+               readCollections.CollectionIndexToUUID =
+                       append(readCollections.CollectionIndexToUUID, coll.UUID)
+               readCollections.CollectionUUIDToIndex[coll.UUID] = collectionIndex
 
-               readCollections.OwnerToCollectionSize[coll.OwnerUuid] =
-                       readCollections.OwnerToCollectionSize[coll.OwnerUuid] + coll.TotalSize
+               readCollections.OwnerToCollectionSize[coll.OwnerUUID] =
+                       readCollections.OwnerToCollectionSize[coll.OwnerUUID] + coll.TotalSize
 
                for block, size := range coll.BlockDigestToSize {
                        locator := blockdigest.DigestWithSize{Digest: block, Size: uint32(size)}
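The StrCopy helper retained above (string([]byte(s))) matters for the memory notes in ProcessCollections: in Go, a substring shares its parent string's backing array, so keeping a short UUID sliced out of a large API response would pin the whole response in memory. Forcing a byte-slice round trip severs that link. A tiny illustration:

    package main

    import "fmt"

    func strCopy(s string) string { return string([]byte(s)) }

    func main() {
            response := string(make([]byte, 1<<20)) // stand-in for a huge API response
            shared := response[:27]  // substring: shares the 1 MiB backing array
            owned := strCopy(shared) // 27-byte copy; the big buffer can now be GC'd
            fmt.Println(len(shared), len(owned))
    }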
index 1669bb778498a1b912c8cd68ebc36042e14405d4..07c82e1abc0b5581c523683b4947e7d094f7c9cd 100644 (file)
@@ -16,7 +16,7 @@ type MySuite struct{}
 var _ = Suite(&MySuite{})
 
 // This captures the result we expect from
-// ReadCollections.Summarize().  Because CollectionUuidToIndex is
+// ReadCollections.Summarize().  Because CollectionUUIDToIndex is
 // indeterminate, we replace BlockToCollectionIndices with
 // BlockToCollectionUuids.
 type ExpectedSummary struct {
@@ -41,7 +41,7 @@ func CompareSummarizedReadCollections(c *C,
                uuidSet := make(map[string]struct{})
                summarizedBlockToCollectionUuids[digest] = uuidSet
                for _, index := range indices {
-                       uuidSet[summarized.CollectionIndexToUuid[index]] = struct{}{}
+                       uuidSet[summarized.CollectionIndexToUUID[index]] = struct{}{}
                }
        }
 
@@ -67,15 +67,15 @@ func (s *MySuite) TestSummarizeSimple(checker *C) {
 
        rc.Summarize(nil)
 
-       c := rc.UuidToCollection["col0"]
+       c := rc.UUIDToCollection["col0"]
 
        blockDigest1 := blockdigest.MakeTestDigestWithSize(1)
        blockDigest2 := blockdigest.MakeTestDigestWithSize(2)
 
        expected := ExpectedSummary{
-               OwnerToCollectionSize:     map[string]int{c.OwnerUuid: c.TotalSize},
+               OwnerToCollectionSize:     map[string]int{c.OwnerUUID: c.TotalSize},
                BlockToDesiredReplication: map[blockdigest.DigestWithSize]int{blockDigest1: 5, blockDigest2: 5},
-               BlockToCollectionUuids:    map[blockdigest.DigestWithSize][]string{blockDigest1: []string{c.Uuid}, blockDigest2: []string{c.Uuid}},
+               BlockToCollectionUuids:    map[blockdigest.DigestWithSize][]string{blockDigest1: []string{c.UUID}, blockDigest2: []string{c.UUID}},
        }
 
        CompareSummarizedReadCollections(checker, rc, expected)
@@ -95,8 +95,8 @@ func (s *MySuite) TestSummarizeOverlapping(checker *C) {
 
        rc.Summarize(nil)
 
-       c0 := rc.UuidToCollection["col0"]
-       c1 := rc.UuidToCollection["col1"]
+       c0 := rc.UUIDToCollection["col0"]
+       c1 := rc.UUIDToCollection["col1"]
 
        blockDigest1 := blockdigest.MakeTestDigestWithSize(1)
        blockDigest2 := blockdigest.MakeTestDigestWithSize(2)
@@ -104,8 +104,8 @@ func (s *MySuite) TestSummarizeOverlapping(checker *C) {
 
        expected := ExpectedSummary{
                OwnerToCollectionSize: map[string]int{
-                       c0.OwnerUuid: c0.TotalSize,
-                       c1.OwnerUuid: c1.TotalSize,
+                       c0.OwnerUUID: c0.TotalSize,
+                       c1.OwnerUUID: c1.TotalSize,
                },
                BlockToDesiredReplication: map[blockdigest.DigestWithSize]int{
                        blockDigest1: 5,
@@ -113,9 +113,9 @@ func (s *MySuite) TestSummarizeOverlapping(checker *C) {
                        blockDigest3: 8,
                },
                BlockToCollectionUuids: map[blockdigest.DigestWithSize][]string{
-                       blockDigest1: []string{c0.Uuid},
-                       blockDigest2: []string{c0.Uuid, c1.Uuid},
-                       blockDigest3: []string{c1.Uuid},
+                       blockDigest1: []string{c0.UUID},
+                       blockDigest2: []string{c0.UUID, c1.UUID},
+                       blockDigest3: []string{c1.UUID},
                },
        }
 
index f3c1f47664a039e6b6771c620c41cef3272a8c44..223843372290f91cd58655f44eb1e404afe2127d 100644 (file)
@@ -7,6 +7,7 @@ import (
        "git.curoverse.com/arvados.git/sdk/go/blockdigest"
 )
 
+// TestCollectionSpec describes a test collection's blocks and desired replication level
 type TestCollectionSpec struct {
        // The desired replication level
        ReplicationLevel int
@@ -15,23 +16,23 @@ type TestCollectionSpec struct {
        Blocks []int
 }
 
-// Creates a ReadCollections object for testing based on the give
-// specs.  Only the ReadAllCollections and UuidToCollection fields are
-// populated.  To populate other fields call rc.Summarize().
+// MakeTestReadCollections creates a ReadCollections object for testing
+// based on the given specs. Only the ReadAllCollections and UUIDToCollection
+// fields are populated. To populate other fields call rc.Summarize().
 func MakeTestReadCollections(specs []TestCollectionSpec) (rc ReadCollections) {
        rc = ReadCollections{
                ReadAllCollections: true,
-               UuidToCollection:   map[string]Collection{},
+               UUIDToCollection:   map[string]Collection{},
        }
 
        for i, spec := range specs {
                c := Collection{
-                       Uuid:              fmt.Sprintf("col%d", i),
-                       OwnerUuid:         fmt.Sprintf("owner%d", i),
+                       UUID:              fmt.Sprintf("col%d", i),
+                       OwnerUUID:         fmt.Sprintf("owner%d", i),
                        ReplicationLevel:  spec.ReplicationLevel,
                        BlockDigestToSize: map[blockdigest.BlockDigest]int{},
                }
-               rc.UuidToCollection[c.Uuid] = c
+               rc.UUIDToCollection[c.UUID] = c
                for _, j := range spec.Blocks {
                        c.BlockDigestToSize[blockdigest.MakeTestBlockDigest(j)] = j
                }
@@ -45,16 +46,16 @@ func MakeTestReadCollections(specs []TestCollectionSpec) (rc ReadCollections) {
        return
 }
 
-// Returns a slice giving the collection index of each collection that
-// was passed in to MakeTestReadCollections. rc.Summarize() must be
-// called before this method, since Summarize() assigns an index to
-// each collection.
+// CollectionIndicesForTesting returns a slice giving the collection
+// index of each collection that was passed in to MakeTestReadCollections.
+// rc.Summarize() must be called before this method, since Summarize()
+// assigns an index to each collection.
 func (rc ReadCollections) CollectionIndicesForTesting() (indices []int) {
        // TODO(misha): Assert that rc.Summarize() has been called.
-       numCollections := len(rc.CollectionIndexToUuid)
+       numCollections := len(rc.CollectionIndexToUUID)
        indices = make([]int, numCollections)
        for i := 0; i < numCollections; i++ {
-               indices[i] = rc.CollectionUuidToIndex[fmt.Sprintf("col%d", i)]
+               indices[i] = rc.CollectionUUIDToIndex[fmt.Sprintf("col%d", i)]
        }
        return
 }
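
A minimal sketch of how these helpers compose in a test (the spec values below are illustrative, not taken from the diff):

        rc := MakeTestReadCollections([]TestCollectionSpec{
                {ReplicationLevel: 5, Blocks: []int{1, 2}},
                {ReplicationLevel: 8, Blocks: []int{2, 3}},
        })
        rc.Summarize(nil)
        // indices[i] is the index Summarize() assigned to collection "col<i>".
        indices := rc.CollectionIndicesForTesting()
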
index 70a9ae785956396bab936e73b1a7f6ed04c63731..a9306ce83a6011002cef96b86eb6caf700feda23 100644 (file)
@@ -3,6 +3,7 @@
 package main
 
 import (
+       "errors"
        "flag"
        "fmt"
        "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
@@ -41,17 +42,17 @@ func init() {
 func main() {
        flag.Parse()
        if minutesBetweenRuns == 0 {
-               err := singlerun()
+               err := singlerun(makeArvadosClient())
                if err != nil {
-                       log.Fatalf("Got an error: %v", err)
+                       log.Fatalf("singlerun: %v", err)
                }
        } else {
                waitTime := time.Minute * time.Duration(minutesBetweenRuns)
                for {
                        log.Println("Beginning Run")
-                       err := singlerun()
+                       err := singlerun(makeArvadosClient())
                        if err != nil {
-                               log.Printf("Got an error: %v", err)
+                               log.Printf("singlerun: %v", err)
                        }
                        log.Printf("Sleeping for %d minutes", minutesBetweenRuns)
                        time.Sleep(waitTime)
@@ -59,16 +60,20 @@ func main() {
        }
 }
 
-func singlerun() error {
+func makeArvadosClient() arvadosclient.ArvadosClient {
        arv, err := arvadosclient.MakeArvadosClient()
        if err != nil {
-               log.Fatalf("Error setting up arvados client %s", err.Error())
+               log.Fatalf("Error setting up arvados client: %s", err)
        }
+       return arv
+}
 
-       if is_admin, err := util.UserIsAdmin(arv); err != nil {
-               log.Fatalf("Error querying current arvados user %s", err.Error())
-       } else if !is_admin {
-               log.Fatalf("Current user is not an admin. Datamanager can only be run by admins.")
+func singlerun(arv arvadosclient.ArvadosClient) error {
+       var err error
+       if isAdmin, err := util.UserIsAdmin(arv); err != nil {
+               return errors.New("Error verifying admin token: " + err.Error())
+       } else if !isAdmin {
+               return errors.New("Current user is not an admin. Datamanager requires a privileged token.")
        }
 
        var arvLogger *logger.Logger
@@ -153,14 +158,13 @@ func singlerun() error {
 
        if trashErr != nil {
                return err
-       } else {
-               keep.SendTrashLists(keep.GetDataManagerToken(arvLogger), kc, trashLists)
        }
+       keep.SendTrashLists(kc, trashLists)
 
        return nil
 }
 
-// Returns a data fetcher that fetches data from remote servers.
+// BuildDataFetcher returns a data fetcher that fetches data from remote servers.
 func BuildDataFetcher(arv arvadosclient.ArvadosClient) summary.DataFetcher {
        return func(arvLogger *logger.Logger,
                readCollections *collection.ReadCollections,
diff --git a/services/datamanager/datamanager_test.go b/services/datamanager/datamanager_test.go
new file mode 100644 (file)
index 0000000..c2cb762
--- /dev/null
@@ -0,0 +1,513 @@
+package main
+
+import (
+       "encoding/json"
+       "fmt"
+       "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
+       "git.curoverse.com/arvados.git/sdk/go/arvadostest"
+       "git.curoverse.com/arvados.git/sdk/go/keepclient"
+       "io/ioutil"
+       "net/http"
+       "os"
+       "os/exec"
+       "regexp"
+       "strings"
+       "testing"
+       "time"
+)
+
+const (
+       ActiveUserToken = "3kg6k6lzmp9kj5cpkcoxie963cmvjahbt2fod9zru30k1jqdmi"
+       AdminToken      = "4axaw8zxe0qm22wa6urpp5nskcne8z88cvbupv653y1njyi05h"
+)
+
+var arv arvadosclient.ArvadosClient
+var keepClient *keepclient.KeepClient
+var keepServers []string
+
+func SetupDataManagerTest(t *testing.T) {
+       os.Setenv("ARVADOS_API_HOST_INSECURE", "true")
+
+       // start api and keep servers
+       arvadostest.ResetEnv()
+       arvadostest.StartAPI()
+       arvadostest.StartKeep(2, false)
+
+       arv = makeArvadosClient()
+
+       // keep client
+       keepClient = &keepclient.KeepClient{
+               Arvados:       &arv,
+               Want_replicas: 2,
+               Using_proxy:   true,
+               Client:        &http.Client{},
+       }
+
+       // discover keep services
+       if err := keepClient.DiscoverKeepServers(); err != nil {
+               t.Fatalf("Error discovering keep services: %s", err)
+       }
+       keepServers = []string{}
+       for _, host := range keepClient.LocalRoots() {
+               keepServers = append(keepServers, host)
+       }
+}
+
+func TearDownDataManagerTest(t *testing.T) {
+       arvadostest.StopKeep(2)
+       arvadostest.StopAPI()
+}
+
+func putBlock(t *testing.T, data string) string {
+       locator, _, err := keepClient.PutB([]byte(data))
+       if err != nil {
+               t.Fatalf("Error putting test data for %s %s %v", data, locator, err)
+       }
+       if locator == "" {
+               t.Fatalf("No locator found after putting test data")
+       }
+
+       splits := strings.Split(locator, "+")
+       return splits[0] + "+" + splits[1]
+}
+
+func getBlock(t *testing.T, locator string, data string) {
+       reader, blocklen, _, err := keepClient.Get(locator)
+       if err != nil {
+               t.Fatalf("Error getting test data in setup for %s %s %v", data, locator, err)
+       }
+       if reader == nil {
+               t.Fatalf("No reader found after putting test data")
+       }
+       if blocklen != int64(len(data)) {
+               t.Fatalf("blocklen %d did not match data len %d", blocklen, len(data))
+       }
+
+       all, err := ioutil.ReadAll(reader)
+       if err != nil {
+               t.Fatalf("Error reading test data for %s %v", locator, err)
+       }
+       if string(all) != data {
+               t.Fatalf("Data read %s did not match expected data %s", string(all), data)
+       }
+}
+
+// Create a collection using arv-put
+func createCollection(t *testing.T, data string) string {
+       tempfile, err := ioutil.TempFile(os.TempDir(), "temp-test-file")
+       if err != nil {
+               t.Fatalf("Error creating tempfile %v", err)
+       }
+       defer os.Remove(tempfile.Name())
+
+       _, err = tempfile.Write([]byte(data))
+       if err != nil {
+               t.Fatalf("Error writing to tempfile %v", err)
+       }
+
+       // arv-put
+       output, err := exec.Command("arv-put", "--use-filename", "test.txt", tempfile.Name()).Output()
+       if err != nil {
+               t.Fatalf("Error running arv-put %s", err)
+       }
+
+       uuid := string(output[0:27]) // take the 27-character uuid, dropping the trailing newline
+       return uuid
+}
+
+// Get collection locator
+var locatorMatcher = regexp.MustCompile(`^([0-9a-f]{32})\+(\d*)(.*)$`)
+
+func getFirstLocatorFromCollection(t *testing.T, uuid string) string {
+       manifest := getCollection(t, uuid)["manifest_text"].(string)
+
+       locator := strings.Split(manifest, " ")[1]
+       match := locatorMatcher.FindStringSubmatch(locator)
+       if match == nil {
+               t.Fatalf("No locator found in collection manifest %s", manifest)
+       }
+
+       return match[1] + "+" + match[2]
+}
+
+func getCollection(t *testing.T, uuid string) Dict {
+       getback := make(Dict)
+       err := arv.Get("collections", uuid, nil, &getback)
+       if err != nil {
+               t.Fatalf("Error getting collection %s", err)
+       }
+       if getback["uuid"] != uuid {
+               t.Fatalf("Get collection uuid did not match original: $s, result: $s", uuid, getback["uuid"])
+       }
+
+       return getback
+}
+
+func updateCollection(t *testing.T, uuid string, paramName string, paramValue string) {
+       err := arv.Update("collections", uuid, arvadosclient.Dict{
+               "collection": arvadosclient.Dict{
+                       paramName: paramValue,
+               },
+       }, &arvadosclient.Dict{})
+
+       if err != nil {
+               t.Fatalf("Error updating collection %s", err)
+       }
+}
+
+type Dict map[string]interface{}
+
+func deleteCollection(t *testing.T, uuid string) {
+       getback := make(Dict)
+       err := arv.Delete("collections", uuid, nil, &getback)
+       if err != nil {
+               t.Fatalf("Error deleting collection %s", err)
+       }
+       if getback["uuid"] != uuid {
+               t.Fatalf("Delete collection uuid did not match original: $s, result: $s", uuid, getback["uuid"])
+       }
+}
+
+func dataManagerSingleRun(t *testing.T) {
+       err := singlerun(arv)
+       if err != nil {
+               t.Fatalf("Error during singlerun %s", err)
+       }
+}
+
+func getBlockIndexesForServer(t *testing.T, i int) []string {
+       var indexes []string
+
+       path := keepServers[i] + "/index"
+       client := http.Client{}
+       req, err := http.NewRequest("GET", path, nil)
+       if err != nil {
+               t.Fatalf("Error creating request for %s %s", path, err)
+       }
+       req.Header.Add("Authorization", "OAuth2 "+AdminToken)
+       req.Header.Add("Content-Type", "application/octet-stream")
+       resp, err := client.Do(req)
+       if err != nil {
+               t.Fatalf("Error during %s %s", path, err)
+       }
+       defer resp.Body.Close()
+
+       body, err := ioutil.ReadAll(resp.Body)
+       if err != nil {
+               t.Fatalf("Error reading response from %s %s", path, err)
+       }
+
+       lines := strings.Split(string(body), "\n")
+       for _, line := range lines {
+               indexes = append(indexes, strings.Split(line, " ")...)
+       }
+
+       return indexes
+}
+
+func getBlockIndexes(t *testing.T) [][]string {
+       var indexes [][]string
+
+       for i := 0; i < len(keepServers); i++ {
+               indexes = append(indexes, getBlockIndexesForServer(t, i))
+       }
+       return indexes
+}
+
+func verifyBlocks(t *testing.T, notExpected []string, expected []string, minReplication int) {
+       blocks := getBlockIndexes(t)
+
+       for _, block := range notExpected {
+               for _, idx := range blocks {
+                       if valueInArray(block, idx) {
+                               t.Fatalf("Found unexpected block %s", block)
+                       }
+               }
+       }
+
+       for _, block := range expected {
+               nFound := 0
+               for _, idx := range blocks {
+                       if valueInArray(block, idx) {
+                               nFound++
+                       }
+               }
+               if nFound < minReplication {
+                       t.Fatalf("Found %d replicas of block %s, expected >= %d", nFound, block, minReplication)
+               }
+       }
+}
+
+func valueInArray(value string, list []string) bool {
+       for _, v := range list {
+               if value == v {
+                       return true
+               }
+       }
+       return false
+}
+
+/*
+Test env uses two keep volumes. The volume names can be found by reading the files
+  ARVADOS_HOME/tmp/keep0.volume and ARVADOS_HOME/tmp/keep1.volume
+
+The keep volumes are of the dir structure:
+  volumeN/subdir/locator
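+  For example, a block might live at keep0/3b8/3b83ef96387f14655fc854ddc3c6bd57 (hypothetical locator).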
+*/
+func backdateBlocks(t *testing.T, oldUnusedBlockLocators []string) {
+       // First get rid of any size hints in the locators
+       var trimmedBlockLocators []string
+       for _, block := range oldUnusedBlockLocators {
+               trimmedBlockLocators = append(trimmedBlockLocators, strings.Split(block, "+")[0])
+       }
+
+       // Get the working dir so that we can read keep{n}.volume files
+       wd, err := os.Getwd()
+       if err != nil {
+               t.Fatalf("Error getting working dir %s", err)
+       }
+
+       // Now cycle through the two keep volumes
+       oldTime := time.Now().AddDate(0, -2, 0)
+       for i := 0; i < 2; i++ {
+               filename := fmt.Sprintf("%s/../../tmp/keep%d.volume", wd, i)
+               volumeDir, err := ioutil.ReadFile(filename)
+               if err != nil {
+                       t.Fatalf("Error reading keep volume file %s %s", filename, err)
+               }
+
+               // Read the keep volume dir structure
+               volumeContents, err := ioutil.ReadDir(string(volumeDir))
+               if err != nil {
+                       t.Fatalf("Error reading keep dir %s %s", string(volumeDir), err)
+               }
+
+               // Read each subdir for each of the keep volume dir
+               for _, subdir := range volumeContents {
+                       subdirName := fmt.Sprintf("%s/%s", volumeDir, subdir.Name())
+                       subdirContents, err := ioutil.ReadDir(string(subdirName))
+                       if err != nil {
+                               t.Fatalf("Error reading keep dir %s %s", string(subdirName), err)
+                       }
+
+                       // Now we get to the files; the file names are the block locators
+                       for _, fileInfo := range subdirContents {
+                               blockName := fileInfo.Name()
+                               myname := fmt.Sprintf("%s/%s", subdirName, blockName)
+                               if valueInArray(blockName, trimmedBlockLocators) {
+                                       err = os.Chtimes(myname, oldTime, oldTime)
+                                       if err != nil {
+                                               t.Fatalf("Error backdating block %s %s", myname, err)
+                                       }
+                               }
+                       }
+               }
+       }
+}
+
+func getStatus(t *testing.T, path string) interface{} {
+       client := http.Client{}
+       req, err := http.NewRequest("GET", path, nil)
+       if err != nil {
+               t.Fatalf("Error creating request for %s %s", path, err)
+       }
+       req.Header.Add("Authorization", "OAuth2 "+AdminToken)
+       req.Header.Add("Content-Type", "application/octet-stream")
+       resp, err := client.Do(req)
+       if err != nil {
+               t.Fatalf("Error during %s %s", path, err)
+       }
+       defer resp.Body.Close()
+
+       var s interface{}
+       if err := json.NewDecoder(resp.Body).Decode(&s); err != nil {
+               t.Fatalf("Error decoding response from %s %s", path, err)
+       }
+
+       return s
+}
+
+// Wait until PullQueue and TrashQueue are empty on all keepServers.
+func waitUntilQueuesFinishWork(t *testing.T) {
+       for _, ks := range keepServers {
+               for done := false; !done; {
+                       time.Sleep(100 * time.Millisecond)
+                       s := getStatus(t, ks+"/status.json")
+                       done = true
+                       for _, qName := range []string{"PullQueue", "TrashQueue"} {
+                               qStatus := s.(map[string]interface{})[qName].(map[string]interface{})
+                               if qStatus["Queued"].(float64)+qStatus["InProgress"].(float64) != 0 {
+                                       done = false
+                               }
+                       }
+               }
+       }
+}
+
+/*
+Create some blocks and backdate some of them.
+Also create some collections and delete some of them.
+Verify block indexes.
+*/
+func TestPutAndGetBlocks(t *testing.T) {
+       defer TearDownDataManagerTest(t)
+       SetupDataManagerTest(t)
+
+       // Put some blocks which will be backdated later on.
+       // These blocks are not referenced by any collection and hence should be deleted when datamanager runs.
+       var oldUnusedBlockLocators []string
+       oldUnusedBlockData := "this block will have older mtime"
+       for i := 0; i < 5; i++ {
+               oldUnusedBlockLocators = append(oldUnusedBlockLocators, putBlock(t, fmt.Sprintf("%s%d", oldUnusedBlockData, i)))
+       }
+       for i := 0; i < 5; i++ {
+               getBlock(t, oldUnusedBlockLocators[i], fmt.Sprintf("%s%d", oldUnusedBlockData, i))
+       }
+
+       // Put a block which will also be backdated, but is referenced by a collection below; hence it should not be deleted when datamanager runs.
+       oldUsedBlockData := "this collection block will have older mtime"
+       oldUsedBlockLocator := putBlock(t, oldUsedBlockData)
+       getBlock(t, oldUsedBlockLocator, oldUsedBlockData)
+
+       // Put some more blocks which will not be backdated; hence they are still new, but not in any collection.
+       // Hence, even though unreferenced, these should not be deleted when datamanager runs.
+       var newBlockLocators []string
+       newBlockData := "this block is newer"
+       for i := 0; i < 5; i++ {
+               newBlockLocators = append(newBlockLocators, putBlock(t, fmt.Sprintf("%s%d", newBlockData, i)))
+       }
+       for i := 0; i < 5; i++ {
+               getBlock(t, newBlockLocators[i], fmt.Sprintf("%s%d", newBlockData, i))
+       }
+
+       // Create a collection that would be deleted later on
+       toBeDeletedCollectionUUID := createCollection(t, "some data for collection creation")
+       toBeDeletedCollectionLocator := getFirstLocatorFromCollection(t, toBeDeletedCollectionUUID)
+
+       // Create another collection that has the same data as the one of the old blocks
+       oldUsedBlockCollectionUUID := createCollection(t, oldUsedBlockData)
+       oldUsedBlockCollectionLocator := getFirstLocatorFromCollection(t, oldUsedBlockCollectionUUID)
+       if oldUsedBlockCollectionLocator != oldUsedBlockLocator {
+               t.Fatalf("Locator of the collection with the same data as old block is different %s", oldUsedBlockCollectionLocator)
+       }
+
+       // Create another collection whose replication level will be changed
+       replicationCollectionUUID := createCollection(t, "replication level on this collection will be reduced")
+       replicationCollectionLocator := getFirstLocatorFromCollection(t, replicationCollectionUUID)
+
+       // Create two collections with same data; one will be deleted later on
+       dataForTwoCollections := "one of these collections will be deleted"
+       oneOfTwoWithSameDataUUID := createCollection(t, dataForTwoCollections)
+       oneOfTwoWithSameDataLocator := getFirstLocatorFromCollection(t, oneOfTwoWithSameDataUUID)
+       secondOfTwoWithSameDataUUID := createCollection(t, dataForTwoCollections)
+       secondOfTwoWithSameDataLocator := getFirstLocatorFromCollection(t, secondOfTwoWithSameDataUUID)
+       if oneOfTwoWithSameDataLocator != secondOfTwoWithSameDataLocator {
+               t.Fatalf("Locators for both these collections expected to be same: %s %s", oneOfTwoWithSameDataLocator, secondOfTwoWithSameDataLocator)
+       }
+
+       // Verify blocks before doing any backdating / deleting.
+       var expected []string
+       expected = append(expected, oldUnusedBlockLocators...)
+       expected = append(expected, newBlockLocators...)
+       expected = append(expected, toBeDeletedCollectionLocator)
+       expected = append(expected, replicationCollectionLocator)
+       expected = append(expected, oneOfTwoWithSameDataLocator)
+       expected = append(expected, secondOfTwoWithSameDataLocator)
+
+       verifyBlocks(t, nil, expected, 2)
+
+       // Run datamanager in singlerun mode
+       dataManagerSingleRun(t)
+       waitUntilQueuesFinishWork(t)
+
+       verifyBlocks(t, nil, expected, 2)
+
+       // Backdate the to-be old blocks and delete the collections
+       backdateBlocks(t, oldUnusedBlockLocators)
+       deleteCollection(t, toBeDeletedCollectionUUID)
+       deleteCollection(t, secondOfTwoWithSameDataUUID)
+
+       // Run data manager again
+       dataManagerSingleRun(t)
+       waitUntilQueuesFinishWork(t)
+
+       // Get block indexes and verify that the backdated unreferenced blocks are gone, while the backdated block used in a collection remains.
+       expected = expected[:0]
+       expected = append(expected, oldUsedBlockLocator)
+       expected = append(expected, newBlockLocators...)
+       expected = append(expected, toBeDeletedCollectionLocator)
+       expected = append(expected, oneOfTwoWithSameDataLocator)
+       expected = append(expected, secondOfTwoWithSameDataLocator)
+
+       verifyBlocks(t, oldUnusedBlockLocators, expected, 2)
+
+       // Reduce desired replication on replicationCollectionUUID
+       // collection, and verify that Data Manager does not reduce
+       // actual replication any further than that. (It might not
+       // reduce actual replication at all; that's OK for this test.)
+
+       // Reduce desired replication level.
+       updateCollection(t, replicationCollectionUUID, "replication_desired", "1")
+       collection := getCollection(t, replicationCollectionUUID)
+       if collection["replication_desired"] != float64(1) {
+               t.Fatalf("After update replication_desired is not 1; instead it is %v", collection["replication_desired"])
+       }
+
+       // Verify data is currently overreplicated.
+       verifyBlocks(t, nil, []string{replicationCollectionLocator}, 2)
+
+       // Run data manager again
+       dataManagerSingleRun(t)
+       waitUntilQueuesFinishWork(t)
+
+       // Verify data is not underreplicated.
+       verifyBlocks(t, nil, []string{replicationCollectionLocator}, 1)
+
+       // Verify *other* collections' data is not underreplicated.
+       verifyBlocks(t, oldUnusedBlockLocators, expected, 2)
+}
+
+func TestDatamanagerSingleRunRepeatedly(t *testing.T) {
+       defer TearDownDataManagerTest(t)
+       SetupDataManagerTest(t)
+
+       for i := 0; i < 10; i++ {
+               err := singlerun(arv)
+               if err != nil {
+                       t.Fatalf("Got an error during datamanager singlerun: %v", err)
+               }
+       }
+}
+
+func TestGetStatusRepeatedly(t *testing.T) {
+       defer TearDownDataManagerTest(t)
+       SetupDataManagerTest(t)
+
+       for i := 0; i < 10; i++ {
+               for j := 0; j < 2; j++ {
+                       s := getStatus(t, keepServers[j]+"/status.json")
+
+                       pullQueueStatus := s.(map[string]interface{})["PullQueue"]
+                       trashQueueStatus := s.(map[string]interface{})["TrashQueue"]
+
+                       if pullQueueStatus.(map[string]interface{})["Queued"] == nil ||
+                               pullQueueStatus.(map[string]interface{})["InProgress"] == nil ||
+                               trashQueueStatus.(map[string]interface{})["Queued"] == nil ||
+                               trashQueueStatus.(map[string]interface{})["InProgress"] == nil {
+                               t.Fatalf("PullQueue and TrashQueue status not found")
+                       }
+
+                       time.Sleep(100 * time.Millisecond)
+               }
+       }
+}
+
+func TestRunDatamanagerWithBogusServer(t *testing.T) {
+       defer TearDownDataManagerTest(t)
+       SetupDataManagerTest(t)
+
+       arv.ApiServer = "bogus-server"
+
+       err := singlerun(arv)
+       if err == nil {
+               t.Fatalf("Expected error during singlerun with bogus server")
+       }
+}
+
+func TestRunDatamanagerAsNonAdminUser(t *testing.T) {
+       defer TearDownDataManagerTest(t)
+       SetupDataManagerTest(t)
+
+       arv.ApiToken = ActiveUserToken
+
+       err := singlerun(arv)
+       if err == nil {
+               t.Fatalf("Expected error during singlerun as non-admin user")
+       }
+}
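
Assuming a standard Go workspace with the Arvados test fixtures available, the new tests can be run with:

        go test git.curoverse.com/arvados.git/services/datamanager
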
index 0e3cc1d44e79ef7b533a8cccef77a7c5cf6c6605..86c2b089aa13088d8da8f524ab21dcc8dafc9641 100644 (file)
@@ -6,7 +6,6 @@ import (
        "bufio"
        "encoding/json"
        "errors"
-       "flag"
        "fmt"
        "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
        "git.curoverse.com/arvados.git/sdk/go/blockdigest"
@@ -19,38 +18,42 @@ import (
        "net/http"
        "strconv"
        "strings"
-       "sync"
        "time"
 )
 
+// ServerAddress struct
 type ServerAddress struct {
-       SSL  bool   `json:service_ssl_flag`
-       Host string `json:"service_host"`
-       Port int    `json:"service_port"`
-       Uuid string `json:"uuid"`
+       SSL         bool   `json:"service_ssl_flag"`
+       Host        string `json:"service_host"`
+       Port        int    `json:"service_port"`
+       UUID        string `json:"uuid"`
+       ServiceType string `json:"service_type"`
 }
 
-// Info about a particular block returned by the server
+// BlockInfo is info about a particular block returned by the server
 type BlockInfo struct {
        Digest blockdigest.DigestWithSize
        Mtime  int64 // TODO(misha): Replace this with a timestamp.
 }
 
-// Info about a specified block given by a server
+// BlockServerInfo is info about a specified block given by a server
 type BlockServerInfo struct {
        ServerIndex int
        Mtime       int64 // TODO(misha): Replace this with a timestamp.
 }
 
+// ServerContents struct
 type ServerContents struct {
        BlockDigestToInfo map[blockdigest.DigestWithSize]BlockInfo
 }
 
+// ServerResponse struct
 type ServerResponse struct {
        Address  ServerAddress
        Contents ServerContents
 }
 
+// ReadServers struct
 type ReadServers struct {
        ReadAllServers           bool
        KeepServerIndexToAddress []ServerAddress
@@ -60,67 +63,34 @@ type ReadServers struct {
        BlockReplicationCounts   map[int]int
 }
 
+// GetKeepServersParams struct
 type GetKeepServersParams struct {
        Client arvadosclient.ArvadosClient
        Logger *logger.Logger
        Limit  int
 }
 
-type KeepServiceList struct {
+// ServiceList consists of the addresses of all the available keep servers
+type ServiceList struct {
        ItemsAvailable int             `json:"items_available"`
        KeepServers    []ServerAddress `json:"items"`
 }
 
-var (
-       // Don't access the token directly, use getDataManagerToken() to
-       // make sure it's been read.
-       dataManagerToken             string
-       dataManagerTokenFile         string
-       dataManagerTokenFileReadOnce sync.Once
-)
-
-func init() {
-       flag.StringVar(&dataManagerTokenFile,
-               "data-manager-token-file",
-               "",
-               "File with the API token we should use to contact keep servers.")
-}
-
+// String returns the URL of this ServerAddress.
 // TODO(misha): Change this to include the UUID as well.
 func (s ServerAddress) String() string {
        return s.URL()
 }
 
+// URL returns the URL of the keep server
 func (s ServerAddress) URL() string {
        if s.SSL {
                return fmt.Sprintf("https://%s:%d", s.Host, s.Port)
-       } else {
-               return fmt.Sprintf("http://%s:%d", s.Host, s.Port)
-       }
-}
-
-func GetDataManagerToken(arvLogger *logger.Logger) string {
-       readDataManagerToken := func() {
-               if dataManagerTokenFile == "" {
-                       flag.Usage()
-                       loggerutil.FatalWithMessage(arvLogger,
-                               "Data Manager Token needed, but data manager token file not specified.")
-               } else {
-                       rawRead, err := ioutil.ReadFile(dataManagerTokenFile)
-                       if err != nil {
-                               loggerutil.FatalWithMessage(arvLogger,
-                                       fmt.Sprintf("Unexpected error reading token file %s: %v",
-                                               dataManagerTokenFile,
-                                               err))
-                       }
-                       dataManagerToken = strings.TrimSpace(string(rawRead))
-               }
        }
-
-       dataManagerTokenFileReadOnce.Do(readDataManagerToken)
-       return dataManagerToken
+       return fmt.Sprintf("http://%s:%d", s.Host, s.Port)
 }
 
+// GetKeepServersAndSummarize gets the keep servers from the API server and summarizes their contents
 func GetKeepServersAndSummarize(params GetKeepServersParams) (results ReadServers) {
        results = GetKeepServers(params)
        log.Printf("Returned %d keep disks", len(results.ServerToContents))
@@ -132,20 +102,16 @@ func GetKeepServersAndSummarize(params GetKeepServersParams) (results ReadServer
        return
 }
 
+// GetKeepServers fetches the list of keep servers from the API server
 func GetKeepServers(params GetKeepServersParams) (results ReadServers) {
-       if &params.Client == nil {
-               log.Fatalf("params.Client passed to GetKeepServers() should " +
-                       "contain a valid ArvadosClient, but instead it is nil.")
-       }
-
        sdkParams := arvadosclient.Dict{
-               "filters": [][]string{[]string{"service_type", "=", "disk"}},
+               "filters": [][]string{[]string{"service_type", "!=", "proxy"}},
        }
        if params.Limit > 0 {
                sdkParams["limit"] = params.Limit
        }
 
-       var sdkResponse KeepServiceList
+       var sdkResponse ServiceList
        err := params.Client.List("keep_services", sdkParams, &sdkResponse)
 
        if err != nil {
@@ -153,6 +119,14 @@ func GetKeepServers(params GetKeepServersParams) (results ReadServers) {
                        fmt.Sprintf("Error requesting keep disks from API server: %v", err))
        }
 
+       // Currently, only "disk" types are supported. Stop if any other service types are found.
+       for _, server := range sdkResponse.KeepServers {
+               if server.ServiceType != "disk" {
+                       loggerutil.FatalWithMessage(params.Logger,
+                               fmt.Sprintf("Unsupported service type %q found for: %v", server.ServiceType, server))
+               }
+       }
+
        if params.Logger != nil {
                params.Logger.Update(func(p map[string]interface{}, e map[string]interface{}) {
                        keepInfo := logger.GetOrCreateMap(p, "keep_info")
@@ -177,9 +151,6 @@ func GetKeepServers(params GetKeepServersParams) (results ReadServers) {
 
        log.Printf("Got Server Addresses: %v", results)
 
-       // This is safe for concurrent use
-       client := http.Client{}
-
        // Send off all the index requests concurrently
        responseChan := make(chan ServerResponse)
        for _, keepServer := range sdkResponse.KeepServers {
@@ -192,7 +163,7 @@ func GetKeepServers(params GetKeepServersParams) (results ReadServers) {
                go func(keepServer ServerAddress) {
                        responseChan <- GetServerContents(params.Logger,
                                keepServer,
-                               client)
+                               params.Client)
                }(keepServer)
        }
 
@@ -218,14 +189,15 @@ func GetKeepServers(params GetKeepServersParams) (results ReadServers) {
        return
 }
 
+// GetServerContents fetches the index of the given keep server
 func GetServerContents(arvLogger *logger.Logger,
        keepServer ServerAddress,
-       client http.Client) (response ServerResponse) {
+       arv arvadosclient.ArvadosClient) (response ServerResponse) {
 
-       GetServerStatus(arvLogger, keepServer, client)
+       GetServerStatus(arvLogger, keepServer, arv)
 
-       req := CreateIndexRequest(arvLogger, keepServer)
-       resp, err := client.Do(req)
+       req := CreateIndexRequest(arvLogger, keepServer, arv)
+       resp, err := arv.Client.Do(req)
        if err != nil {
                loggerutil.FatalWithMessage(arvLogger,
                        fmt.Sprintf("Error fetching %s: %v. Response was %+v",
@@ -237,9 +209,10 @@ func GetServerContents(arvLogger *logger.Logger,
        return ReadServerResponse(arvLogger, keepServer, resp)
 }
 
+// GetServerStatus gets the keep server status by invoking /status.json
 func GetServerStatus(arvLogger *logger.Logger,
        keepServer ServerAddress,
-       client http.Client) {
+       arv arvadosclient.ArvadosClient) {
        url := fmt.Sprintf("http://%s:%d/status.json",
                keepServer.Host,
                keepServer.Port)
@@ -253,11 +226,11 @@ func GetServerStatus(arvLogger *logger.Logger,
                        serverInfo["host"] = keepServer.Host
                        serverInfo["port"] = keepServer.Port
 
-                       keepInfo[keepServer.Uuid] = serverInfo
+                       keepInfo[keepServer.UUID] = serverInfo
                })
        }
 
-       resp, err := client.Get(url)
+       resp, err := arv.Client.Get(url)
        if err != nil {
                loggerutil.FatalWithMessage(arvLogger,
                        fmt.Sprintf("Error getting keep status from %s: %v", url, err))
@@ -281,15 +254,17 @@ func GetServerStatus(arvLogger *logger.Logger,
                now := time.Now()
                arvLogger.Update(func(p map[string]interface{}, e map[string]interface{}) {
                        keepInfo := logger.GetOrCreateMap(p, "keep_info")
-                       serverInfo := keepInfo[keepServer.Uuid].(map[string]interface{})
+                       serverInfo := keepInfo[keepServer.UUID].(map[string]interface{})
                        serverInfo["status_response_processed_at"] = now
                        serverInfo["status"] = keepStatus
                })
        }
 }
 
+// CreateIndexRequest creates an index request to send to the keep server
 func CreateIndexRequest(arvLogger *logger.Logger,
-       keepServer ServerAddress) (req *http.Request) {
+       keepServer ServerAddress,
+       arv arvadosclient.ArvadosClient) (req *http.Request) {
        url := fmt.Sprintf("http://%s:%d/index", keepServer.Host, keepServer.Port)
        log.Println("About to fetch keep server contents from " + url)
 
@@ -297,7 +272,7 @@ func CreateIndexRequest(arvLogger *logger.Logger,
                now := time.Now()
                arvLogger.Update(func(p map[string]interface{}, e map[string]interface{}) {
                        keepInfo := logger.GetOrCreateMap(p, "keep_info")
-                       serverInfo := keepInfo[keepServer.Uuid].(map[string]interface{})
+                       serverInfo := keepInfo[keepServer.UUID].(map[string]interface{})
                        serverInfo["index_request_sent_at"] = now
                })
        }
@@ -308,11 +283,11 @@ func CreateIndexRequest(arvLogger *logger.Logger,
                        fmt.Sprintf("Error building http request for %s: %v", url, err))
        }
 
-       req.Header.Add("Authorization",
-               fmt.Sprintf("OAuth2 %s", GetDataManagerToken(arvLogger)))
+       req.Header.Add("Authorization", "OAuth2 "+arv.ApiToken)
        return
 }
 
+// ReadServerResponse reads the response from the keep server
 func ReadServerResponse(arvLogger *logger.Logger,
        keepServer ServerAddress,
        resp *http.Response) (response ServerResponse) {
@@ -328,7 +303,7 @@ func ReadServerResponse(arvLogger *logger.Logger,
                now := time.Now()
                arvLogger.Update(func(p map[string]interface{}, e map[string]interface{}) {
                        keepInfo := logger.GetOrCreateMap(p, "keep_info")
-                       serverInfo := keepInfo[keepServer.Uuid].(map[string]interface{})
+                       serverInfo := keepInfo[keepServer.UUID].(map[string]interface{})
                        serverInfo["index_response_received_at"] = now
                })
        }
@@ -375,7 +350,7 @@ func ReadServerResponse(arvLogger *logger.Logger,
 
                if storedBlock, ok := response.Contents.BlockDigestToInfo[blockInfo.Digest]; ok {
                        // This server returned multiple lines containing the same block digest.
-                       numDuplicates += 1
+                       numDuplicates++
                        // Keep the block that's newer.
                        if storedBlock.Mtime < blockInfo.Mtime {
                                response.Contents.BlockDigestToInfo[blockInfo.Digest] = blockInfo
@@ -396,7 +371,7 @@ func ReadServerResponse(arvLogger *logger.Logger,
                now := time.Now()
                arvLogger.Update(func(p map[string]interface{}, e map[string]interface{}) {
                        keepInfo := logger.GetOrCreateMap(p, "keep_info")
-                       serverInfo := keepInfo[keepServer.Uuid].(map[string]interface{})
+                       serverInfo := keepInfo[keepServer.UUID].(map[string]interface{})
 
                        serverInfo["processing_finished_at"] = now
                        serverInfo["lines_received"] = numLines
@@ -439,11 +414,12 @@ func parseBlockInfoFromIndexLine(indexLine string) (blockInfo BlockInfo, err err
        return
 }
 
+// Summarize tallies how many blocks are stored at each replication level
 func (readServers *ReadServers) Summarize(arvLogger *logger.Logger) {
        readServers.BlockReplicationCounts = make(map[int]int)
        for _, infos := range readServers.BlockToServers {
                replication := len(infos)
-               readServers.BlockReplicationCounts[replication] += 1
+               readServers.BlockReplicationCounts[replication]++
        }
 
        if arvLogger != nil {
@@ -452,24 +428,26 @@ func (readServers *ReadServers) Summarize(arvLogger *logger.Logger) {
                        keepInfo["distinct_blocks_stored"] = len(readServers.BlockToServers)
                })
        }
-
 }
 
+// TrashRequest struct
 type TrashRequest struct {
        Locator    string `json:"locator"`
        BlockMtime int64  `json:"block_mtime"`
 }
 
+// TrashList is an array of TrashRequest objects
 type TrashList []TrashRequest
 
-func SendTrashLists(dataManagerToken string, kc *keepclient.KeepClient, spl map[string]TrashList) (errs []error) {
+// SendTrashLists sends a trash list to each keep server
+func SendTrashLists(kc *keepclient.KeepClient, spl map[string]TrashList) (errs []error) {
        count := 0
        barrier := make(chan error)
 
        client := kc.Client
 
        for url, v := range spl {
-               count += 1
+               count++
                log.Printf("Sending trash list to %v", url)
 
                go (func(url string, v TrashList) {
@@ -487,8 +465,7 @@ func SendTrashLists(dataManagerToken string, kc *keepclient.KeepClient, spl map[
                                return
                        }
 
-                       // Add api token header
-                       req.Header.Add("Authorization", fmt.Sprintf("OAuth2 %s", dataManagerToken))
+                       req.Header.Add("Authorization", "OAuth2 "+kc.Arvados.ApiToken)
 
                        // Make the request
                        var resp *http.Response
@@ -512,7 +489,7 @@ func SendTrashLists(dataManagerToken string, kc *keepclient.KeepClient, spl map[
 
        }
 
-       for i := 0; i < count; i += 1 {
+       for i := 0; i < count; i++ {
                b := <-barrier
                if b != nil {
                        errs = append(errs, b)
index f39463ed6233169a9c9509133a729d7bed1fbeb6..2ccf17d45f78419b63fc5074ab9208c7e2e2c3c0 100644 (file)
@@ -22,9 +22,9 @@ type TestHandler struct {
        request TrashList
 }
 
-func (this *TestHandler) ServeHTTP(writer http.ResponseWriter, req *http.Request) {
+func (ts *TestHandler) ServeHTTP(writer http.ResponseWriter, req *http.Request) {
        r := json.NewDecoder(req.Body)
-       r.Decode(&this.request)
+       r.Decode(&ts.request)
 }
 
 func (s *KeepSuite) TestSendTrashLists(c *C) {
@@ -53,7 +53,7 @@ func (s *KeepSuite) TestSendTrashLists(c *C) {
 type TestHandlerError struct {
 }
 
-func (this *TestHandlerError) ServeHTTP(writer http.ResponseWriter, req *http.Request) {
+func (tse *TestHandlerError) ServeHTTP(writer http.ResponseWriter, req *http.Request) {
        http.Error(writer, "I'm a teapot", 418)
 }
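
These stub handlers pair naturally with httptest servers; a minimal sketch of exercising SendTrashLists against one (the trash entry values are illustrative, and kc is assumed to be a configured *keepclient.KeepClient):

        th := TestHandler{}
        server := httptest.NewServer(&th)
        defer server.Close()

        tl := map[string]TrashList{
                server.URL: TrashList{TrashRequest{Locator: "block1", BlockMtime: 1000}},
        }
        errs := SendTrashLists(kc, tl)
        // th.request now holds the TrashList the handler decoded; errs collects failures.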
 
index 94f06764a1d5e6c9522050ad44f65423ece9630d..152314cf6f4cb5dadddbdf2e3288e711b4ee8b70 100644 (file)
@@ -1,13 +1,16 @@
 /* Ensures that we only have one copy of each unique string. This is
 /* not designed for concurrent access. */
+
 package summary
 
 // This code should probably be moved somewhere more universal.
 
+// CanonicalString struct
 type CanonicalString struct {
        m map[string]string
 }
 
+// Get returns the canonical copy of the given string
 func (cs *CanonicalString) Get(s string) (r string) {
        if cs.m == nil {
                cs.m = make(map[string]string)
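
Get interns strings so repeated server URLs share one backing copy; a minimal usage sketch (the URL is illustrative):

        var cs CanonicalString
        a := cs.Get("http://keep0.example:25107")
        b := cs.Get("http://keep0.example:25107")
        // a == b, and both refer to the single canonical copy stored in cs.
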
index 8c37e99ade723a5c51a1f80f6c203dce504100d3..18b3aec8190408897d2857e3e6b2a212936bc21c 100644 (file)
@@ -26,6 +26,7 @@ var (
        readDataFrom string
 )
 
+// DataFetcher is a function type that fetches collection and keep server data
 type DataFetcher func(arvLogger *logger.Logger,
        readCollections *collection.ReadCollections,
        keepServerInfo *keep.ReadServers)
@@ -41,7 +42,7 @@ func init() {
                "Avoid network i/o and read summary data from this file instead. Used for development only.")
 }
 
-// Writes data we've read to a file.
+// MaybeWriteData writes data we've read to a file.
 //
 // This is useful for development, so that we don't need to read all
 // our data from the network every time we tweak something.
@@ -53,33 +54,33 @@ func MaybeWriteData(arvLogger *logger.Logger,
        keepServerInfo keep.ReadServers) bool {
        if writeDataTo == "" {
                return false
-       } else {
-               summaryFile, err := os.Create(writeDataTo)
-               if err != nil {
-                       loggerutil.FatalWithMessage(arvLogger,
-                               fmt.Sprintf("Failed to open %s: %v", writeDataTo, err))
-               }
-               defer summaryFile.Close()
+       }
+       summaryFile, err := os.Create(writeDataTo)
+       if err != nil {
+               loggerutil.FatalWithMessage(arvLogger,
+                       fmt.Sprintf("Failed to open %s: %v", writeDataTo, err))
+       }
+       defer summaryFile.Close()
 
-               enc := gob.NewEncoder(summaryFile)
-               data := serializedData{
-                       ReadCollections: readCollections,
-                       KeepServerInfo:  keepServerInfo}
-               err = enc.Encode(data)
-               if err != nil {
-                       loggerutil.FatalWithMessage(arvLogger,
-                               fmt.Sprintf("Failed to write summary data: %v", err))
-               }
-               log.Printf("Wrote summary data to: %s", writeDataTo)
-               return true
+       enc := gob.NewEncoder(summaryFile)
+       data := serializedData{
+               ReadCollections: readCollections,
+               KeepServerInfo:  keepServerInfo}
+       err = enc.Encode(data)
+       if err != nil {
+               loggerutil.FatalWithMessage(arvLogger,
+                       fmt.Sprintf("Failed to write summary data: %v", err))
        }
+       log.Printf("Wrote summary data to: %s", writeDataTo)
+       return true
 }
 
+// ShouldReadData reports whether summary data should be read from a file; for development use only
 func ShouldReadData() bool {
        return readDataFrom != ""
 }
 
-// Reads data that we've written to a file.
+// ReadData reads data that we've written to a file.
 //
 // This is useful for development, so that we don't need to read all
 // our data from the network every time we tweak something.
index b326c9521ab0d7b545fd52c340c2b17455ea5aa5..cc01249a624a7f4947cdcfc8dafd73dd7e347377 100644 (file)
@@ -1,4 +1,5 @@
 // Code for generating pull lists as described in https://arvados.org/projects/arvados/wiki/Keep_Design_Doc#Pull-List
+
 package summary
 
 import (
@@ -14,19 +15,21 @@ import (
        "strings"
 )
 
+// Locator is a block digest with its size
 type Locator blockdigest.DigestWithSize
 
+// MarshalJSON encodes the locator as a JSON string
 func (l Locator) MarshalJSON() ([]byte, error) {
        return []byte("\"" + blockdigest.DigestWithSize(l).String() + "\""), nil
 }
 
-// One entry in the Pull List
+// PullRequest represents one entry in the Pull List
 type PullRequest struct {
        Locator Locator  `json:"locator"`
        Servers []string `json:"servers"`
 }
 
-// The Pull List for a particular server
+// PullList for a particular server
 type PullList []PullRequest
 
 // PullListByLocator implements sort.Interface for PullList based on
@@ -49,6 +52,7 @@ func (a PullListByLocator) Less(i, j int) bool {
        return false
 }
 
+// PullServers struct
 // For a given under-replicated block, this structure represents which
 // servers should pull the specified block and which servers they can
 // pull it from.
@@ -57,8 +61,8 @@ type PullServers struct {
        From []string // Servers that already contain the specified block
 }
 
-// Creates a map from block locator to PullServers with one entry for
-// each under-replicated block.
+// ComputePullServers creates a map from block locator to PullServers
+// with one entry for each under-replicated block.
 //
 // This method ignores zero-replica blocks since there are no servers
 // to pull them from, so callers should feel free to omit them, but
@@ -78,7 +82,7 @@ func ComputePullServers(kc *keepclient.KeepClient,
                writableServers[cs.Get(url)] = struct{}{}
        }
 
-       for block, _ := range underReplicated {
+       for block := range underReplicated {
                serversStoringBlock := keepServerInfo.BlockToServers[block]
                numCopies := len(serversStoringBlock)
                numCopiesMissing := blockToDesiredReplication[block] - numCopies
@@ -109,9 +113,9 @@ func ComputePullServers(kc *keepclient.KeepClient,
        return m
 }
 
-// Creates a pull list in which the To and From fields preserve the
-// ordering of sorted servers and the contents are all canonical
-// strings.
+// CreatePullServers creates a pull list in which the To and From
+// fields preserve the ordering of sorted servers and the contents
+// are all canonical strings.
 func CreatePullServers(cs CanonicalString,
        serverHasBlock map[string]struct{},
        writableServers map[string]struct{},
@@ -142,12 +146,12 @@ func CreatePullServers(cs CanonicalString,
        return
 }
 
-// Strips the protocol prefix from a url.
+// RemoveProtocolPrefix strips the protocol prefix from a url.
 func RemoveProtocolPrefix(url string) string {
        return url[(strings.LastIndex(url, "/") + 1):]
 }
 
-// Produces a PullList for each keep server.
+// BuildPullLists produces a PullList for each keep server.
 func BuildPullLists(lps map[Locator]PullServers) (spl map[string]PullList) {
        spl = map[string]PullList{}
        // We don't worry about canonicalizing our strings here, because we
@@ -166,7 +170,7 @@ func BuildPullLists(lps map[Locator]PullServers) (spl map[string]PullList) {
        return
 }
 
-// Writes each pull list to a file.
+// WritePullLists writes each pull list to a file.
 // The filename is based on the hostname.
 //
 // This is just a hack for prototyping, it is not expected to be used
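
With the MarshalJSON above, a serialized pull list looks along these lines (the locator and server names are illustrative):

        [{"locator":"f1d2d2f924e986ac86fdf7b36c94bcdf+67108864",
          "servers":["keep0.example:25107","keep1.example:25107"]}]
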
index edd760b035d066627b51f913799bb2606f4c0141..9fb0316b736c74e789f6e0edea96042c05fd0a91 100644 (file)
@@ -1,4 +1,5 @@
 // Summarizes Collection Data and Keep Server Contents.
+
 package summary
 
 // TODO(misha): Check size of blocks as well as their digest.
@@ -11,31 +12,33 @@ import (
        "sort"
 )
 
+// BlockSet is a map of blocks
 type BlockSet map[blockdigest.DigestWithSize]struct{}
 
-// Adds a single block to the set.
+// Insert adds a single block to the set.
 func (bs BlockSet) Insert(digest blockdigest.DigestWithSize) {
        bs[digest] = struct{}{}
 }
 
-// Adds a set of blocks to the set.
+// Union adds a set of blocks to the set.
 func (bs BlockSet) Union(obs BlockSet) {
        for k, v := range obs {
                bs[k] = v
        }
 }
 
-// We use the collection index to save space. To convert to and from
+// CollectionIndexSet is used to save space. To convert to and from
 // the uuid, use collection.ReadCollections' fields
-// CollectionIndexToUuid and CollectionUuidToIndex.
+// CollectionIndexToUUID and CollectionUUIDToIndex.
 type CollectionIndexSet map[int]struct{}
 
-// Adds a single collection to the set. The collection is specified by
+// Insert adds a single collection to the set. The collection is specified by
 // its index.
 func (cis CollectionIndexSet) Insert(collectionIndex int) {
        cis[collectionIndex] = struct{}{}
 }
 
+// ToCollectionIndexSet adds to collectionIndexSet the index of each collection that contains a block in this BlockSet
 func (bs BlockSet) ToCollectionIndexSet(
        readCollections collection.ReadCollections,
        collectionIndexSet *CollectionIndexSet) {
@@ -46,6 +49,7 @@ func (bs BlockSet) ToCollectionIndexSet(
        }
 }
 
+// ReplicationLevels struct
 // Keeps track of the requested and actual replication levels.
 // Currently this is only used for blocks but could easily be used for
 // collections as well.
@@ -59,18 +63,20 @@ type ReplicationLevels struct {
        Actual int
 }
 
-// Maps from replication levels to their blocks.
+// ReplicationLevelBlockSetMap maps from replication levels to their blocks.
 type ReplicationLevelBlockSetMap map[ReplicationLevels]BlockSet
 
-// An individual entry from ReplicationLevelBlockSetMap which only reports the number of blocks, not which blocks.
+// ReplicationLevelBlockCount is an individual entry from ReplicationLevelBlockSetMap
+// which only reports the number of blocks, not which blocks.
 type ReplicationLevelBlockCount struct {
        Levels ReplicationLevels
        Count  int
 }
 
-// An ordered list of ReplicationLevelBlockCount useful for reporting.
+// ReplicationLevelBlockSetSlice is an ordered list of ReplicationLevelBlockCount useful for reporting.
 type ReplicationLevelBlockSetSlice []ReplicationLevelBlockCount
 
+// ReplicationSummary struct
 type ReplicationSummary struct {
        CollectionBlocksNotInKeep  BlockSet
        UnderReplicatedBlocks      BlockSet
@@ -84,7 +90,7 @@ type ReplicationSummary struct {
        CorrectlyReplicatedCollections CollectionIndexSet
 }
 
-// This struct counts the elements in each set in ReplicationSummary.
+// ReplicationSummaryCounts struct counts the elements in each set in ReplicationSummary.
 type ReplicationSummaryCounts struct {
        CollectionBlocksNotInKeep      int
        UnderReplicatedBlocks          int
@@ -97,8 +103,8 @@ type ReplicationSummaryCounts struct {
        CorrectlyReplicatedCollections int
 }
 
-// Gets the BlockSet for a given set of ReplicationLevels, creating it
-// if it doesn't already exist.
+// GetOrCreate gets the BlockSet for a given set of ReplicationLevels,
+// creating it if it doesn't already exist.
 func (rlbs ReplicationLevelBlockSetMap) GetOrCreate(
        repLevels ReplicationLevels) (bs BlockSet) {
        bs, exists := rlbs[repLevels]
@@ -109,21 +115,21 @@ func (rlbs ReplicationLevelBlockSetMap) GetOrCreate(
        return
 }
 
-// Adds a block to the set for a given replication level.
+// Insert adds a block to the set for a given replication level.
 func (rlbs ReplicationLevelBlockSetMap) Insert(
        repLevels ReplicationLevels,
        block blockdigest.DigestWithSize) {
        rlbs.GetOrCreate(repLevels).Insert(block)
 }
 
-// Adds a set of blocks to the set for a given replication level.
+// Union adds a set of blocks to the set for a given replication level.
 func (rlbs ReplicationLevelBlockSetMap) Union(
        repLevels ReplicationLevels,
        bs BlockSet) {
        rlbs.GetOrCreate(repLevels).Union(bs)
 }
 
-// Outputs a sorted list of ReplicationLevelBlockCounts.
+// Counts outputs a sorted list of ReplicationLevelBlockCounts.
 func (rlbs ReplicationLevelBlockSetMap) Counts() (
        sorted ReplicationLevelBlockSetSlice) {
        sorted = make(ReplicationLevelBlockSetSlice, len(rlbs))
@@ -153,6 +159,7 @@ func (rlbss ReplicationLevelBlockSetSlice) Swap(i, j int) {
        rlbss[i], rlbss[j] = rlbss[j], rlbss[i]
 }
 
+// ComputeCounts returns ReplicationSummaryCounts
 func (rs ReplicationSummary) ComputeCounts() (rsc ReplicationSummaryCounts) {
        // TODO(misha): Consider rewriting this method to iterate through
        // the fields using reflection, instead of explictily listing the
@@ -169,6 +176,7 @@ func (rs ReplicationSummary) ComputeCounts() (rsc ReplicationSummaryCounts) {
        return rsc
 }
 
+// PrettyPrint ReplicationSummaryCounts
 func (rsc ReplicationSummaryCounts) PrettyPrint() string {
        return fmt.Sprintf("Replication Block Counts:"+
                "\n Missing From Keep: %d, "+
@@ -192,12 +200,13 @@ func (rsc ReplicationSummaryCounts) PrettyPrint() string {
                rsc.CorrectlyReplicatedCollections)
 }
 
+// BucketReplication buckets blocks by their requested and actual replication levels, returning a ReplicationLevelBlockSetMap
 func BucketReplication(readCollections collection.ReadCollections,
-       keepServerInfo keep.ReadServers) (rlbsm ReplicationLevelBlockSetMap) {
-       rlbsm = make(ReplicationLevelBlockSetMap)
+       keepServerInfo keep.ReadServers) (rlbs ReplicationLevelBlockSetMap) {
+       rlbs = make(ReplicationLevelBlockSetMap)
 
        for block, requestedReplication := range readCollections.BlockToDesiredReplication {
-               rlbsm.Insert(
+               rlbs.Insert(
                        ReplicationLevels{
                                Requested: requestedReplication,
                                Actual:    len(keepServerInfo.BlockToServers[block])},
@@ -206,7 +215,7 @@ func BucketReplication(readCollections collection.ReadCollections,
 
        for block, servers := range keepServerInfo.BlockToServers {
                if 0 == readCollections.BlockToDesiredReplication[block] {
-                       rlbsm.Insert(
+                       rlbs.Insert(
                                ReplicationLevels{Requested: 0, Actual: len(servers)},
                                block)
                }
@@ -214,7 +223,8 @@ func BucketReplication(readCollections collection.ReadCollections,
        return
 }
 
-func (rlbsm ReplicationLevelBlockSetMap) SummarizeBuckets(
+// SummarizeBuckets sorts the bucketed blocks into the ReplicationSummary sets (not in Keep, under-, over-, and correctly replicated), for blocks and collections alike.
+func (rlbs ReplicationLevelBlockSetMap) SummarizeBuckets(
        readCollections collection.ReadCollections) (
        rs ReplicationSummary) {
        rs.CollectionBlocksNotInKeep = make(BlockSet)
@@ -228,7 +238,7 @@ func (rlbsm ReplicationLevelBlockSetMap) SummarizeBuckets(
        rs.OverReplicatedCollections = make(CollectionIndexSet)
        rs.CorrectlyReplicatedCollections = make(CollectionIndexSet)
 
-       for levels, bs := range rlbsm {
+       for levels, bs := range rlbs {
                if levels.Actual == 0 {
                        rs.CollectionBlocksNotInKeep.Union(bs)
                } else if levels.Requested == 0 {
@@ -254,7 +264,7 @@ func (rlbsm ReplicationLevelBlockSetMap) SummarizeBuckets(
        rs.OverReplicatedBlocks.ToCollectionIndexSet(readCollections,
                &rs.OverReplicatedCollections)
 
-       for i := range readCollections.CollectionIndexToUuid {
+       for i := range readCollections.CollectionIndexToUUID {
                if _, notInKeep := rs.CollectionsNotFullyInKeep[i]; notInKeep {
                } else if _, underReplicated := rs.UnderReplicatedCollections[i]; underReplicated {
                } else if _, overReplicated := rs.OverReplicatedCollections[i]; overReplicated {
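
For orientation, these renamed pieces compose into a short pipeline. A minimal sketch, not part of the patch, assuming readCollections (a collection.ReadCollections) and keepServerInfo (a keep.ReadServers) were populated elsewhere by the datamanager's readers:

        rlbs := BucketReplication(readCollections, keepServerInfo)
        rs := rlbs.SummarizeBuckets(readCollections)
        fmt.Println(rs.ComputeCounts().PrettyPrint())
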
index ea76df4d34043ea1f706ca89a82b699b52c806ca..cc4eb92560b26b385378ffa6d947abb2bc9f0168 100644 (file)
@@ -215,6 +215,6 @@ func TestMixedReplication(t *testing.T) {
        returnedSummary := SummarizeReplication(rc, keepInfo)
 
        if !reflect.DeepEqual(returnedSummary, expectedSummary) {
-               t.Fatalf("Expected returnedSummary to look like: \n%+v but instead it is: \n%+v. Index to UUID is %v. BlockToCollectionIndices is %v.", expectedSummary, returnedSummary, rc.CollectionIndexToUuid, rc.BlockToCollectionIndices)
+               t.Fatalf("Expected returnedSummary to look like: \n%+v but instead it is: \n%+v. Index to UUID is %v. BlockToCollectionIndices is %v.", expectedSummary, returnedSummary, rc.CollectionIndexToUUID, rc.BlockToCollectionIndices)
        }
 }
index 0bedc9cc3a6cd80854185f99867257e0727448af..b6ceacecde2b8e2ffe810deea9e3777aade06625 100644 (file)
@@ -1,4 +1,5 @@
 // Code for generating trash lists
+
 package summary
 
 import (
@@ -9,6 +10,7 @@ import (
        "time"
 )
 
+// BuildTrashLists builds, for each writable keep server, the list of blocks that should be sent to its trash queue.
 func BuildTrashLists(kc *keepclient.KeepClient,
        keepServerInfo *keep.ReadServers,
        keepBlocksNotInCollections BlockSet) (m map[string]keep.TrashList, err error) {
@@ -40,19 +42,19 @@ func buildTrashListsInternal(writableServers map[string]struct{},
        m = make(map[string]keep.TrashList)
 
        for block := range keepBlocksNotInCollections {
-               for _, block_on_server := range keepServerInfo.BlockToServers[block] {
-                       if block_on_server.Mtime >= expiry {
+               for _, blockOnServer := range keepServerInfo.BlockToServers[block] {
+                       if blockOnServer.Mtime >= expiry {
                                continue
                        }
 
                        // block is older than expire cutoff
-                       srv := keepServerInfo.KeepServerIndexToAddress[block_on_server.ServerIndex].String()
+                       srv := keepServerInfo.KeepServerIndexToAddress[blockOnServer.ServerIndex].String()
 
                        if _, writable := writableServers[srv]; !writable {
                                continue
                        }
 
-                       m[srv] = append(m[srv], keep.TrashRequest{Locator: block.Digest.String(), BlockMtime: block_on_server.Mtime})
+                       m[srv] = append(m[srv], keep.TrashRequest{Locator: block.Digest.String(), BlockMtime: blockOnServer.Mtime})
                }
        }
        return
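
The returned map is keyed by writable server address, so callers drain it per server. A hedged sketch; kc, serverInfo, and blocksToTrash are placeholder inputs, not names from this patch:

        trashLists, err := BuildTrashLists(kc, serverInfo, blocksToTrash)
        if err != nil {
                log.Fatal(err)
        }
        for srv, trashList := range trashLists {
                // Each TrashList holds the TrashRequests (locator + block
                // mtime) destined for one writable server.
                log.Printf("%s: %d blocks to trash", srv, len(trashList))
        }
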
index 7620631a157688adbf06f4db90e10edebb7fb4b5..555211fe0275e9a42b49625557f8d505999b9c2d 100644 (file)
@@ -34,7 +34,7 @@ func (s *TrashSuite) TestBuildTrashLists(c *C) {
                                keep.BlockServerInfo{1, 101}}}}
 
        // only block0 is in delete set
-       var bs BlockSet = make(BlockSet)
+       var bs = make(BlockSet)
        bs[block0] = struct{}{}
 
        // Test trash list where only sv0 is on writable list.
index a799ffe07e433e7af3774bd7259aec212d001740..5944638092f9453a1cd5ea49d8d3e8924aba93b3 100644 (file)
@@ -25,6 +25,7 @@ setup(name="arvados-docker-cleaner",
         'docker-py',
         ],
       tests_require=[
+        'pbr<1.7.0',
         'mock',
         ],
       test_suite='tests',
index 3833493598cf05e288dc31f0153732dbccdca907..d7ac773f35198b724fbc91db877026035bb832eb 100644 (file)
@@ -35,7 +35,7 @@ setup(name='arvados_fuse',
         'ciso8601'
         ],
       test_suite='tests',
-      tests_require=['mock>=1.0', 'PyYAML'],
+      tests_require=['pbr<1.7.0', 'mock>=1.0', 'PyYAML'],
       zip_safe=False,
       cmdclass={'egg_info': tagger},
       )
index d0af4a58ea5e7746d8243fb6272820e8c4801307..7900096caf0ad9e80dfe13fde5970dbd0876db27 100644 (file)
@@ -241,6 +241,11 @@ type PutBlockHandler struct {
        *ApiTokenCache
 }
 
+type IndexHandler struct {
+       *keepclient.KeepClient
+       *ApiTokenCache
+}
+
 type InvalidPathHandler struct{}
 
 type OptionsHandler struct{}
@@ -262,6 +267,12 @@ func MakeRESTRouter(
                rest.Handle(`/{locator:[0-9a-f]{32}\+.*}`,
                        GetBlockHandler{kc, t}).Methods("GET", "HEAD")
                rest.Handle(`/{locator:[0-9a-f]{32}}`, GetBlockHandler{kc, t}).Methods("GET", "HEAD")
+
+               // List all blocks
+               rest.Handle(`/index`, IndexHandler{kc, t}).Methods("GET")
+
+               // List blocks whose hash has the given prefix
+               rest.Handle(`/index/{prefix:[0-9a-f]{0,32}}`, IndexHandler{kc, t}).Methods("GET")
        }
 
        if enable_put {
@@ -481,3 +492,63 @@ func (this PutBlockHandler) ServeHTTP(resp http.ResponseWriter, req *http.Reques
                status = http.StatusBadGateway
        }
 }
+
+// ServeHTTP implementation for IndexHandler
+// Supports only GET requests for /index/{prefix:[0-9a-f]{0,32}}
+// For each keep server found in LocalRoots:
+//   Invokes GetIndex using keepclient
+//   Expects "complete" response (terminating with blank new line)
+//   Aborts on any errors
+// Concatenates responses from all those keep servers and returns
+func (handler IndexHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
+       SetCorsHeaders(resp)
+
+       prefix := mux.Vars(req)["prefix"]
+       var err error
+       var status int
+
+       defer func() {
+               if status != http.StatusOK {
+                       http.Error(resp, err.Error(), status)
+               }
+       }()
+
+       kc := *handler.KeepClient
+
+       ok, token := CheckAuthorizationHeader(kc, handler.ApiTokenCache, req)
+       if !ok {
+               status, err = http.StatusForbidden, BadAuthorizationHeader
+               return
+       }
+
+       // Copy ArvadosClient struct and use the client's API token
+       arvclient := *kc.Arvados
+       arvclient.ApiToken = token
+       kc.Arvados = &arvclient
+
+       // Only GET method is supported
+       if req.Method != "GET" {
+               status, err = http.StatusNotImplemented, MethodNotSupported
+               return
+       }
+
+       // Get index from all LocalRoots and write to resp
+       var reader io.Reader
+       for uuid := range kc.LocalRoots() {
+               reader, err = kc.GetIndex(uuid, prefix)
+               if err != nil {
+                       status = http.StatusBadGateway
+                       return
+               }
+
+               _, err = io.Copy(resp, reader)
+               if err != nil {
+                       status = http.StatusBadGateway
+                       return
+               }
+       }
+
+       // Got index from all the keep servers and wrote to resp
+       status = http.StatusOK
+       resp.Write([]byte("\n"))
+}
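
From the client side, the new routes are reached through keepclient's GetIndex, as the test below does. A minimal sketch, assuming kc is an already-configured *keepclient.KeepClient pointed at the proxy:

        // List every block the proxy can see whose hash starts with "abc".
        indexReader, err := kc.GetIndex("proxy", "abc")
        if err != nil {
                log.Fatal(err)
        }
        // One locator per line; a trailing blank line marks a complete response.
        index, err := ioutil.ReadAll(indexReader)
        if err != nil {
                log.Fatal(err)
        }
        fmt.Printf("%s", index)
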
index 5bd832b511a59c5367af117f7d76acd1b84a1f66..7643e4b0fa2225492caae1dd0aff3428505bd86d 100644 (file)
@@ -53,7 +53,7 @@ func closeListener() {
 
 func (s *ServerRequiredSuite) SetUpSuite(c *C) {
        arvadostest.StartAPI()
-       arvadostest.StartKeep()
+       arvadostest.StartKeep(2, false)
 }
 
 func (s *ServerRequiredSuite) SetUpTest(c *C) {
@@ -61,7 +61,7 @@ func (s *ServerRequiredSuite) SetUpTest(c *C) {
 }
 
 func (s *ServerRequiredSuite) TearDownSuite(c *C) {
-       arvadostest.StopKeep()
+       arvadostest.StopKeep(2)
        arvadostest.StopAPI()
 }
 
@@ -386,8 +386,8 @@ func (s *ServerRequiredSuite) TestPostWithoutHash(c *C) {
                c.Check(err, Equals, nil)
                body, err := ioutil.ReadAll(resp.Body)
                c.Check(err, Equals, nil)
-               c.Check(string(body), Equals,
-                       fmt.Sprintf("%x+%d", md5.Sum([]byte("qux")), 3))
+               c.Check(string(body), Matches,
+                       fmt.Sprintf(`^%x\+3(\+.+)?$`, md5.Sum([]byte("qux"))))
        }
 }
 
@@ -406,3 +406,70 @@ func (s *ServerRequiredSuite) TestStripHint(c *C) {
                "http://keep.zzzzz.arvadosapi.com:25107/2228819a18d3727630fa30c81853d23f+67108864+K@zzzzz-zzzzz-zzzzzzzzzzzzzzz+A37b6ab198qqqq28d903b975266b23ee711e1852c@55635f73")
 
 }
+
+// Test GetIndex
+//   Put one block, with 2 replicas
+//   With no prefix (expect the block locator, twice)
+//   With an existing prefix (expect the block locator, twice)
+//   With a valid but non-existing prefix (expect "\n")
+//   With an invalid prefix (expect error)
+func (s *ServerRequiredSuite) TestGetIndex(c *C) {
+       kc := runProxy(c, []string{"keepproxy"}, 28852, false)
+       waitForListener()
+       defer closeListener()
+
+       // Put "index-data" blocks
+       data := []byte("index-data")
+       hash := fmt.Sprintf("%x", md5.Sum(data))
+
+       hash2, rep, err := kc.PutB(data)
+       c.Check(hash2, Matches, fmt.Sprintf(`^%s\+10(\+.+)?$`, hash))
+       c.Check(rep, Equals, 2)
+       c.Check(err, Equals, nil)
+
+       reader, blocklen, _, err := kc.Get(hash)
+       c.Assert(err, Equals, nil)
+       c.Check(blocklen, Equals, int64(10))
+       all, err := ioutil.ReadAll(reader)
+       c.Check(all, DeepEquals, data)
+
+       // Put some more blocks
+       _, rep, err = kc.PutB([]byte("some-more-index-data"))
+       c.Check(err, Equals, nil)
+
+       // Invoke GetIndex
+       for _, spec := range []struct {
+               prefix         string
+               expectTestHash bool
+               expectOther    bool
+       }{
+               {"", true, true},         // with no prefix
+               {hash[:3], true, false},  // with matching prefix
+               {"abcdef", false, false}, // with no such prefix
+       } {
+               indexReader, err := kc.GetIndex("proxy", spec.prefix)
+               c.Assert(err, Equals, nil)
+               indexResp, err := ioutil.ReadAll(indexReader)
+               c.Assert(err, Equals, nil)
+               locators := strings.Split(string(indexResp), "\n")
+               gotTestHash := 0
+               gotOther := 0
+               for _, locator := range locators {
+                       if locator == "" {
+                               continue
+                       }
+                       c.Check(locator[:len(spec.prefix)], Equals, spec.prefix)
+                       if locator[:32] == hash {
+                               gotTestHash++
+                       } else {
+                               gotOther++
+                       }
+               }
+               c.Check(gotTestHash == 2, Equals, spec.expectTestHash)
+               c.Check(gotOther > 0, Equals, spec.expectOther)
+       }
+
+       // GetIndex with invalid prefix
+       _, err = kc.GetIndex("proxy", "xyz")
+       c.Assert(err, NotNil)
+}
diff --git a/services/keepproxy/pkg-extras/etc/default/keepproxy b/services/keepproxy/pkg-extras/etc/default/keepproxy
new file mode 100644 (file)
index 0000000..ddcab10
--- /dev/null
@@ -0,0 +1,7 @@
+user="root"
+group="root"
+chroot="/"
+chdir="/"
+nice=""
+args="-listen=':9100'"
+
diff --git a/services/keepproxy/pkg-extras/etc/init.d/keepproxy b/services/keepproxy/pkg-extras/etc/init.d/keepproxy
new file mode 100755 (executable)
index 0000000..7bb35b9
--- /dev/null
@@ -0,0 +1,156 @@
+#!/bin/sh
+# Init script for keepproxy
+# Maintained by 
+# Generated by pleaserun.
+# Implemented based on LSB Core 3.1:
+#   * Sections: 20.2, 20.3
+#
+### BEGIN INIT INFO
+# Provides:          keepproxy
+# Required-Start:    $remote_fs $syslog
+# Required-Stop:     $remote_fs $syslog
+# Default-Start:     2 3 4 5
+# Default-Stop:      0 1 6
+# Short-Description: 
+# Description:       no description given
+### END INIT INFO
+
+PATH=/sbin:/usr/sbin:/bin:/usr/bin
+export PATH
+
+name=keepproxy
+program=/usr/bin/keepproxy
+args=''
+pidfile="/var/run/$name.pid"
+
+[ -r /etc/default/$name ] && . /etc/default/$name
+[ -r /etc/sysconfig/$name ] && . /etc/sysconfig/$name
+
+trace() {
+  logger -t "/etc/init.d/keepproxy" "$@"
+}
+
+emit() {
+  trace "$@"
+  echo "$@"
+}
+
+start() {
+
+  # Ensure the log directory is setup correctly.
+  [ ! -d "/var/log/" ] && mkdir "/var/log/"
+  chown "$user":"$group" "/var/log/"
+  chmod 755 "/var/log/"
+
+
+  # Setup any environmental stuff beforehand
+  
+
+  # Run the program!
+  
+  chroot --userspec "$user":"$group" "$chroot" sh -c "
+    
+    cd \"$chdir\"
+    exec \"$program\" $args
+  " >> /var/log/keepproxy.stdout 2>> /var/log/keepproxy.stderr &
+
+  # Generate the pidfile from here. If we instead made the forked process
+  # generate it there will be a race condition between the pidfile writing
+  # and a process possibly asking for status.
+  echo $! > $pidfile
+
+  emit "$name started"
+  return 0
+}
+
+stop() {
+  # Try a few times to kill TERM the program
+  if status ; then
+    pid=$(cat "$pidfile")
+    trace "Killing $name (pid $pid) with SIGTERM"
+    kill -TERM $pid
+    # Wait for it to exit.
+    for i in 1 2 3 4 5 ; do
+      trace "Waiting $name (pid $pid) to die..."
+      status || break
+      sleep 1
+    done
+    if status ; then
+      emit "$name stop failed; still running."
+    else
+      emit "$name stopped."
+    fi
+  fi
+}
+
+status() {
+  if [ -f "$pidfile" ] ; then
+    pid=$(cat "$pidfile")
+    if ps -p $pid > /dev/null 2> /dev/null ; then
+      # process by this pid is running.
+      # It may not be our pid, but that's what you get with just pidfiles.
+      # TODO(sissel): Check if this process seems to be the same as the one we
+      # expect. It'd be nice to use flock here, but flock uses fork, not exec,
+      # so it makes it quite awkward to use in this case.
+      return 0
+    else
+      return 2 # program is dead but pid file exists
+    fi
+  else
+    return 3 # program is not running
+  fi
+}
+
+force_stop() {
+  if status ; then
+    stop
+    status && kill -KILL $(cat "$pidfile")
+  fi
+}
+
+
+case "$1" in
+  force-start|start|stop|force-stop|restart)
+    trace "Attempting '$1' on keepproxy"
+    ;;
+esac
+
+case "$1" in
+  force-start)
+    PRESTART=no
+    exec "$0" start
+    ;;
+  start)
+    status
+    code=$?
+    if [ $code -eq 0 ]; then
+      emit "$name is already running"
+      exit $code
+    else
+      start
+      exit $?
+    fi
+    ;;
+  stop) stop ;;
+  force-stop) force_stop ;;
+  status) 
+    status
+    code=$?
+    if [ $code -eq 0 ] ; then
+      emit "$name is running"
+    else
+      emit "$name is not running"
+    fi
+    exit $code
+    ;;
+  restart)
+    stop && start
+    ;;
+  *)
+    echo "Usage: $SCRIPTNAME {start|force-start|stop|force-start|force-stop|status|restart}" >&2
+    exit 3
+  ;;
+esac
+
+exit $?
diff --git a/services/keepstore/azure_blob_volume.go b/services/keepstore/azure_blob_volume.go
new file mode 100644 (file)
index 0000000..e9fda2a
--- /dev/null
@@ -0,0 +1,322 @@
+package main
+
+import (
+       "bytes"
+       "errors"
+       "flag"
+       "fmt"
+       "io"
+       "io/ioutil"
+       "log"
+       "os"
+       "regexp"
+       "strings"
+       "time"
+
+       "github.com/curoverse/azure-sdk-for-go/storage"
+)
+
+var (
+       azureStorageAccountName    string
+       azureStorageAccountKeyFile string
+       azureStorageReplication    int
+       azureWriteRaceInterval     = 15 * time.Second
+       azureWriteRacePollTime     = time.Second
+)
+
+func readKeyFromFile(file string) (string, error) {
+       buf, err := ioutil.ReadFile(file)
+       if err != nil {
+               return "", errors.New("reading key from " + file + ": " + err.Error())
+       }
+       accountKey := strings.TrimSpace(string(buf))
+       if accountKey == "" {
+               return "", errors.New("empty account key in " + file)
+       }
+       return accountKey, nil
+}
+
+type azureVolumeAdder struct {
+       *volumeSet
+}
+
+func (s *azureVolumeAdder) Set(containerName string) error {
+       if containerName == "" {
+               return errors.New("no container name given")
+       }
+       if azureStorageAccountName == "" || azureStorageAccountKeyFile == "" {
+               return errors.New("-azure-storage-account-name and -azure-storage-account-key-file arguments must given before -azure-storage-container-volume")
+       }
+       accountKey, err := readKeyFromFile(azureStorageAccountKeyFile)
+       if err != nil {
+               return err
+       }
+       azClient, err := storage.NewBasicClient(azureStorageAccountName, accountKey)
+       if err != nil {
+               return errors.New("creating Azure storage client: " + err.Error())
+       }
+       if flagSerializeIO {
+               log.Print("Notice: -serialize is not supported by azure-blob-container volumes.")
+       }
+       v := NewAzureBlobVolume(azClient, containerName, flagReadonly, azureStorageReplication)
+       if err := v.Check(); err != nil {
+               return err
+       }
+       *s.volumeSet = append(*s.volumeSet, v)
+       return nil
+}
+
+func init() {
+       flag.Var(&azureVolumeAdder{&volumes},
+               "azure-storage-container-volume",
+               "Use the given container as a storage volume. Can be given multiple times.")
+       flag.StringVar(
+               &azureStorageAccountName,
+               "azure-storage-account-name",
+               "",
+               "Azure storage account name used for subsequent --azure-storage-container-volume arguments.")
+       flag.StringVar(
+               &azureStorageAccountKeyFile,
+               "azure-storage-account-key-file",
+               "",
+               "File containing the account key used for subsequent --azure-storage-container-volume arguments.")
+       flag.IntVar(
+               &azureStorageReplication,
+               "azure-storage-replication",
+               3,
+               "Replication level to report to clients when data is stored in an Azure container.")
+}
+
+// An AzureBlobVolume stores and retrieves blocks in an Azure Blob
+// container.
+type AzureBlobVolume struct {
+       azClient      storage.Client
+       bsClient      storage.BlobStorageClient
+       containerName string
+       readonly      bool
+       replication   int
+}
+
+// NewAzureBlobVolume returns a new AzureBlobVolume using the given
+// client and container name. The replication argument specifies the
+// replication level to report when writing data.
+func NewAzureBlobVolume(client storage.Client, containerName string, readonly bool, replication int) *AzureBlobVolume {
+       return &AzureBlobVolume{
+               azClient:      client,
+               bsClient:      client.GetBlobService(),
+               containerName: containerName,
+               readonly:      readonly,
+               replication:   replication,
+       }
+}
+
+// Check returns nil if the volume is usable.
+func (v *AzureBlobVolume) Check() error {
+       ok, err := v.bsClient.ContainerExists(v.containerName)
+       if err != nil {
+               return err
+       }
+       if !ok {
+               return errors.New("container does not exist")
+       }
+       return nil
+}
+
+// Get reads a Keep block that has been stored as a block blob in the
+// container.
+//
+// If the block is younger than azureWriteRaceInterval and is
+// unexpectedly empty, assume a PutBlob operation is in progress, and
+// wait for it to finish writing.
+func (v *AzureBlobVolume) Get(loc string) ([]byte, error) {
+       var deadline time.Time
+       haveDeadline := false
+       buf, err := v.get(loc)
+       for err == nil && len(buf) == 0 && loc != "d41d8cd98f00b204e9800998ecf8427e" {
+               // Seeing a brand new empty block probably means we're
+               // in a race with CreateBlob, which under the hood
+               // (apparently) does "CreateEmpty" and "CommitData"
+               // with no additional transaction locking.
+               if !haveDeadline {
+                       t, err := v.Mtime(loc)
+                       if err != nil {
+                               log.Print("Got empty block (possible race) but Mtime failed: ", err)
+                               break
+                       }
+                       deadline = t.Add(azureWriteRaceInterval)
+                       if time.Now().After(deadline) {
+                               break
+                       }
+                       log.Printf("Race? Block %s is 0 bytes, %s old. Polling until %s", loc, time.Since(t), deadline)
+                       haveDeadline = true
+               } else if time.Now().After(deadline) {
+                       break
+               }
+               bufs.Put(buf)
+               time.Sleep(azureWriteRacePollTime)
+               buf, err = v.get(loc)
+       }
+       if haveDeadline {
+               log.Printf("Race ended with len(buf)==%d", len(buf))
+       }
+       return buf, err
+}
+
+func (v *AzureBlobVolume) get(loc string) ([]byte, error) {
+       rdr, err := v.bsClient.GetBlob(v.containerName, loc)
+       if err != nil {
+               return nil, v.translateError(err)
+       }
+       defer rdr.Close()
+       buf := bufs.Get(BlockSize)
+       n, err := io.ReadFull(rdr, buf)
+       switch err {
+       case nil, io.EOF, io.ErrUnexpectedEOF:
+               return buf[:n], nil
+       default:
+               bufs.Put(buf)
+               return nil, err
+       }
+}
+
+// Compare the given data with existing stored data.
+func (v *AzureBlobVolume) Compare(loc string, expect []byte) error {
+       rdr, err := v.bsClient.GetBlob(v.containerName, loc)
+       if err != nil {
+               return v.translateError(err)
+       }
+       defer rdr.Close()
+       return compareReaderWithBuf(rdr, expect, loc[:32])
+}
+
+// Put stores a Keep block as a block blob in the container.
+func (v *AzureBlobVolume) Put(loc string, block []byte) error {
+       if v.readonly {
+               return MethodDisabledError
+       }
+       return v.bsClient.CreateBlockBlobFromReader(v.containerName, loc, uint64(len(block)), bytes.NewReader(block))
+}
+
+// Touch updates the last-modified property of a block blob.
+func (v *AzureBlobVolume) Touch(loc string) error {
+       if v.readonly {
+               return MethodDisabledError
+       }
+       return v.bsClient.SetBlobMetadata(v.containerName, loc, map[string]string{
+               "touch": fmt.Sprintf("%d", time.Now()),
+       })
+}
+
+// Mtime returns the last-modified property of a block blob.
+func (v *AzureBlobVolume) Mtime(loc string) (time.Time, error) {
+       props, err := v.bsClient.GetBlobProperties(v.containerName, loc)
+       if err != nil {
+               return time.Time{}, err
+       }
+       return time.Parse(time.RFC1123, props.LastModified)
+}
+
+// IndexTo writes a list of Keep blocks that are stored in the
+// container.
+func (v *AzureBlobVolume) IndexTo(prefix string, writer io.Writer) error {
+       params := storage.ListBlobsParameters{
+               Prefix: prefix,
+       }
+       for {
+               resp, err := v.bsClient.ListBlobs(v.containerName, params)
+               if err != nil {
+                       return err
+               }
+               for _, b := range resp.Blobs {
+                       t, err := time.Parse(time.RFC1123, b.Properties.LastModified)
+                       if err != nil {
+                               return err
+                       }
+                       if !v.isKeepBlock(b.Name) {
+                               continue
+                       }
+                       if b.Properties.ContentLength == 0 && t.Add(azureWriteRaceInterval).After(time.Now()) {
+                               // A new zero-length blob is probably
+                               // just a new non-empty blob that
+                               // hasn't committed its data yet (see
+                               // Get()), and in any case has no
+                               // value.
+                               continue
+                       }
+                       fmt.Fprintf(writer, "%s+%d %d\n", b.Name, b.Properties.ContentLength, t.Unix())
+               }
+               if resp.NextMarker == "" {
+                       return nil
+               }
+               params.Marker = resp.NextMarker
+       }
+}
+
+// Delete a Keep block.
+func (v *AzureBlobVolume) Delete(loc string) error {
+       if v.readonly {
+               return MethodDisabledError
+       }
+       // Ideally we would use If-Unmodified-Since, but that
+       // particular condition seems to be ignored by Azure. Instead,
+       // we get the Etag before checking Mtime, and use If-Match to
+       // ensure we don't delete data if Put() or Touch() happens
+       // between our calls to Mtime() and DeleteBlob().
+       props, err := v.bsClient.GetBlobProperties(v.containerName, loc)
+       if err != nil {
+               return err
+       }
+       if t, err := v.Mtime(loc); err != nil {
+               return err
+       } else if time.Since(t) < blobSignatureTTL {
+               return nil
+       }
+       return v.bsClient.DeleteBlob(v.containerName, loc, map[string]string{
+               "If-Match": props.Etag,
+       })
+}
+
+// Status returns a VolumeStatus struct with placeholder data.
+func (v *AzureBlobVolume) Status() *VolumeStatus {
+       return &VolumeStatus{
+               DeviceNum: 1,
+               BytesFree: BlockSize * 1000,
+               BytesUsed: 1,
+       }
+}
+
+// String returns a volume label, including the container name.
+func (v *AzureBlobVolume) String() string {
+       return fmt.Sprintf("azure-storage-container:%+q", v.containerName)
+}
+
+// Writable returns true, unless the -readonly flag was on when the
+// volume was added.
+func (v *AzureBlobVolume) Writable() bool {
+       return !v.readonly
+}
+
+// Replication returns the replication level of the container, as
+// specified by the -azure-storage-replication argument.
+func (v *AzureBlobVolume) Replication() int {
+       return v.replication
+}
+
+// If possible, translate an Azure SDK error to a recognizable error
+// like os.ErrNotExist.
+func (v *AzureBlobVolume) translateError(err error) error {
+       switch {
+       case err == nil:
+               return err
+       case strings.Contains(err.Error(), "404 Not Found"):
+               // "storage: service returned without a response body (404 Not Found)"
+               return os.ErrNotExist
+       default:
+               return err
+       }
+}
+
+var keepBlockRegexp = regexp.MustCompile(`^[0-9a-f]{32}$`)
+
+func (v *AzureBlobVolume) isKeepBlock(s string) bool {
+       return keepBlockRegexp.MatchString(s)
+}
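
A hedged end-to-end sketch of the new volume type; account, key, container, loc, and block are placeholders, and bufs is keepstore's package-level buffer pool:

        azClient, err := storage.NewBasicClient(account, key)
        if err != nil {
                log.Fatal(err)
        }
        v := NewAzureBlobVolume(azClient, container, false, 3)
        if err := v.Put(loc, block); err != nil {
                log.Fatal(err)
        }
        buf, err := v.Get(loc)
        if err != nil {
                log.Fatal(err)
        }
        bufs.Put(buf) // Get hands out a pool buffer; return it when done
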
diff --git a/services/keepstore/azure_blob_volume_test.go b/services/keepstore/azure_blob_volume_test.go
new file mode 100644 (file)
index 0000000..a240c23
--- /dev/null
@@ -0,0 +1,480 @@
+package main
+
+import (
+       "bytes"
+       "encoding/base64"
+       "encoding/xml"
+       "flag"
+       "fmt"
+       "io/ioutil"
+       "log"
+       "math/rand"
+       "net"
+       "net/http"
+       "net/http/httptest"
+       "regexp"
+       "sort"
+       "strconv"
+       "strings"
+       "sync"
+       "testing"
+       "time"
+
+       "github.com/curoverse/azure-sdk-for-go/storage"
+)
+
+const (
+       // The same fake credentials used by Microsoft's Azure emulator
+       emulatorAccountName = "devstoreaccount1"
+       emulatorAccountKey  = "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw=="
+)
+
+var azureTestContainer string
+
+func init() {
+       flag.StringVar(
+               &azureTestContainer,
+               "test.azure-storage-container-volume",
+               "",
+               "Name of Azure container to use for testing. Do not use a container with real data! Use -azure-storage-account-name and -azure-storage-key-file arguments to supply credentials.")
+}
+
+type azBlob struct {
+       Data        []byte
+       Etag        string
+       Metadata    map[string]string
+       Mtime       time.Time
+       Uncommitted map[string][]byte
+}
+
+type azStubHandler struct {
+       sync.Mutex
+       blobs map[string]*azBlob
+       race  chan chan struct{}
+}
+
+func newAzStubHandler() *azStubHandler {
+       return &azStubHandler{
+               blobs: make(map[string]*azBlob),
+       }
+}
+
+func (h *azStubHandler) TouchWithDate(container, hash string, t time.Time) {
+       blob, ok := h.blobs[container+"|"+hash]
+       if !ok {
+               return
+       }
+       blob.Mtime = t
+}
+
+func (h *azStubHandler) PutRaw(container, hash string, data []byte) {
+       h.Lock()
+       defer h.Unlock()
+       h.blobs[container+"|"+hash] = &azBlob{
+               Data:        data,
+               Mtime:       time.Now(),
+               Uncommitted: make(map[string][]byte),
+       }
+}
+
+func (h *azStubHandler) unlockAndRace() {
+       if h.race == nil {
+               return
+       }
+       h.Unlock()
+       // Signal caller that race is starting by reading from
+       // h.race. If we get a channel, block until that channel is
+       // ready to receive. If we get nil (or h.race is closed) just
+       // proceed.
+       if c := <-h.race; c != nil {
+               c <- struct{}{}
+       }
+       h.Lock()
+}
+
+func (h *azStubHandler) ServeHTTP(rw http.ResponseWriter, r *http.Request) {
+       h.Lock()
+       defer h.Unlock()
+       // defer log.Printf("azStubHandler: %+v", r)
+
+       path := strings.Split(r.URL.Path, "/")
+       container := path[1]
+       hash := ""
+       if len(path) > 2 {
+               hash = path[2]
+       }
+
+       if err := r.ParseForm(); err != nil {
+               log.Printf("azStubHandler(%+v): %s", r, err)
+               rw.WriteHeader(http.StatusBadRequest)
+               return
+       }
+
+       body, err := ioutil.ReadAll(r.Body)
+       if err != nil {
+               return
+       }
+
+       type blockListRequestBody struct {
+               XMLName     xml.Name `xml:"BlockList"`
+               Uncommitted []string
+       }
+
+       blob, blobExists := h.blobs[container+"|"+hash]
+
+       switch {
+       case r.Method == "PUT" && r.Form.Get("comp") == "":
+               // "Put Blob" API
+               if _, ok := h.blobs[container+"|"+hash]; !ok {
+                       // Like the real Azure service, we offer a
+                       // race window during which other clients can
+                       // list/get the new blob before any data is
+                       // committed.
+                       h.blobs[container+"|"+hash] = &azBlob{
+                               Mtime:       time.Now(),
+                               Uncommitted: make(map[string][]byte),
+                               Etag:        makeEtag(),
+                       }
+                       h.unlockAndRace()
+               }
+               h.blobs[container+"|"+hash] = &azBlob{
+                       Data:        body,
+                       Mtime:       time.Now(),
+                       Uncommitted: make(map[string][]byte),
+                       Etag:        makeEtag(),
+               }
+               rw.WriteHeader(http.StatusCreated)
+       case r.Method == "PUT" && r.Form.Get("comp") == "block":
+               // "Put Block" API
+               if !blobExists {
+                       log.Printf("Got block for nonexistent blob: %+v", r)
+                       rw.WriteHeader(http.StatusBadRequest)
+                       return
+               }
+               blockID, err := base64.StdEncoding.DecodeString(r.Form.Get("blockid"))
+               if err != nil || len(blockID) == 0 {
+                       log.Printf("Invalid blockid: %+q", r.Form.Get("blockid"))
+                       rw.WriteHeader(http.StatusBadRequest)
+                       return
+               }
+               blob.Uncommitted[string(blockID)] = body
+               rw.WriteHeader(http.StatusCreated)
+       case r.Method == "PUT" && r.Form.Get("comp") == "blocklist":
+               // "Put Block List" API
+               bl := &blockListRequestBody{}
+               if err := xml.Unmarshal(body, bl); err != nil {
+                       log.Printf("xml Unmarshal: %s", err)
+                       rw.WriteHeader(http.StatusBadRequest)
+                       return
+               }
+               for _, encBlockID := range bl.Uncommitted {
+                       blockID, err := base64.StdEncoding.DecodeString(encBlockID)
+                       if err != nil || len(blockID) == 0 || blob.Uncommitted[string(blockID)] == nil {
+                               log.Printf("Invalid blockid: %+q", encBlockID)
+                               rw.WriteHeader(http.StatusBadRequest)
+                               return
+                       }
+                       blob.Data = blob.Uncommitted[string(blockID)]
+                       blob.Etag = makeEtag()
+                       blob.Mtime = time.Now()
+                       delete(blob.Uncommitted, string(blockID))
+               }
+               rw.WriteHeader(http.StatusCreated)
+       case r.Method == "PUT" && r.Form.Get("comp") == "metadata":
+               // "Set Metadata Headers" API. We don't bother
+               // stubbing "Get Metadata Headers": AzureBlobVolume
+               // sets metadata headers only as a way to bump Etag
+               // and Last-Modified.
+               if !blobExists {
+                       log.Printf("Got metadata for nonexistent blob: %+v", r)
+                       rw.WriteHeader(http.StatusBadRequest)
+                       return
+               }
+               blob.Metadata = make(map[string]string)
+               for k, v := range r.Header {
+                       if strings.HasPrefix(strings.ToLower(k), "x-ms-meta-") {
+                               blob.Metadata[k] = v[0]
+                       }
+               }
+               blob.Mtime = time.Now()
+               blob.Etag = makeEtag()
+       case (r.Method == "GET" || r.Method == "HEAD") && hash != "":
+               // "Get Blob" API
+               if !blobExists {
+                       rw.WriteHeader(http.StatusNotFound)
+                       return
+               }
+               rw.Header().Set("Last-Modified", blob.Mtime.Format(time.RFC1123))
+               rw.Header().Set("Content-Length", strconv.Itoa(len(blob.Data)))
+               if r.Method == "GET" {
+                       if _, err := rw.Write(blob.Data); err != nil {
+                               log.Printf("write %+q: %s", blob.Data, err)
+                       }
+               }
+               h.unlockAndRace()
+       case r.Method == "DELETE" && hash != "":
+               // "Delete Blob" API
+               if !blobExists {
+                       rw.WriteHeader(http.StatusNotFound)
+                       return
+               }
+               delete(h.blobs, container+"|"+hash)
+               rw.WriteHeader(http.StatusAccepted)
+       case r.Method == "GET" && r.Form.Get("comp") == "list" && r.Form.Get("restype") == "container":
+               // "List Blobs" API
+               prefix := container + "|" + r.Form.Get("prefix")
+               marker := r.Form.Get("marker")
+
+               maxResults := 2
+               if n, err := strconv.Atoi(r.Form.Get("maxresults")); err == nil && n >= 1 && n <= 5000 {
+                       maxResults = n
+               }
+
+               resp := storage.BlobListResponse{
+                       Marker:     marker,
+                       NextMarker: "",
+                       MaxResults: int64(maxResults),
+               }
+               var hashes sort.StringSlice
+               for k := range h.blobs {
+                       if strings.HasPrefix(k, prefix) {
+                               hashes = append(hashes, k[len(container)+1:])
+                       }
+               }
+               hashes.Sort()
+               for _, hash := range hashes {
+                       if len(resp.Blobs) == maxResults {
+                               resp.NextMarker = hash
+                               break
+                       }
+                       if len(resp.Blobs) > 0 || marker == "" || marker == hash {
+                               blob := h.blobs[container+"|"+hash]
+                               resp.Blobs = append(resp.Blobs, storage.Blob{
+                                       Name: hash,
+                                       Properties: storage.BlobProperties{
+                                               LastModified:  blob.Mtime.Format(time.RFC1123),
+                                               ContentLength: int64(len(blob.Data)),
+                                               Etag:          blob.Etag,
+                                       },
+                               })
+                       }
+               }
+               buf, err := xml.Marshal(resp)
+               if err != nil {
+                       log.Print(err)
+                       rw.WriteHeader(http.StatusInternalServerError)
+                       return
+               }
+               rw.Write(buf)
+       default:
+               log.Printf("azStubHandler: not implemented: %+v Body:%+q", r, body)
+               rw.WriteHeader(http.StatusNotImplemented)
+       }
+}
+
+// azStubDialer is a net.Dialer that notices when the Azure driver
+// tries to connect to "devstoreaccount1.blob.127.0.0.1:46067", and
+// in such cases transparently dials "127.0.0.1:46067" instead.
+type azStubDialer struct {
+       net.Dialer
+}
+
+var localHostPortRe = regexp.MustCompile(`(127\.0\.0\.1|localhost|\[::1\]):\d+`)
+
+func (d *azStubDialer) Dial(network, address string) (net.Conn, error) {
+       if hp := localHostPortRe.FindString(address); hp != "" {
+               log.Println("azStubDialer: dial", hp, "instead of", address)
+               address = hp
+       }
+       return d.Dialer.Dial(network, address)
+}
+
+type TestableAzureBlobVolume struct {
+       *AzureBlobVolume
+       azHandler *azStubHandler
+       azStub    *httptest.Server
+       t         *testing.T
+}
+
+func NewTestableAzureBlobVolume(t *testing.T, readonly bool, replication int) *TestableAzureBlobVolume {
+       azHandler := newAzStubHandler()
+       azStub := httptest.NewServer(azHandler)
+
+       var azClient storage.Client
+
+       container := azureTestContainer
+       if container == "" {
+               // Connect to stub instead of real Azure storage service
+               stubURLBase := strings.Split(azStub.URL, "://")[1]
+               var err error
+               if azClient, err = storage.NewClient(emulatorAccountName, emulatorAccountKey, stubURLBase, storage.DefaultAPIVersion, false); err != nil {
+                       t.Fatal(err)
+               }
+               container = "fakecontainername"
+       } else {
+               // Connect to real Azure storage service
+               accountKey, err := readKeyFromFile(azureStorageAccountKeyFile)
+               if err != nil {
+                       t.Fatal(err)
+               }
+               azClient, err = storage.NewBasicClient(azureStorageAccountName, accountKey)
+               if err != nil {
+                       t.Fatal(err)
+               }
+       }
+
+       v := NewAzureBlobVolume(azClient, container, readonly, replication)
+
+       return &TestableAzureBlobVolume{
+               AzureBlobVolume: v,
+               azHandler:       azHandler,
+               azStub:          azStub,
+               t:               t,
+       }
+}
+
+func TestAzureBlobVolumeWithGeneric(t *testing.T) {
+       defer func(t http.RoundTripper) {
+               http.DefaultTransport = t
+       }(http.DefaultTransport)
+       http.DefaultTransport = &http.Transport{
+               Dial: (&azStubDialer{}).Dial,
+       }
+       azureWriteRaceInterval = time.Millisecond
+       azureWriteRacePollTime = time.Nanosecond
+       DoGenericVolumeTests(t, func(t *testing.T) TestableVolume {
+               return NewTestableAzureBlobVolume(t, false, azureStorageReplication)
+       })
+}
+
+func TestReadonlyAzureBlobVolumeWithGeneric(t *testing.T) {
+       defer func(t http.RoundTripper) {
+               http.DefaultTransport = t
+       }(http.DefaultTransport)
+       http.DefaultTransport = &http.Transport{
+               Dial: (&azStubDialer{}).Dial,
+       }
+       azureWriteRaceInterval = time.Millisecond
+       azureWriteRacePollTime = time.Nanosecond
+       DoGenericVolumeTests(t, func(t *testing.T) TestableVolume {
+               return NewTestableAzureBlobVolume(t, true, azureStorageReplication)
+       })
+}
+
+func TestAzureBlobVolumeReplication(t *testing.T) {
+       for r := 1; r <= 4; r++ {
+               v := NewTestableAzureBlobVolume(t, false, r)
+               defer v.Teardown()
+               if n := v.Replication(); n != r {
+                       t.Errorf("Got replication %d, expected %d", n, r)
+               }
+       }
+}
+
+func TestAzureBlobVolumeCreateBlobRace(t *testing.T) {
+       defer func(t http.RoundTripper) {
+               http.DefaultTransport = t
+       }(http.DefaultTransport)
+       http.DefaultTransport = &http.Transport{
+               Dial: (&azStubDialer{}).Dial,
+       }
+
+       v := NewTestableAzureBlobVolume(t, false, 3)
+       defer v.Teardown()
+
+       azureWriteRaceInterval = time.Second
+       azureWriteRacePollTime = time.Millisecond
+
+       allDone := make(chan struct{})
+       v.azHandler.race = make(chan chan struct{})
+       go func() {
+               err := v.Put(TestHash, TestBlock)
+               if err != nil {
+                       t.Error(err)
+               }
+       }()
+       continuePut := make(chan struct{})
+       // Wait for the stub's Put to create the empty blob
+       v.azHandler.race <- continuePut
+       go func() {
+               buf, err := v.Get(TestHash)
+               if err != nil {
+                       t.Error(err)
+               } else {
+                       bufs.Put(buf)
+               }
+               close(allDone)
+       }()
+       // Wait for the stub's Get to get the empty blob
+       close(v.azHandler.race)
+       // Allow stub's Put to continue, so the real data is ready
+       // when the volume's Get retries
+       <-continuePut
+       // Wait for volume's Get to return the real data
+       <-allDone
+}
+
+func TestAzureBlobVolumeCreateBlobRaceDeadline(t *testing.T) {
+       defer func(t http.RoundTripper) {
+               http.DefaultTransport = t
+       }(http.DefaultTransport)
+       http.DefaultTransport = &http.Transport{
+               Dial: (&azStubDialer{}).Dial,
+       }
+
+       v := NewTestableAzureBlobVolume(t, false, 3)
+       defer v.Teardown()
+
+       azureWriteRaceInterval = 2 * time.Second
+       azureWriteRacePollTime = 5 * time.Millisecond
+
+       v.PutRaw(TestHash, nil)
+
+       buf := new(bytes.Buffer)
+       v.IndexTo("", buf)
+       if buf.Len() != 0 {
+               t.Errorf("Index %+q should be empty", buf.Bytes())
+       }
+
+       v.TouchWithDate(TestHash, time.Now().Add(-1982 * time.Millisecond))
+
+       allDone := make(chan struct{})
+       go func() {
+               defer close(allDone)
+               buf, err := v.Get(TestHash)
+               if err != nil {
+                       t.Error(err)
+                       return
+               }
+               if len(buf) != 0 {
+                       t.Errorf("Got %+q, expected empty buf", buf)
+               }
+               bufs.Put(buf)
+       }()
+       select {
+       case <-allDone:
+       case <-time.After(time.Second):
+               t.Error("Get should have stopped waiting for race when block was 2s old")
+       }
+
+       buf.Reset()
+       v.IndexTo("", buf)
+       if !bytes.HasPrefix(buf.Bytes(), []byte(TestHash+"+0")) {
+               t.Errorf("Index %+q should have %+q", buf.Bytes(), TestHash+"+0")
+       }
+}
+
+func (v *TestableAzureBlobVolume) PutRaw(locator string, data []byte) {
+       v.azHandler.PutRaw(v.containerName, locator, data)
+}
+
+func (v *TestableAzureBlobVolume) TouchWithDate(locator string, lastPut time.Time) {
+       v.azHandler.TouchWithDate(v.containerName, locator, lastPut)
+}
+
+func (v *TestableAzureBlobVolume) Teardown() {
+       v.azStub.Close()
+}
+
+func makeEtag() string {
+       return fmt.Sprintf("0x%x", rand.Int63())
+}
index 95d118e221de6b8516654f8a133b871da00c5cd2..8726a19150c7faf2f309cc6b3ba647d9aa40dd54 100644 (file)
@@ -18,12 +18,12 @@ type BufferPoolSuite struct{}
 // Initialize a default-sized buffer pool for the benefit of test
 // suites that don't run main().
 func init() {
-       bufs = newBufferPool(maxBuffers, BLOCKSIZE)
+       bufs = newBufferPool(maxBuffers, BlockSize)
 }
 
 // Restore sane default after bufferpool's own tests
 func (s *BufferPoolSuite) TearDownTest(c *C) {
-       bufs = newBufferPool(maxBuffers, BLOCKSIZE)
+       bufs = newBufferPool(maxBuffers, BlockSize)
 }
 
 func (s *BufferPoolSuite) TestBufferPoolBufSize(c *C) {
index 210286ad75ab3869aaf6a9690f5ef341eb15b549..a4af563729b3cf0e72686a2913fd3664e497e24d 100644 (file)
@@ -1,6 +1,7 @@
 package main
 
 import (
+       "bytes"
        "crypto/md5"
        "fmt"
        "io"
@@ -35,7 +36,7 @@ func collisionOrCorrupt(expectMD5 string, buf1, buf2 []byte, rdr io.Reader) erro
        }
        var err error
        for rdr != nil && err == nil {
-               buf := make([]byte, 1 << 18)
+               buf := make([]byte, 1<<18)
                var n int
                n, err = rdr.Read(buf)
                data <- buf[:n]
@@ -47,3 +48,37 @@ func collisionOrCorrupt(expectMD5 string, buf1, buf2 []byte, rdr io.Reader) erro
        }
        return <-outcome
 }
+
+func compareReaderWithBuf(rdr io.Reader, expect []byte, hash string) error {
+       bufLen := 1 << 20
+       if bufLen > len(expect) && len(expect) > 0 {
+               // No need for bufLen to be longer than
+               // expect, except that len(buf)==0 would
+               // prevent us from handling empty readers the
+               // same way as non-empty readers: reading 0
+               // bytes at a time never reaches EOF.
+               bufLen = len(expect)
+       }
+       buf := make([]byte, bufLen)
+       cmp := expect
+
+       // Loop invariants: all data read so far matched what
+       // we expected, and the first N bytes of cmp are
+       // expected to equal the next N bytes read from
+       // rdr.
+       for {
+               n, err := rdr.Read(buf)
+               if n > len(cmp) || !bytes.Equal(cmp[:n], buf[:n]) {
+                       return collisionOrCorrupt(hash, expect[:len(expect)-len(cmp)], buf[:n], rdr)
+               }
+               cmp = cmp[n:]
+               if err == io.EOF {
+                       if len(cmp) != 0 {
+                               return collisionOrCorrupt(hash, expect[:len(expect)-len(cmp)], nil, nil)
+                       }
+                       return nil
+               } else if err != nil {
+                       return err
+               }
+       }
+}
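
The contract, in sketch form (the values here are illustrative): a reader that yields exactly expect returns nil, and any divergence is handed to collisionOrCorrupt along with the block's MD5 so corruption can be distinguished from a hash collision:

        data := []byte("foo")
        hash := fmt.Sprintf("%x", md5.Sum(data))
        err := compareReaderWithBuf(bytes.NewReader(data), data, hash) // nil on match
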
index a9bf91e842f7178fff900e40fb0c2b75fa4e9fba..3817ea19002d1c18f14c2479a383fb2d1601d763 100644 (file)
@@ -25,10 +25,10 @@ import (
 // A RequestTester represents the parameters for an HTTP request to
 // be issued on behalf of a unit test.
 type RequestTester struct {
-       uri          string
-       api_token    string
-       method       string
-       request_body []byte
+       uri         string
+       apiToken    string
+       method      string
+       requestBody []byte
 }
 
 // Test GetBlockHandler on the following situations:
@@ -46,76 +46,76 @@ func TestGetHandler(t *testing.T) {
        defer KeepVM.Close()
 
        vols := KeepVM.AllWritable()
-       if err := vols[0].Put(TEST_HASH, TEST_BLOCK); err != nil {
+       if err := vols[0].Put(TestHash, TestBlock); err != nil {
                t.Error(err)
        }
 
        // Create locators for testing.
        // Turn on permission settings so we can generate signed locators.
-       enforce_permissions = true
-       PermissionSecret = []byte(known_key)
-       blob_signature_ttl = 300 * time.Second
+       enforcePermissions = true
+       PermissionSecret = []byte(knownKey)
+       blobSignatureTTL = 300 * time.Second
 
        var (
-               unsigned_locator  = "/" + TEST_HASH
-               valid_timestamp   = time.Now().Add(blob_signature_ttl)
-               expired_timestamp = time.Now().Add(-time.Hour)
-               signed_locator    = "/" + SignLocator(TEST_HASH, known_token, valid_timestamp)
-               expired_locator   = "/" + SignLocator(TEST_HASH, known_token, expired_timestamp)
+               unsignedLocator  = "/" + TestHash
+               validTimestamp   = time.Now().Add(blobSignatureTTL)
+               expiredTimestamp = time.Now().Add(-time.Hour)
+               signedLocator    = "/" + SignLocator(TestHash, knownToken, validTimestamp)
+               expiredLocator   = "/" + SignLocator(TestHash, knownToken, expiredTimestamp)
        )
 
        // -----------------
        // Test unauthenticated request with permissions off.
-       enforce_permissions = false
+       enforcePermissions = false
 
        // Unauthenticated request, unsigned locator
        // => OK
        response := IssueRequest(
                &RequestTester{
                        method: "GET",
-                       uri:    unsigned_locator,
+                       uri:    unsignedLocator,
                })
        ExpectStatusCode(t,
                "Unauthenticated request, unsigned locator", http.StatusOK, response)
        ExpectBody(t,
                "Unauthenticated request, unsigned locator",
-               string(TEST_BLOCK),
+               string(TestBlock),
                response)
 
-       received_cl := response.Header().Get("Content-Length")
-       expected_cl := fmt.Sprintf("%d", len(TEST_BLOCK))
-       if received_cl != expected_cl {
-               t.Errorf("expected Content-Length %s, got %s", expected_cl, received_cl)
+       receivedLen := response.Header().Get("Content-Length")
+       expectedLen := fmt.Sprintf("%d", len(TestBlock))
+       if receivedLen != expectedLen {
+               t.Errorf("expected Content-Length %s, got %s", expectedLen, receivedLen)
        }
 
        // ----------------
        // Permissions: on.
-       enforce_permissions = true
+       enforcePermissions = true
 
        // Authenticated request, signed locator
        // => OK
        response = IssueRequest(&RequestTester{
-               method:    "GET",
-               uri:       signed_locator,
-               api_token: known_token,
+               method:   "GET",
+               uri:      signedLocator,
+               apiToken: knownToken,
        })
        ExpectStatusCode(t,
                "Authenticated request, signed locator", http.StatusOK, response)
        ExpectBody(t,
-               "Authenticated request, signed locator", string(TEST_BLOCK), response)
+               "Authenticated request, signed locator", string(TestBlock), response)
 
-       received_cl = response.Header().Get("Content-Length")
-       expected_cl = fmt.Sprintf("%d", len(TEST_BLOCK))
-       if received_cl != expected_cl {
-               t.Errorf("expected Content-Length %s, got %s", expected_cl, received_cl)
+       receivedLen = response.Header().Get("Content-Length")
+       expectedLen = fmt.Sprintf("%d", len(TestBlock))
+       if receivedLen != expectedLen {
+               t.Errorf("expected Content-Length %s, got %s", expectedLen, receivedLen)
        }
 
        // Authenticated request, unsigned locator
        // => PermissionError
        response = IssueRequest(&RequestTester{
-               method:    "GET",
-               uri:       unsigned_locator,
-               api_token: known_token,
+               method:   "GET",
+               uri:      unsignedLocator,
+               apiToken: knownToken,
        })
        ExpectStatusCode(t, "unsigned locator", PermissionError.HTTPCode, response)
 
@@ -123,7 +123,7 @@ func TestGetHandler(t *testing.T) {
        // => PermissionError
        response = IssueRequest(&RequestTester{
                method: "GET",
-               uri:    signed_locator,
+               uri:    signedLocator,
        })
        ExpectStatusCode(t,
                "Unauthenticated request, signed locator",
@@ -132,9 +132,9 @@ func TestGetHandler(t *testing.T) {
        // Authenticated request, expired locator
        // => ExpiredError
        response = IssueRequest(&RequestTester{
-               method:    "GET",
-               uri:       expired_locator,
-               api_token: known_token,
+               method:   "GET",
+               uri:      expiredLocator,
+               apiToken: knownToken,
        })
        ExpectStatusCode(t,
                "Authenticated request, expired locator",
@@ -158,25 +158,25 @@ func TestPutHandler(t *testing.T) {
 
        // Unauthenticated request, no server key
        // => OK (unsigned response)
-       unsigned_locator := "/" + TEST_HASH
+       unsignedLocator := "/" + TestHash
        response := IssueRequest(
                &RequestTester{
-                       method:       "PUT",
-                       uri:          unsigned_locator,
-                       request_body: TEST_BLOCK,
+                       method:      "PUT",
+                       uri:         unsignedLocator,
+                       requestBody: TestBlock,
                })
 
        ExpectStatusCode(t,
                "Unauthenticated request, no server key", http.StatusOK, response)
        ExpectBody(t,
                "Unauthenticated request, no server key",
-               TEST_HASH_PUT_RESPONSE, response)
+               TestHashPutResp, response)
 
        // ------------------
        // With a server key.
 
-       PermissionSecret = []byte(known_key)
-       blob_signature_ttl = 300 * time.Second
+       PermissionSecret = []byte(knownKey)
+       blobSignatureTTL = 300 * time.Second
 
        // When a permission key is available, the locator returned
        // from an authenticated PUT request will be signed.
@@ -185,29 +185,29 @@ func TestPutHandler(t *testing.T) {
        // => OK (signed response)
        response = IssueRequest(
                &RequestTester{
-                       method:       "PUT",
-                       uri:          unsigned_locator,
-                       request_body: TEST_BLOCK,
-                       api_token:    known_token,
+                       method:      "PUT",
+                       uri:         unsignedLocator,
+                       requestBody: TestBlock,
+                       apiToken:    knownToken,
                })
 
        ExpectStatusCode(t,
                "Authenticated PUT, signed locator, with server key",
                http.StatusOK, response)
-       response_locator := strings.TrimSpace(response.Body.String())
-       if VerifySignature(response_locator, known_token) != nil {
+       responseLocator := strings.TrimSpace(response.Body.String())
+       if VerifySignature(responseLocator, knownToken) != nil {
                t.Errorf("Authenticated PUT, signed locator, with server key:\n"+
                        "response '%s' does not contain a valid signature",
-                       response_locator)
+                       responseLocator)
        }
 
        // Unauthenticated PUT, unsigned locator
        // => OK
        response = IssueRequest(
                &RequestTester{
-                       method:       "PUT",
-                       uri:          unsigned_locator,
-                       request_body: TEST_BLOCK,
+                       method:      "PUT",
+                       uri:         unsignedLocator,
+                       requestBody: TestBlock,
                })
 
        ExpectStatusCode(t,
@@ -215,32 +215,32 @@ func TestPutHandler(t *testing.T) {
                http.StatusOK, response)
        ExpectBody(t,
                "Unauthenticated PUT, unsigned locator, with server key",
-               TEST_HASH_PUT_RESPONSE, response)
+               TestHashPutResp, response)
 }
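
Editor's note: the signing behavior exercised above pairs SignLocator with VerifySignature — once PermissionSecret and blobSignatureTTL are set, an authenticated PUT returns a locator carrying a permission hint. As a rough illustration only (not keepstore's actual implementation: the signed message layout, the HMAC-SHA1 choice, and the "+A<sig>@<expiry-hex>" hint format are all assumptions here), such a signer could look like:

package main

import (
	"crypto/hmac"
	"crypto/sha1"
	"fmt"
	"time"
)

// signLocatorSketch appends a permission hint to a locator. Signing the
// concatenation of locator, token, and hex expiry with HMAC-SHA1 is an
// assumption made for illustration.
func signLocatorSketch(secret []byte, locator, apiToken string, expiry time.Time) string {
	ts := fmt.Sprintf("%x", expiry.Unix())
	mac := hmac.New(sha1.New, secret)
	fmt.Fprint(mac, locator, apiToken, ts)
	return fmt.Sprintf("%s+A%x@%s", locator, mac.Sum(nil), ts)
}

func main() {
	// md5("foo") is acbd18db4cc2f85cedef654fccc4a4d8; "+3" is the size hint.
	fmt.Println(signLocatorSketch([]byte("example-key"),
		"acbd18db4cc2f85cedef654fccc4a4d8+3", "example-token",
		time.Now().Add(300*time.Second)))
}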
 
 func TestPutAndDeleteSkipReadonlyVolumes(t *testing.T) {
        defer teardown()
-       data_manager_token = "fake-data-manager-token"
+       dataManagerToken = "fake-data-manager-token"
        vols := []*MockVolume{CreateMockVolume(), CreateMockVolume()}
        vols[0].Readonly = true
        KeepVM = MakeRRVolumeManager([]Volume{vols[0], vols[1]})
        defer KeepVM.Close()
        IssueRequest(
                &RequestTester{
-                       method:       "PUT",
-                       uri:          "/" + TEST_HASH,
-                       request_body: TEST_BLOCK,
+                       method:      "PUT",
+                       uri:         "/" + TestHash,
+                       requestBody: TestBlock,
                })
        defer func(orig bool) {
-               never_delete = orig
-       }(never_delete)
-       never_delete = false
+               neverDelete = orig
+       }(neverDelete)
+       neverDelete = false
        IssueRequest(
                &RequestTester{
-                       method:       "DELETE",
-                       uri:          "/" + TEST_HASH,
-                       request_body: TEST_BLOCK,
-                       api_token:    data_manager_token,
+                       method:      "DELETE",
+                       uri:         "/" + TestHash,
+                       requestBody: TestBlock,
+                       apiToken:    dataManagerToken,
                })
        type expect struct {
                volnum    int
@@ -274,7 +274,7 @@ func TestPutAndDeleteSkipReadonlyVolumes(t *testing.T) {
 //   - authenticated   /index/prefix request | superuser
 //
 // The only /index requests that should succeed are those issued by the
-// superuser. They should pass regardless of the value of enforce_permissions.
+// superuser. They should pass regardless of the value of enforcePermissions.
 //
 func TestIndexHandler(t *testing.T) {
        defer teardown()
@@ -286,61 +286,71 @@ func TestIndexHandler(t *testing.T) {
        defer KeepVM.Close()
 
        vols := KeepVM.AllWritable()
-       vols[0].Put(TEST_HASH, TEST_BLOCK)
-       vols[1].Put(TEST_HASH_2, TEST_BLOCK_2)
-       vols[0].Put(TEST_HASH+".meta", []byte("metadata"))
-       vols[1].Put(TEST_HASH_2+".meta", []byte("metadata"))
+       vols[0].Put(TestHash, TestBlock)
+       vols[1].Put(TestHash2, TestBlock2)
+       vols[0].Put(TestHash+".meta", []byte("metadata"))
+       vols[1].Put(TestHash2+".meta", []byte("metadata"))
 
-       data_manager_token = "DATA MANAGER TOKEN"
+       dataManagerToken = "DATA MANAGER TOKEN"
 
-       unauthenticated_req := &RequestTester{
+       unauthenticatedReq := &RequestTester{
                method: "GET",
                uri:    "/index",
        }
-       authenticated_req := &RequestTester{
-               method:    "GET",
-               uri:       "/index",
-               api_token: known_token,
+       authenticatedReq := &RequestTester{
+               method:   "GET",
+               uri:      "/index",
+               apiToken: knownToken,
        }
-       superuser_req := &RequestTester{
-               method:    "GET",
-               uri:       "/index",
-               api_token: data_manager_token,
+       superuserReq := &RequestTester{
+               method:   "GET",
+               uri:      "/index",
+               apiToken: dataManagerToken,
        }
-       unauth_prefix_req := &RequestTester{
+       unauthPrefixReq := &RequestTester{
                method: "GET",
-               uri:    "/index/" + TEST_HASH[0:3],
+               uri:    "/index/" + TestHash[0:3],
        }
-       auth_prefix_req := &RequestTester{
-               method:    "GET",
-               uri:       "/index/" + TEST_HASH[0:3],
-               api_token: known_token,
+       authPrefixReq := &RequestTester{
+               method:   "GET",
+               uri:      "/index/" + TestHash[0:3],
+               apiToken: knownToken,
        }
-       superuser_prefix_req := &RequestTester{
-               method:    "GET",
-               uri:       "/index/" + TEST_HASH[0:3],
-               api_token: data_manager_token,
+       superuserPrefixReq := &RequestTester{
+               method:   "GET",
+               uri:      "/index/" + TestHash[0:3],
+               apiToken: dataManagerToken,
+       }
+       superuserNoSuchPrefixReq := &RequestTester{
+               method:   "GET",
+               uri:      "/index/abcd",
+               apiToken: dataManagerToken,
+       }
+       superuserInvalidPrefixReq := &RequestTester{
+               method:   "GET",
+               uri:      "/index/xyz",
+               apiToken: dataManagerToken,
        }
 
        // -------------------------------------------------------------
        // Only the superuser should be allowed to issue /index requests.
 
        // ---------------------------
-       // enforce_permissions enabled
+       // enforcePermissions enabled
        // This setting should not affect tests passing.
-       enforce_permissions = true
+       enforcePermissions = true
 
        // unauthenticated /index request
        // => UnauthorizedError
-       response := IssueRequest(unauthenticated_req)
+       response := IssueRequest(unauthenticatedReq)
        ExpectStatusCode(t,
-               "enforce_permissions on, unauthenticated request",
+               "enforcePermissions on, unauthenticated request",
                UnauthorizedError.HTTPCode,
                response)
 
        // unauthenticated /index/prefix request
        // => UnauthorizedError
-       response = IssueRequest(unauth_prefix_req)
+       response = IssueRequest(unauthPrefixReq)
        ExpectStatusCode(t,
                "permissions on, unauthenticated /index/prefix request",
                UnauthorizedError.HTTPCode,
@@ -348,7 +358,7 @@ func TestIndexHandler(t *testing.T) {
 
        // authenticated /index request, non-superuser
        // => UnauthorizedError
-       response = IssueRequest(authenticated_req)
+       response = IssueRequest(authenticatedReq)
        ExpectStatusCode(t,
                "permissions on, authenticated request, non-superuser",
                UnauthorizedError.HTTPCode,
@@ -356,7 +366,7 @@ func TestIndexHandler(t *testing.T) {
 
        // authenticated /index/prefix request, non-superuser
        // => UnauthorizedError
-       response = IssueRequest(auth_prefix_req)
+       response = IssueRequest(authPrefixReq)
        ExpectStatusCode(t,
                "permissions on, authenticated /index/prefix request, non-superuser",
                UnauthorizedError.HTTPCode,
@@ -364,27 +374,27 @@ func TestIndexHandler(t *testing.T) {
 
        // superuser /index request
        // => OK
-       response = IssueRequest(superuser_req)
+       response = IssueRequest(superuserReq)
        ExpectStatusCode(t,
                "permissions on, superuser request",
                http.StatusOK,
                response)
 
        // ----------------------------
-       // enforce_permissions disabled
+       // enforcePermissions disabled
        // Valid Request should still pass.
-       enforce_permissions = false
+       enforcePermissions = false
 
        // superuser /index request
        // => OK
-       response = IssueRequest(superuser_req)
+       response = IssueRequest(superuserReq)
        ExpectStatusCode(t,
                "permissions on, superuser request",
                http.StatusOK,
                response)
 
-       expected := `^` + TEST_HASH + `\+\d+ \d+\n` +
-               TEST_HASH_2 + `\+\d+ \d+\n\n$`
+       expected := `^` + TestHash + `\+\d+ \d+\n` +
+               TestHash2 + `\+\d+ \d+\n\n$`
        match, _ := regexp.MatchString(expected, response.Body.String())
        if !match {
                t.Errorf(
@@ -394,19 +404,39 @@ func TestIndexHandler(t *testing.T) {
 
        // superuser /index/prefix request
        // => OK
-       response = IssueRequest(superuser_prefix_req)
+       response = IssueRequest(superuserPrefixReq)
        ExpectStatusCode(t,
                "permissions on, superuser request",
                http.StatusOK,
                response)
 
-       expected = `^` + TEST_HASH + `\+\d+ \d+\n\n$`
+       expected = `^` + TestHash + `\+\d+ \d+\n\n$`
        match, _ = regexp.MatchString(expected, response.Body.String())
        if !match {
                t.Errorf(
                        "permissions on, superuser /index/prefix request: expected %s, got:\n%s",
                        expected, response.Body.String())
        }
+
+       // superuser /index/{no-such-prefix} request
+       // => OK
+       response = IssueRequest(superuserNoSuchPrefixReq)
+       ExpectStatusCode(t,
+               "permissions on, superuser request",
+               http.StatusOK,
+               response)
+
+       if "\n" != response.Body.String() {
+               t.Errorf("Expected empty response for %s. Found %s", superuserNoSuchPrefixReq.uri, response.Body.String())
+       }
+
+       // superuser /index/{invalid-prefix} request
+       // => StatusBadRequest
+       response = IssueRequest(superuserInvalidPrefixReq)
+       ExpectStatusCode(t,
+               "permissions on, superuser request",
+               http.StatusBadRequest,
+               response)
 }
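
Editor's note: the two added cases pin down the /index/{prefix} edge behavior — a well-formed hex prefix that matches no stored block returns just a trailing newline, while a non-hex prefix is rejected with 400 Bad Request. A minimal sketch of the implied prefix handling; the names (validIndexPrefixRe, writeIndexSketch) are hypothetical and not taken from the diff:

package sketch

import (
	"net/http"
	"regexp"
)

// Index prefixes must be (possibly empty) lowercase hex strings.
var validIndexPrefixRe = regexp.MustCompile(`^[0-9a-f]*$`)

// writeIndexSketch renders an index response: one "<hash>+<size> <mtime>"
// line per matching block, then a bare newline marking the end of a
// complete listing (so a no-match prefix yields "\n" alone).
func writeIndexSketch(w http.ResponseWriter, prefix string, lines []string) {
	if !validIndexPrefixRe.MatchString(prefix) {
		http.Error(w, "Bad Request", http.StatusBadRequest)
		return
	}
	for _, ln := range lines {
		w.Write([]byte(ln + "\n"))
	}
	w.Write([]byte("\n"))
}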
 
 // TestDeleteHandler
@@ -445,51 +475,51 @@ func TestDeleteHandler(t *testing.T) {
        defer KeepVM.Close()
 
        vols := KeepVM.AllWritable()
-       vols[0].Put(TEST_HASH, TEST_BLOCK)
+       vols[0].Put(TestHash, TestBlock)
 
-       // Explicitly set the blob_signature_ttl to 0 for these
+       // Explicitly set the blobSignatureTTL to 0 for these
        // tests, to ensure the MockVolume deletes the blocks
        // even though they have just been created.
-       blob_signature_ttl = time.Duration(0)
+       blobSignatureTTL = time.Duration(0)
 
-       var user_token = "NOT DATA MANAGER TOKEN"
-       data_manager_token = "DATA MANAGER TOKEN"
+       var userToken = "NOT DATA MANAGER TOKEN"
+       dataManagerToken = "DATA MANAGER TOKEN"
 
-       never_delete = false
+       neverDelete = false
 
-       unauth_req := &RequestTester{
+       unauthReq := &RequestTester{
                method: "DELETE",
-               uri:    "/" + TEST_HASH,
+               uri:    "/" + TestHash,
        }
 
-       user_req := &RequestTester{
-               method:    "DELETE",
-               uri:       "/" + TEST_HASH,
-               api_token: user_token,
+       userReq := &RequestTester{
+               method:   "DELETE",
+               uri:      "/" + TestHash,
+               apiToken: userToken,
        }
 
-       superuser_existing_block_req := &RequestTester{
-               method:    "DELETE",
-               uri:       "/" + TEST_HASH,
-               api_token: data_manager_token,
+       superuserExistingBlockReq := &RequestTester{
+               method:   "DELETE",
+               uri:      "/" + TestHash,
+               apiToken: dataManagerToken,
        }
 
-       superuser_nonexistent_block_req := &RequestTester{
-               method:    "DELETE",
-               uri:       "/" + TEST_HASH_2,
-               api_token: data_manager_token,
+       superuserNonexistentBlockReq := &RequestTester{
+               method:   "DELETE",
+               uri:      "/" + TestHash2,
+               apiToken: dataManagerToken,
        }
 
        // Unauthenticated request returns PermissionError.
        var response *httptest.ResponseRecorder
-       response = IssueRequest(unauth_req)
+       response = IssueRequest(unauthReq)
        ExpectStatusCode(t,
                "unauthenticated request",
                PermissionError.HTTPCode,
                response)
 
        // Authenticated non-admin request returns PermissionError.
-       response = IssueRequest(user_req)
+       response = IssueRequest(userReq)
        ExpectStatusCode(t,
                "authenticated non-admin request",
                PermissionError.HTTPCode,
@@ -500,62 +530,62 @@ func TestDeleteHandler(t *testing.T) {
                Deleted int `json:"copies_deleted"`
                Failed  int `json:"copies_failed"`
        }
-       var response_dc, expected_dc deletecounter
+       var responseDc, expectedDc deletecounter
 
-       response = IssueRequest(superuser_nonexistent_block_req)
+       response = IssueRequest(superuserNonexistentBlockReq)
        ExpectStatusCode(t,
                "data manager request, nonexistent block",
                http.StatusNotFound,
                response)
 
-       // Authenticated admin request for existing block while never_delete is set.
-       never_delete = true
-       response = IssueRequest(superuser_existing_block_req)
+       // Authenticated admin request for existing block while neverDelete is set.
+       neverDelete = true
+       response = IssueRequest(superuserExistingBlockReq)
        ExpectStatusCode(t,
                "authenticated request, existing block, method disabled",
                MethodDisabledError.HTTPCode,
                response)
-       never_delete = false
+       neverDelete = false
 
        // Authenticated admin request for existing block.
-       response = IssueRequest(superuser_existing_block_req)
+       response = IssueRequest(superuserExistingBlockReq)
        ExpectStatusCode(t,
                "data manager request, existing block",
                http.StatusOK,
                response)
        // Expect response {"copies_deleted":1,"copies_failed":0}
-       expected_dc = deletecounter{1, 0}
-       json.NewDecoder(response.Body).Decode(&response_dc)
-       if response_dc != expected_dc {
-               t.Errorf("superuser_existing_block_req\nexpected: %+v\nreceived: %+v",
-                       expected_dc, response_dc)
+       expectedDc = deletecounter{1, 0}
+       json.NewDecoder(response.Body).Decode(&responseDc)
+       if responseDc != expectedDc {
+               t.Errorf("superuserExistingBlockReq\nexpected: %+v\nreceived: %+v",
+                       expectedDc, responseDc)
        }
        // Confirm the block has been deleted
-       _, err := vols[0].Get(TEST_HASH)
-       var block_deleted = os.IsNotExist(err)
-       if !block_deleted {
-               t.Error("superuser_existing_block_req: block not deleted")
+       _, err := vols[0].Get(TestHash)
+       var blockDeleted = os.IsNotExist(err)
+       if !blockDeleted {
+               t.Error("superuserExistingBlockReq: block not deleted")
        }
 
-       // A DELETE request on a block newer than blob_signature_ttl
+       // A DELETE request on a block newer than blobSignatureTTL
        // should return success but leave the block on the volume.
-       vols[0].Put(TEST_HASH, TEST_BLOCK)
-       blob_signature_ttl = time.Hour
+       vols[0].Put(TestHash, TestBlock)
+       blobSignatureTTL = time.Hour
 
-       response = IssueRequest(superuser_existing_block_req)
+       response = IssueRequest(superuserExistingBlockReq)
        ExpectStatusCode(t,
                "data manager request, existing block",
                http.StatusOK,
                response)
        // Expect response {"copies_deleted":1,"copies_failed":0}
-       expected_dc = deletecounter{1, 0}
-       json.NewDecoder(response.Body).Decode(&response_dc)
-       if response_dc != expected_dc {
-               t.Errorf("superuser_existing_block_req\nexpected: %+v\nreceived: %+v",
-                       expected_dc, response_dc)
+       expectedDc = deletecounter{1, 0}
+       json.NewDecoder(response.Body).Decode(&responseDc)
+       if responseDc != expectedDc {
+               t.Errorf("superuserExistingBlockReq\nexpected: %+v\nreceived: %+v",
+                       expectedDc, responseDc)
        }
        // Confirm the block has NOT been deleted.
-       _, err = vols[0].Get(TEST_HASH)
+       _, err = vols[0].Get(TestHash)
        if err != nil {
                t.Errorf("testing delete on new block: %s\n", err)
        }
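
Editor's note: the last two DELETE cases turn purely on block age relative to blobSignatureTTL. A sketch of that guard, assuming hypothetical Mtime and Delete methods (the interface below is trimmed to just what the guard needs; keepstore's real Volume interface is larger):

package sketch

import "time"

// agedVolume captures only the two methods the guard needs.
type agedVolume interface {
	Mtime(hash string) (time.Time, error)
	Delete(hash string) error
}

// trashBlockSketch reports success without deleting when the block is
// newer than blobSignatureTTL, and deletes it otherwise.
func trashBlockSketch(vol agedVolume, hash string, blobSignatureTTL time.Duration) error {
	mtime, err := vol.Mtime(hash)
	if err != nil {
		return err
	}
	if time.Since(mtime) < blobSignatureTTL {
		return nil // too new: leave the block on the volume
	}
	return vol.Delete(hash)
}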
@@ -591,12 +621,12 @@ func TestDeleteHandler(t *testing.T) {
 func TestPullHandler(t *testing.T) {
        defer teardown()
 
-       var user_token = "USER TOKEN"
-       data_manager_token = "DATA MANAGER TOKEN"
+       var userToken = "USER TOKEN"
+       dataManagerToken = "DATA MANAGER TOKEN"
 
        pullq = NewWorkQueue()
 
-       good_json := []byte(`[
+       goodJSON := []byte(`[
                {
                        "locator":"locator_with_two_servers",
                        "servers":[
@@ -614,36 +644,36 @@ func TestPullHandler(t *testing.T) {
                }
        ]`)
 
-       bad_json := []byte(`{ "key":"I'm a little teapot" }`)
+       badJSON := []byte(`{ "key":"I'm a little teapot" }`)
 
        type pullTest struct {
-               name          string
-               req           RequestTester
-               response_code int
-               response_body string
+               name         string
+               req          RequestTester
+               responseCode int
+               responseBody string
        }
        var testcases = []pullTest{
                {
                        "Valid pull list from an ordinary user",
-                       RequestTester{"/pull", user_token, "PUT", good_json},
+                       RequestTester{"/pull", userToken, "PUT", goodJSON},
                        http.StatusUnauthorized,
                        "Unauthorized\n",
                },
                {
                        "Invalid pull request from an ordinary user",
-                       RequestTester{"/pull", user_token, "PUT", bad_json},
+                       RequestTester{"/pull", userToken, "PUT", badJSON},
                        http.StatusUnauthorized,
                        "Unauthorized\n",
                },
                {
                        "Valid pull request from the data manager",
-                       RequestTester{"/pull", data_manager_token, "PUT", good_json},
+                       RequestTester{"/pull", dataManagerToken, "PUT", goodJSON},
                        http.StatusOK,
                        "Received 3 pull requests\n",
                },
                {
                        "Invalid pull request from the data manager",
-                       RequestTester{"/pull", data_manager_token, "PUT", bad_json},
+                       RequestTester{"/pull", dataManagerToken, "PUT", badJSON},
                        http.StatusBadRequest,
                        "",
                },
@@ -651,8 +681,8 @@ func TestPullHandler(t *testing.T) {
 
        for _, tst := range testcases {
                response := IssueRequest(&tst.req)
-               ExpectStatusCode(t, tst.name, tst.response_code, response)
-               ExpectBody(t, tst.name, tst.response_body, response)
+               ExpectStatusCode(t, tst.name, tst.responseCode, response)
+               ExpectBody(t, tst.name, tst.responseBody, response)
        }
 
        // The Keep pull manager should have received one good list with 3
@@ -697,12 +727,12 @@ func TestPullHandler(t *testing.T) {
 func TestTrashHandler(t *testing.T) {
        defer teardown()
 
-       var user_token = "USER TOKEN"
-       data_manager_token = "DATA MANAGER TOKEN"
+       var userToken = "USER TOKEN"
+       dataManagerToken = "DATA MANAGER TOKEN"
 
        trashq = NewWorkQueue()
 
-       good_json := []byte(`[
+       goodJSON := []byte(`[
                {
                        "locator":"block1",
                        "block_mtime":1409082153
@@ -717,37 +747,37 @@ func TestTrashHandler(t *testing.T) {
                }
        ]`)
 
-       bad_json := []byte(`I am not a valid JSON string`)
+       badJSON := []byte(`I am not a valid JSON string`)
 
        type trashTest struct {
-               name          string
-               req           RequestTester
-               response_code int
-               response_body string
+               name         string
+               req          RequestTester
+               responseCode int
+               responseBody string
        }
 
        var testcases = []trashTest{
                {
                        "Valid trash list from an ordinary user",
-                       RequestTester{"/trash", user_token, "PUT", good_json},
+                       RequestTester{"/trash", userToken, "PUT", goodJSON},
                        http.StatusUnauthorized,
                        "Unauthorized\n",
                },
                {
                        "Invalid trash list from an ordinary user",
-                       RequestTester{"/trash", user_token, "PUT", bad_json},
+                       RequestTester{"/trash", userToken, "PUT", badJSON},
                        http.StatusUnauthorized,
                        "Unauthorized\n",
                },
                {
                        "Valid trash list from the data manager",
-                       RequestTester{"/trash", data_manager_token, "PUT", good_json},
+                       RequestTester{"/trash", dataManagerToken, "PUT", goodJSON},
                        http.StatusOK,
                        "Received 3 trash requests\n",
                },
                {
                        "Invalid trash list from the data manager",
-                       RequestTester{"/trash", data_manager_token, "PUT", bad_json},
+                       RequestTester{"/trash", dataManagerToken, "PUT", badJSON},
                        http.StatusBadRequest,
                        "",
                },
@@ -755,8 +785,8 @@ func TestTrashHandler(t *testing.T) {
 
        for _, tst := range testcases {
                response := IssueRequest(&tst.req)
-               ExpectStatusCode(t, tst.name, tst.response_code, response)
-               ExpectBody(t, tst.name, tst.response_body, response)
+               ExpectStatusCode(t, tst.name, tst.responseCode, response)
+               ExpectBody(t, tst.name, tst.responseBody, response)
        }
 
        // The trash collector should have received one good list with 3
@@ -779,10 +809,10 @@ func TestTrashHandler(t *testing.T) {
 // REST router.  It returns the HTTP response to the request.
 func IssueRequest(rt *RequestTester) *httptest.ResponseRecorder {
        response := httptest.NewRecorder()
-       body := bytes.NewReader(rt.request_body)
+       body := bytes.NewReader(rt.requestBody)
        req, _ := http.NewRequest(rt.method, rt.uri, body)
-       if rt.api_token != "" {
-               req.Header.Set("Authorization", "OAuth2 "+rt.api_token)
+       if rt.apiToken != "" {
+               req.Header.Set("Authorization", "OAuth2 "+rt.apiToken)
        }
        loggingRouter := MakeLoggingRESTRouter()
        loggingRouter.ServeHTTP(response, req)
@@ -794,22 +824,22 @@ func IssueRequest(rt *RequestTester) *httptest.ResponseRecorder {
 func ExpectStatusCode(
        t *testing.T,
        testname string,
-       expected_status int,
+       expectedStatus int,
        response *httptest.ResponseRecorder) {
-       if response.Code != expected_status {
+       if response.Code != expectedStatus {
                t.Errorf("%s: expected status %d, got %+v",
-                       testname, expected_status, response)
+                       testname, expectedStatus, response)
        }
 }
 
 func ExpectBody(
        t *testing.T,
        testname string,
-       expected_body string,
+       expectedBody string,
        response *httptest.ResponseRecorder) {
-       if expected_body != "" && response.Body.String() != expected_body {
+       if expectedBody != "" && response.Body.String() != expectedBody {
                t.Errorf("%s: expected response body '%s', got %+v",
-                       testname, expected_body, response)
+                       testname, expectedBody, response)
        }
 }
 
@@ -822,16 +852,16 @@ func TestPutNeedsOnlyOneBuffer(t *testing.T) {
        defer func(orig *bufferPool) {
                bufs = orig
        }(bufs)
-       bufs = newBufferPool(1, BLOCKSIZE)
+       bufs = newBufferPool(1, BlockSize)
 
        ok := make(chan struct{})
        go func() {
                for i := 0; i < 2; i++ {
                        response := IssueRequest(
                                &RequestTester{
-                                       method:       "PUT",
-                                       uri:          "/" + TEST_HASH,
-                                       request_body: TEST_BLOCK,
+                                       method:      "PUT",
+                                       uri:         "/" + TestHash,
+                                       requestBody: TestBlock,
                                })
                        ExpectStatusCode(t,
                                "TestPutNeedsOnlyOneBuffer", http.StatusOK, response)
@@ -857,21 +887,21 @@ func TestPutHandlerNoBufferleak(t *testing.T) {
 
        ok := make(chan bool)
        go func() {
-               for i := 0; i < maxBuffers+1; i += 1 {
+               for i := 0; i < maxBuffers+1; i++ {
                        // Unauthenticated request, no server key
                        // => OK (unsigned response)
-                       unsigned_locator := "/" + TEST_HASH
+                       unsignedLocator := "/" + TestHash
                        response := IssueRequest(
                                &RequestTester{
-                                       method:       "PUT",
-                                       uri:          unsigned_locator,
-                                       request_body: TEST_BLOCK,
+                                       method:      "PUT",
+                                       uri:         unsignedLocator,
+                                       requestBody: TestBlock,
                                })
                        ExpectStatusCode(t,
                                "TestPutHandlerBufferleak", http.StatusOK, response)
                        ExpectBody(t,
                                "TestPutHandlerBufferleak",
-                               TEST_HASH_PUT_RESPONSE, response)
+                               TestHashPutResp, response)
                }
                ok <- true
        }()
@@ -893,26 +923,26 @@ func TestGetHandlerNoBufferleak(t *testing.T) {
        defer KeepVM.Close()
 
        vols := KeepVM.AllWritable()
-       if err := vols[0].Put(TEST_HASH, TEST_BLOCK); err != nil {
+       if err := vols[0].Put(TestHash, TestBlock); err != nil {
                t.Error(err)
        }
 
        ok := make(chan bool)
        go func() {
-               for i := 0; i < maxBuffers+1; i += 1 {
+               for i := 0; i < maxBuffers+1; i++ {
                        // Unauthenticated request, unsigned locator
                        // => OK
-                       unsigned_locator := "/" + TEST_HASH
+                       unsignedLocator := "/" + TestHash
                        response := IssueRequest(
                                &RequestTester{
                                        method: "GET",
-                                       uri:    unsigned_locator,
+                                       uri:    unsignedLocator,
                                })
                        ExpectStatusCode(t,
                                "Unauthenticated request, unsigned locator", http.StatusOK, response)
                        ExpectBody(t,
                                "Unauthenticated request, unsigned locator",
-                               string(TEST_BLOCK),
+                               string(TestBlock),
                                response)
                }
                ok <- true
@@ -924,3 +954,19 @@ func TestGetHandlerNoBufferleak(t *testing.T) {
        case <-ok:
        }
 }
+
+func TestPutReplicationHeader(t *testing.T) {
+       defer teardown()
+
+       KeepVM = MakeTestVolumeManager(2)
+       defer KeepVM.Close()
+
+       resp := IssueRequest(&RequestTester{
+               method:      "PUT",
+               uri:         "/" + TestHash,
+               requestBody: TestBlock,
+       })
+       if r := resp.Header().Get("X-Keep-Replicas-Stored"); r != "1" {
+               t.Errorf("Got X-Keep-Replicas-Stored: %q, expected %q", r, "1")
+       }
+}
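
Editor's note: on the wire, the new header is plain to consume — a client reads X-Keep-Replicas-Stored straight off the PUT response. A standalone sketch against a keepstore on the default address; the block content and the token-free request are placeholders:

package main

import (
	"bytes"
	"fmt"
	"log"
	"net/http"
	"strconv"
)

func main() {
	// md5("foo") is acbd18db4cc2f85cedef654fccc4a4d8.
	req, err := http.NewRequest("PUT",
		"http://localhost:25107/acbd18db4cc2f85cedef654fccc4a4d8",
		bytes.NewReader([]byte("foo")))
	if err != nil {
		log.Fatal(err)
	}
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()
	replicas, err := strconv.Atoi(resp.Header.Get("X-Keep-Replicas-Stored"))
	if err != nil {
		log.Fatal("missing or malformed X-Keep-Replicas-Stored header")
	}
	fmt.Printf("stored %d replica(s)\n", replicas)
}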
index e6129a73762de5a8829308ae3efe58aafacad9f5..95af1b48707c6b189982dc18762cb517769bd117 100644 (file)
@@ -60,12 +60,14 @@ func MakeRESTRouter() *mux.Router {
        return rest
 }
 
+// BadRequestHandler is a HandleFunc to address bad requests.
 func BadRequestHandler(w http.ResponseWriter, r *http.Request) {
        http.Error(w, BadRequestError.Error(), BadRequestError.HTTPCode)
 }
 
+// GetBlockHandler is a HandleFunc to address GET block requests.
 func GetBlockHandler(resp http.ResponseWriter, req *http.Request) {
-       if enforce_permissions {
+       if enforcePermissions {
                locator := req.URL.Path[1:] // strip leading slash
                if err := VerifySignature(locator, GetApiToken(req)); err != nil {
                        http.Error(resp, err.Error(), err.(*KeepError).HTTPCode)
@@ -87,6 +89,7 @@ func GetBlockHandler(resp http.ResponseWriter, req *http.Request) {
        resp.Write(block)
 }
 
+// PutBlockHandler is a HandleFunc to address PUT block requests.
 func PutBlockHandler(resp http.ResponseWriter, req *http.Request) {
        hash := mux.Vars(req)["hash"]
 
@@ -99,7 +102,7 @@ func PutBlockHandler(resp http.ResponseWriter, req *http.Request) {
                return
        }
 
-       if req.ContentLength > BLOCKSIZE {
+       if req.ContentLength > BlockSize {
                http.Error(resp, TooLongError.Error(), TooLongError.HTTPCode)
                return
        }
@@ -117,7 +120,7 @@ func PutBlockHandler(resp http.ResponseWriter, req *http.Request) {
                return
        }
 
-       err = PutBlock(buf, hash)
+       replication, err := PutBlock(buf, hash)
        bufs.Put(buf)
 
        if err != nil {
@@ -128,18 +131,17 @@ func PutBlockHandler(resp http.ResponseWriter, req *http.Request) {
 
        // Success; add a size hint, sign the locator if possible, and
        // return it to the client.
-       return_hash := fmt.Sprintf("%s+%d", hash, req.ContentLength)
-       api_token := GetApiToken(req)
-       if PermissionSecret != nil && api_token != "" {
-               expiry := time.Now().Add(blob_signature_ttl)
-               return_hash = SignLocator(return_hash, api_token, expiry)
-       }
-       resp.Write([]byte(return_hash + "\n"))
+       returnHash := fmt.Sprintf("%s+%d", hash, req.ContentLength)
+       apiToken := GetApiToken(req)
+       if PermissionSecret != nil && apiToken != "" {
+               expiry := time.Now().Add(blobSignatureTTL)
+               returnHash = SignLocator(returnHash, apiToken, expiry)
+       }
+       resp.Header().Set("X-Keep-Replicas-Stored", strconv.Itoa(replication))
+       resp.Write([]byte(returnHash + "\n"))
 }
 
-// IndexHandler
-//     A HandleFunc to address /index and /index/{prefix} requests.
-//
+// IndexHandler is a HandleFunc to address /index and /index/{prefix} requests.
 func IndexHandler(resp http.ResponseWriter, req *http.Request) {
        // Reject unauthorized requests.
        if !IsDataManagerToken(GetApiToken(req)) {
@@ -177,20 +179,15 @@ func IndexHandler(resp http.ResponseWriter, req *http.Request) {
 //            * device_num (an integer identifying the underlying filesystem)
 //            * bytes_free
 //            * bytes_used
-//
-type VolumeStatus struct {
-       MountPoint string `json:"mount_point"`
-       DeviceNum  uint64 `json:"device_num"`
-       BytesFree  uint64 `json:"bytes_free"`
-       BytesUsed  uint64 `json:"bytes_used"`
-}
 
+// PoolStatus reports the state of the keepstore buffer pool.
 type PoolStatus struct {
        Alloc uint64 `json:"BytesAllocated"`
        Cap   int    `json:"BuffersMax"`
        Len   int    `json:"BuffersInUse"`
 }
 
+// NodeStatus reports the status of this keepstore node.
 type NodeStatus struct {
        Volumes    []*VolumeStatus `json:"volumes"`
        BufferPool PoolStatus
@@ -202,6 +199,7 @@ type NodeStatus struct {
 var st NodeStatus
 var stLock sync.Mutex
 
+// StatusHandler addresses /status.json requests.
 func StatusHandler(resp http.ResponseWriter, req *http.Request) {
        stLock.Lock()
        readNodeStatus(&st)
@@ -284,7 +282,7 @@ func DeleteHandler(resp http.ResponseWriter, req *http.Request) {
                return
        }
 
-       if never_delete {
+       if neverDelete {
                http.Error(resp, MethodDisabledError.Error(), MethodDisabledError.HTTPCode)
                return
        }
@@ -360,11 +358,13 @@ func DeleteHandler(resp http.ResponseWriter, req *http.Request) {
    If the JSON unmarshalling fails, return 400 Bad Request.
 */
 
+// PullRequest consists of a block locator and an ordered list of servers to pull the block from.
 type PullRequest struct {
        Locator string   `json:"locator"`
        Servers []string `json:"servers"`
 }
 
+// PullHandler processes "PUT /pull" requests for the data manager.
 func PullHandler(resp http.ResponseWriter, req *http.Request) {
        // Reject unauthorized requests.
        if !IsDataManagerToken(GetApiToken(req)) {
@@ -394,11 +394,13 @@ func PullHandler(resp http.ResponseWriter, req *http.Request) {
        pullq.ReplaceQueue(plist)
 }
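
Editor's note: the goodJSON/badJSON fixtures in the tests imply the parse step shown above — the request body must unmarshal into a list of PullRequest, and anything that doesn't maps to 400. A standalone sketch of that decode; the names here are illustrative:

package sketch

import "encoding/json"

type pullRequest struct {
	Locator string   `json:"locator"`
	Servers []string `json:"servers"`
}

// decodePullList mirrors the parse the tests imply: a malformed body
// (like the badJSON fixture) fails here and becomes a 400 response.
func decodePullList(body []byte) ([]pullRequest, error) {
	var plist []pullRequest
	if err := json.Unmarshal(body, &plist); err != nil {
		return nil, err
	}
	return plist, nil
}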
 
+// TrashRequest consists of a block locator and its Mtime.
 type TrashRequest struct {
        Locator    string `json:"locator"`
        BlockMtime int64  `json:"block_mtime"`
 }
 
+// TrashHandler processes /trash requests.
 func TrashHandler(resp http.ResponseWriter, req *http.Request) {
        // Reject unauthorized requests.
        if !IsDataManagerToken(GetApiToken(req)) {
@@ -439,8 +441,8 @@ func TrashHandler(resp http.ResponseWriter, req *http.Request) {
 // should be the only part of the code that cares about which volume a
 // block is stored on, so it should be responsible for figuring out
 // which volume to check for fetching blocks, storing blocks, etc.
-
 // ==============================
+
 // GetBlock fetches and returns the block identified by "hash".
 //
 // On success, GetBlock returns a byte slice with the block data, and
@@ -451,10 +453,9 @@ func TrashHandler(resp http.ResponseWriter, req *http.Request) {
 // If the block found does not have the correct MD5 hash, returns
 // DiskHashError.
 //
-
 func GetBlock(hash string) ([]byte, error) {
        // Attempt to read the requested hash from a keep volume.
-       error_to_caller := NotFoundError
+       errorToCaller := NotFoundError
 
        for _, vol := range KeepVM.AllReadable() {
                buf, err := vol.Get(hash)
@@ -477,79 +478,80 @@ func GetBlock(hash string) ([]byte, error) {
                        // this.
                        log.Printf("%s: checksum mismatch for request %s (actual %s)",
                                vol, hash, filehash)
-                       error_to_caller = DiskHashError
+                       errorToCaller = DiskHashError
                        bufs.Put(buf)
                        continue
                }
-               if error_to_caller == DiskHashError {
+               if errorToCaller == DiskHashError {
                        log.Printf("%s: checksum mismatch for request %s but a good copy was found on another volume and returned",
                                vol, hash)
                }
                return buf, nil
        }
-       return nil, error_to_caller
+       return nil, errorToCaller
 }
 
-/* PutBlock(block, hash)
-   Stores the BLOCK (identified by the content id HASH) in Keep.
-
-   The MD5 checksum of the block must be identical to the content id HASH.
-   If not, an error is returned.
-
-   PutBlock stores the BLOCK on the first Keep volume with free space.
-   A failure code is returned to the user only if all volumes fail.
-
-   On success, PutBlock returns nil.
-   On failure, it returns a KeepError with one of the following codes:
-
-   500 Collision
-          A different block with the same hash already exists on this
-          Keep server.
-   422 MD5Fail
-          The MD5 hash of the BLOCK does not match the argument HASH.
-   503 Full
-          There was not enough space left in any Keep volume to store
-          the object.
-   500 Fail
-          The object could not be stored for some other reason (e.g.
-          all writes failed). The text of the error message should
-          provide as much detail as possible.
-*/
-
-func PutBlock(block []byte, hash string) error {
+// PutBlock stores the BLOCK (identified by the content id HASH) in Keep.
+//
+//   The MD5 checksum of the block must be identical to the content id HASH.
+//   If not, an error is returned.
+//
+//   PutBlock stores the BLOCK on the first Keep volume with free space.
+//   A failure code is returned to the user only if all volumes fail.
+//
+//   On success, PutBlock returns the replication level reported by the
+//   volume that stored the block, and a nil error.
+//   On failure, it returns a KeepError with one of the following codes:
+//
+//   500 Collision
+//          A different block with the same hash already exists on this
+//          Keep server.
+//   422 MD5Fail
+//          The MD5 hash of the BLOCK does not match the argument HASH.
+//   503 Full
+//          There was not enough space left in any Keep volume to store
+//          the object.
+//   500 Fail
+//          The object could not be stored for some other reason (e.g.
+//          all writes failed). The text of the error message should
+//          provide as much detail as possible.
+//
+func PutBlock(block []byte, hash string) (int, error) {
        // Check that BLOCK's checksum matches HASH.
        blockhash := fmt.Sprintf("%x", md5.Sum(block))
        if blockhash != hash {
                log.Printf("%s: MD5 checksum %s did not match request", hash, blockhash)
-               return RequestHashError
+               return 0, RequestHashError
        }
 
        // If we already have this data, it's intact on disk, and we
        // can update its timestamp, return success. If we have
        // different data with the same hash, return failure.
-       if err := CompareAndTouch(hash, block); err == nil || err == CollisionError {
-               return err
+       if n, err := CompareAndTouch(hash, block); err == nil || err == CollisionError {
+               return n, err
        }
 
        // Choose a Keep volume to write to.
        // If this volume fails, try all of the volumes in order.
        if vol := KeepVM.NextWritable(); vol != nil {
                if err := vol.Put(hash, block); err == nil {
-                       return nil // success!
+                       return vol.Replication(), nil // success!
                }
        }
 
        writables := KeepVM.AllWritable()
        if len(writables) == 0 {
                log.Print("No writable volumes.")
-               return FullError
+               return 0, FullError
        }
 
        allFull := true
        for _, vol := range writables {
                err := vol.Put(hash, block)
                if err == nil {
-                       return nil // success!
+                       return vol.Replication(), nil // success!
                }
                if err != FullError {
                        // The volume is not full but the
@@ -562,18 +564,18 @@ func PutBlock(block []byte, hash string) error {
 
        if allFull {
                log.Print("All volumes are full.")
-               return FullError
-       } else {
-               // Already logged the non-full errors.
-               return GenericError
+               return 0, FullError
        }
+       // Already logged the non-full errors.
+       return 0, GenericError
 }
 
-// CompareAndTouch returns nil if one of the volumes already has the
-// given content and it successfully updates the relevant block's
-// modification time in order to protect it from premature garbage
-// collection.
-func CompareAndTouch(hash string, buf []byte) error {
+// CompareAndTouch returns the current replication level if one of the
+// volumes already has the given content and it successfully updates
+// the relevant block's modification time in order to protect it from
+// premature garbage collection. Otherwise, it returns a non-nil
+// error.
+func CompareAndTouch(hash string, buf []byte) (int, error) {
        var bestErr error = NotFoundError
        for _, vol := range KeepVM.AllWritable() {
                if err := vol.Compare(hash, buf); err == CollisionError {
@@ -583,7 +585,7 @@ func CompareAndTouch(hash string, buf []byte) error {
                        // both, so there's no point writing it even
                        // on a different volume.)
                        log.Printf("%s: Compare(%s): %s", vol, hash, err)
-                       return err
+                       return 0, err
                } else if os.IsNotExist(err) {
                        // Block does not exist. This is the only
                        // "normal" error: we don't log anything.
@@ -601,17 +603,16 @@ func CompareAndTouch(hash string, buf []byte) error {
                        continue
                }
                // Compare and Touch both worked --> done.
-               return nil
+               return vol.Replication(), nil
        }
-       return bestErr
+       return 0, bestErr
 }
 
 var validLocatorRe = regexp.MustCompile(`^[0-9a-f]{32}$`)
 
-// IsValidLocator
-//     Return true if the specified string is a valid Keep locator.
-//     When Keep is extended to support hash types other than MD5,
-//     this should be updated to cover those as well.
+// IsValidLocator returns true if the specified string is a valid Keep
+// locator. When Keep is extended to support hash types other than MD5,
+// this should be updated to cover those as well.
 //
 func IsValidLocator(loc string) bool {
        return validLocatorRe.MatchString(loc)
@@ -632,10 +633,10 @@ func GetApiToken(req *http.Request) string {
 }
 
 // IsExpired returns true if the given Unix timestamp (expressed as a
-// hexadecimal string) is in the past, or if timestamp_hex cannot be
+// hexadecimal string) is in the past, or if timestampHex cannot be
 // parsed as a hexadecimal string.
-func IsExpired(timestamp_hex string) bool {
-       ts, err := strconv.ParseInt(timestamp_hex, 16, 0)
+func IsExpired(timestampHex string) bool {
+       ts, err := strconv.ParseInt(timestampHex, 16, 0)
        if err != nil {
                log.Printf("IsExpired: %s", err)
                return true
@@ -643,25 +644,25 @@ func IsExpired(timestamp_hex string) bool {
        return time.Unix(ts, 0).Before(time.Now())
 }
 
-// CanDelete returns true if the user identified by api_token is
+// CanDelete returns true if the user identified by apiToken is
 // allowed to delete blocks.
-func CanDelete(api_token string) bool {
-       if api_token == "" {
+func CanDelete(apiToken string) bool {
+       if apiToken == "" {
                return false
        }
        // Blocks may be deleted only when Keep has been configured with a
        // data manager.
-       if IsDataManagerToken(api_token) {
+       if IsDataManagerToken(apiToken) {
                return true
        }
-       // TODO(twp): look up api_token with the API server
+       // TODO(twp): look up apiToken with the API server
        // return true if is_admin is true and if the token
        // has unlimited scope
        return false
 }
 
-// IsDataManagerToken returns true if api_token represents the data
+// IsDataManagerToken returns true if apiToken represents the data
 // manager's token.
-func IsDataManagerToken(api_token string) bool {
-       return data_manager_token != "" && api_token == data_manager_token
+func IsDataManagerToken(apiToken string) bool {
+       return dataManagerToken != "" && apiToken == dataManagerToken
 }
diff --git a/services/keepstore/handlers_with_generic_volume_test.go b/services/keepstore/handlers_with_generic_volume_test.go
new file mode 100644 (file)
index 0000000..9f31f5f
--- /dev/null
@@ -0,0 +1,119 @@
+package main
+
+import (
+       "bytes"
+       "testing"
+)
+
+// A TestableVolumeManagerFactory creates a volume manager with at least two TestableVolume instances.
+// The factory function, and the TestableVolume instances it returns, can use "t" to write
+// logs, fail the current test, etc.
+type TestableVolumeManagerFactory func(t *testing.T) (*RRVolumeManager, []TestableVolume)
+
+// DoHandlersWithGenericVolumeTests runs a set of handler tests with a
+// volume manager composed of TestableVolume instances.
+// It calls factory to create a volume manager with TestableVolume
+// instances for each test case, to avoid leaking state between tests.
+func DoHandlersWithGenericVolumeTests(t *testing.T, factory TestableVolumeManagerFactory) {
+       testGetBlock(t, factory, TestHash, TestBlock)
+       testGetBlock(t, factory, EmptyHash, EmptyBlock)
+       testPutRawBadDataGetBlock(t, factory, TestHash, TestBlock, []byte("baddata"))
+       testPutRawBadDataGetBlock(t, factory, EmptyHash, EmptyBlock, []byte("baddata"))
+       testPutBlock(t, factory, TestHash, TestBlock)
+       testPutBlock(t, factory, EmptyHash, EmptyBlock)
+       testPutBlockCorrupt(t, factory, TestHash, TestBlock, []byte("baddata"))
+       testPutBlockCorrupt(t, factory, EmptyHash, EmptyBlock, []byte("baddata"))
+}
+
+// Set up an RRVolumeManager with TestableVolumes.
+func setupHandlersWithGenericVolumeTest(t *testing.T, factory TestableVolumeManagerFactory) []TestableVolume {
+       vm, testableVolumes := factory(t)
+       KeepVM = vm
+
+       for _, v := range testableVolumes {
+               defer v.Teardown()
+       }
+       defer KeepVM.Close()
+
+       return testableVolumes
+}
+
+// Put a block using PutRaw in just one volume and Get it using GetBlock
+func testGetBlock(t *testing.T, factory TestableVolumeManagerFactory, testHash string, testBlock []byte) {
+       testableVolumes := setupHandlersWithGenericVolumeTest(t, factory)
+
+       // Put testBlock in one volume
+       testableVolumes[1].PutRaw(testHash, testBlock)
+
+       // Get should pass
+       buf, err := GetBlock(testHash)
+       if err != nil {
+               t.Fatalf("Error while getting block %s", err)
+       }
+       if bytes.Compare(buf, testBlock) != 0 {
+               t.Errorf("Put succeeded but Get returned %+v, expected %+v", buf, testBlock)
+       }
+}
+
+// Put a bad block using PutRaw; GetBlock should fail rather than return it.
+func testPutRawBadDataGetBlock(t *testing.T, factory TestableVolumeManagerFactory,
+       testHash string, testBlock []byte, badData []byte) {
+       testableVolumes := setupHandlersWithGenericVolumeTest(t, factory)
+
+       // Put bad data for testHash in both volumes
+       testableVolumes[0].PutRaw(testHash, badData)
+       testableVolumes[1].PutRaw(testHash, badData)
+
+       // Get should fail
+       _, err := GetBlock(testHash)
+       if err == nil {
+               t.Fatalf("Expected error while getting corrupt block %v", testHash)
+       }
+}
+
+// Invoke PutBlock twice to ensure CompareAndTouch path is tested.
+func testPutBlock(t *testing.T, factory TestableVolumeManagerFactory, testHash string, testBlock []byte) {
+       setupHandlersWithGenericVolumeTest(t, factory)
+
+       // PutBlock
+       if _, err := PutBlock(testBlock, testHash); err != nil {
+               t.Fatalf("Error during PutBlock: %s", err)
+       }
+
+       // Check that PutBlock succeeds again even after CompareAndTouch
+       if _, err := PutBlock(testBlock, testHash); err != nil {
+               t.Fatalf("Error during PutBlock: %s", err)
+       }
+
+       // Check that PutBlock stored the data as expected
+       buf, err := GetBlock(testHash)
+       if err != nil {
+               t.Fatalf("Error during GetBlock for %q: %s", testHash, err)
+       } else if bytes.Compare(buf, testBlock) != 0 {
+               t.Errorf("Get response incorrect. Expected %q; found %q", testBlock, buf)
+       }
+}
+
+// Put a bad block using PutRaw, overwrite it using PutBlock and get it.
+func testPutBlockCorrupt(t *testing.T, factory TestableVolumeManagerFactory,
+       testHash string, testBlock []byte, badData []byte) {
+       testableVolumes := setupHandlersWithGenericVolumeTest(t, factory)
+
+       // Put bad data for testHash in both volumes
+       testableVolumes[0].PutRaw(testHash, badData)
+       testableVolumes[1].PutRaw(testHash, badData)
+
+       // Check that PutBlock with good data succeeds
+       if _, err := PutBlock(testBlock, testHash); err != nil {
+               t.Fatalf("Error during PutBlock for %q: %s", testHash, err)
+       }
+
+       // Put succeeded and overwrote the badData in one volume,
+       // and Get should return the testBlock now, ignoring the bad data.
+       buf, err := GetBlock(testHash)
+       if err != nil {
+               t.Fatalf("Error during GetBlock for %q: %s", testHash, err)
+       } else if bytes.Compare(buf, testBlock) != 0 {
+               t.Errorf("Get response incorrect. Expected %q; found %q", testBlock, buf)
+       }
+}
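
Editor's note: a concrete volume type would opt into this suite by supplying the factory. A hypothetical wiring, assuming MakeRRVolumeManager returns *RRVolumeManager and that MockVolume satisfies TestableVolume — neither is confirmed by the diff above:

func TestMockVolumeGenericHandlers(t *testing.T) {
	DoHandlersWithGenericVolumeTests(t, func(t *testing.T) (*RRVolumeManager, []TestableVolume) {
		// Two mock volumes, round-robin managed, handed to the suite.
		vols := []*MockVolume{CreateMockVolume(), CreateMockVolume()}
		vm := MakeRRVolumeManager([]Volume{vols[0], vols[1]})
		return vm, []TestableVolume{vols[0], vols[1]}
	})
}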
index 53cf7be6ca09c62828787f1c9229cce13193b480..2528f6d6a6c4dbf2f4b509e670c834aa10b9e618 100644 (file)
@@ -1,9 +1,7 @@
 package main
 
 import (
-       "bufio"
        "bytes"
-       "errors"
        "flag"
        "fmt"
        "git.curoverse.com/arvados.git/sdk/go/keepclient"
@@ -14,7 +12,6 @@ import (
        "os"
        "os/signal"
        "strings"
-       "sync"
        "syscall"
        "time"
 )
@@ -27,41 +24,45 @@ import (
 
 // Default TCP address on which to listen for requests.
 // Initialized by the --listen flag.
-const DEFAULT_ADDR = ":25107"
+const DefaultAddr = ":25107"
 
 // A Keep "block" is 64MB.
-const BLOCKSIZE = 64 * 1024 * 1024
+const BlockSize = 64 * 1024 * 1024
 
-// A Keep volume must have at least MIN_FREE_KILOBYTES available
+// A Keep volume must have at least MinFreeKilobytes available
 // in order to permit writes.
-const MIN_FREE_KILOBYTES = BLOCKSIZE / 1024
+const MinFreeKilobytes = BlockSize / 1024
 
-var PROC_MOUNTS = "/proc/mounts"
+// Until #6221 is resolved, neverDelete must be true.
+// However, tests may set it to false when using TestDataManagerToken.
+const TestDataManagerToken = "4axaw8zxe0qm22wa6urpp5nskcne8z88cvbupv653y1njyi05h"
 
-// enforce_permissions controls whether permission signatures
+// ProcMounts is the path to the system mount table, normally /proc/mounts.
+var ProcMounts = "/proc/mounts"
+
+// enforcePermissions controls whether permission signatures
 // should be enforced (affecting GET and DELETE requests).
 // Initialized by the -enforce-permissions flag.
-var enforce_permissions bool
+var enforcePermissions bool
 
-// blob_signature_ttl is the time duration for which new permission
+// blobSignatureTTL is the time duration for which new permission
 // signatures (returned by PUT requests) will be valid.
 // Initialized by the -permission-ttl flag.
-var blob_signature_ttl time.Duration
+var blobSignatureTTL time.Duration
 
-// data_manager_token represents the API token used by the
+// dataManagerToken represents the API token used by the
 // Data Manager, and is required on certain privileged operations.
 // Initialized by the -data-manager-token-file flag.
-var data_manager_token string
+var dataManagerToken string
 
-// never_delete can be used to prevent the DELETE handler from
+// neverDelete can be used to prevent the DELETE handler from
 // actually deleting anything.
-var never_delete = true
+var neverDelete = true
 
 var maxBuffers = 128
 var bufs *bufferPool
 
-// ==========
-// Error types.
+// KeepError is an error with an associated HTTP status code.
 //
 type KeepError struct {
        HTTPCode int
@@ -110,95 +111,16 @@ var KeepVM VolumeManager
 var pullq *WorkQueue
 var trashq *WorkQueue
 
+type volumeSet []Volume
+
 var (
        flagSerializeIO bool
        flagReadonly    bool
+       volumes         volumeSet
 )
 
-type volumeSet []Volume
-
-func (vs *volumeSet) Set(value string) error {
-       if dirs := strings.Split(value, ","); len(dirs) > 1 {
-               log.Print("DEPRECATED: using comma-separated volume list.")
-               for _, dir := range dirs {
-                       if err := vs.Set(dir); err != nil {
-                               return err
-                       }
-               }
-               return nil
-       }
-       if len(value) == 0 || value[0] != '/' {
-               return errors.New("Invalid volume: must begin with '/'.")
-       }
-       if _, err := os.Stat(value); err != nil {
-               return err
-       }
-       var locker sync.Locker
-       if flagSerializeIO {
-               locker = &sync.Mutex{}
-       }
-       *vs = append(*vs, &UnixVolume{
-               root:     value,
-               locker:   locker,
-               readonly: flagReadonly,
-       })
-       return nil
-}
-
 func (vs *volumeSet) String() string {
-       s := "["
-       for i, v := range *vs {
-               if i > 0 {
-                       s = s + " "
-               }
-               s = s + v.String()
-       }
-       return s + "]"
-}
-
-// Discover adds a volume for every directory named "keep" that is
-// located at the top level of a device- or tmpfs-backed mount point
-// other than "/". It returns the number of volumes added.
-func (vs *volumeSet) Discover() int {
-       added := 0
-       f, err := os.Open(PROC_MOUNTS)
-       if err != nil {
-               log.Fatalf("opening %s: %s", PROC_MOUNTS, err)
-       }
-       scanner := bufio.NewScanner(f)
-       for scanner.Scan() {
-               args := strings.Fields(scanner.Text())
-               if err := scanner.Err(); err != nil {
-                       log.Fatalf("reading %s: %s", PROC_MOUNTS, err)
-               }
-               dev, mount := args[0], args[1]
-               if mount == "/" {
-                       continue
-               }
-               if dev != "tmpfs" && !strings.HasPrefix(dev, "/dev/") {
-                       continue
-               }
-               keepdir := mount + "/keep"
-               if st, err := os.Stat(keepdir); err != nil || !st.IsDir() {
-                       continue
-               }
-               // Set the -readonly flag (but only for this volume)
-               // if the filesystem is mounted readonly.
-               flagReadonlyWas := flagReadonly
-               for _, fsopt := range strings.Split(args[3], ",") {
-                       if fsopt == "ro" {
-                               flagReadonly = true
-                               break
-                       }
-                       if fsopt == "rw" {
-                               break
-                       }
-               }
-               vs.Set(keepdir)
-               flagReadonly = flagReadonlyWas
-               added++
-       }
-       return added
+       return fmt.Sprintf("%+v", (*vs)[:])
 }
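
Editor's note: only String remains here; Set and the deprecated comma-separated parsing leave this file in this commit. For reference, the flag.Value contract volumeSet satisfies amounts to these two methods — a generic sketch with a hypothetical string-list type:

package sketch

import "fmt"

// stringList is a minimal flag.Value. Registering it with
// flag.Var(&list, "volume", "usage") makes Set run once per
// occurrence of -volume on the command line.
type stringList []string

func (l *stringList) String() string { return fmt.Sprintf("%+v", *l) }

func (l *stringList) Set(value string) error {
	*l = append(*l, value)
	return nil
}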
 
 // TODO(twp): continue moving as much code as possible out of main
@@ -211,53 +133,52 @@ func main() {
        defer log.Println("keepstore exiting, pid", os.Getpid())
 
        var (
-               data_manager_token_file string
-               listen                  string
-               blob_signing_key_file   string
-               permission_ttl_sec      int
-               volumes                 volumeSet
-               pidfile                 string
+               dataManagerTokenFile string
+               listen               string
+               blobSigningKeyFile   string
+               permissionTTLSec     int
+               pidfile              string
        )
        flag.StringVar(
-               &data_manager_token_file,
+               &dataManagerTokenFile,
                "data-manager-token-file",
                "",
                "File with the API token used by the Data Manager. All DELETE "+
                        "requests or GET /index requests must carry this token.")
        flag.BoolVar(
-               &enforce_permissions,
+               &enforcePermissions,
                "enforce-permissions",
                false,
                "Enforce permission signatures on requests.")
        flag.StringVar(
                &listen,
                "listen",
-               DEFAULT_ADDR,
+               DefaultAddr,
                "Listening address, in the form \"host:port\". e.g., 10.0.1.24:8000. Omit the host part to listen on all interfaces.")
        flag.BoolVar(
-               &never_delete,
+               &neverDelete,
                "never-delete",
                true,
                "If set, nothing will be deleted. HTTP 405 will be returned "+
                        "for valid DELETE requests.")
        flag.StringVar(
-               &blob_signing_key_file,
+               &blobSigningKeyFile,
                "permission-key-file",
                "",
                "Synonym for -blob-signing-key-file.")
        flag.StringVar(
-               &blob_signing_key_file,
+               &blobSigningKeyFile,
                "blob-signing-key-file",
                "",
                "File containing the secret key for generating and verifying "+
                        "blob permission signatures.")
        flag.IntVar(
-               &permission_ttl_sec,
+               &permissionTTLSec,
                "permission-ttl",
                0,
                "Synonym for -blob-signature-ttl.")
        flag.IntVar(
-               &permission_ttl_sec,
+               &permissionTTLSec,
                "blob-signature-ttl",
                int(time.Duration(2*7*24*time.Hour).Seconds()),
                "Lifetime of blob permission signatures. "+
@@ -272,14 +193,6 @@ func main() {
                "readonly",
                false,
                "Do not write, delete, or touch anything on the following volumes.")
-       flag.Var(
-               &volumes,
-               "volumes",
-               "Deprecated synonym for -volume.")
-       flag.Var(
-               &volumes,
-               "volume",
-               "Local storage directory. Can be given more than once to add multiple directories. If none are supplied, the default is to use all directories named \"keep\" that exist in the top level directory of a mount point at startup time. Can be a comma-separated list, but this is deprecated: use multiple -volume arguments instead.")
        flag.StringVar(
                &pidfile,
                "pid",
@@ -289,18 +202,14 @@ func main() {
                &maxBuffers,
                "max-buffers",
                maxBuffers,
-               fmt.Sprintf("Maximum RAM to use for data buffers, given in multiples of block size (%d MiB). When this limit is reached, HTTP requests requiring buffers (like GET and PUT) will wait for buffer space to be released.", BLOCKSIZE>>20))
+               fmt.Sprintf("Maximum RAM to use for data buffers, given in multiples of block size (%d MiB). When this limit is reached, HTTP requests requiring buffers (like GET and PUT) will wait for buffer space to be released.", BlockSize>>20))
 
        flag.Parse()
 
-       if never_delete != true {
-               log.Fatal("never_delete must be true, see #6221")
-       }
-
        if maxBuffers < 0 {
                log.Fatal("-max-buffers must be greater than zero.")
        }
-       bufs = newBufferPool(maxBuffers, BLOCKSIZE)
+       bufs = newBufferPool(maxBuffers, BlockSize)
 
        if pidfile != "" {
                f, err := os.OpenFile(pidfile, os.O_RDWR|os.O_CREATE, 0777)
@@ -328,7 +237,7 @@ func main() {
        }
 
        if len(volumes) == 0 {
-               if volumes.Discover() == 0 {
+               if (&unixVolumeAdder{&volumes}).Discover() == 0 {
                        log.Fatal("No volumes found.")
                }
        }
@@ -340,25 +249,30 @@ func main() {
        // Initialize data manager token and permission key.
        // If these tokens are specified but cannot be read,
        // raise a fatal error.
-       if data_manager_token_file != "" {
-               if buf, err := ioutil.ReadFile(data_manager_token_file); err == nil {
-                       data_manager_token = strings.TrimSpace(string(buf))
+       if dataManagerTokenFile != "" {
+               if buf, err := ioutil.ReadFile(dataManagerTokenFile); err == nil {
+                       dataManagerToken = strings.TrimSpace(string(buf))
                } else {
                        log.Fatalf("reading data manager token: %s\n", err)
                }
        }
-       if blob_signing_key_file != "" {
-               if buf, err := ioutil.ReadFile(blob_signing_key_file); err == nil {
+
+       if neverDelete != true && dataManagerToken != TestDataManagerToken {
+               log.Fatal("never_delete must be true, see #6221")
+       }
+
+       if blobSigningKeyFile != "" {
+               if buf, err := ioutil.ReadFile(blobSigningKeyFile); err == nil {
                        PermissionSecret = bytes.TrimSpace(buf)
                } else {
                        log.Fatalf("reading permission key: %s\n", err)
                }
        }
 
-       blob_signature_ttl = time.Duration(permission_ttl_sec) * time.Second
+       blobSignatureTTL = time.Duration(permissionTTLSec) * time.Second
 
        if PermissionSecret == nil {
-               if enforce_permissions {
+               if enforcePermissions {
                        log.Fatal("-enforce-permissions requires a permission key")
                } else {
                        log.Println("Running without a PermissionSecret. Block locators " +
index b89925f5bd20568137b178e9861570c173c8d2f0..8a004b73d44913b562574170b21b19dc780b8ff2 100644 (file)
@@ -12,19 +12,23 @@ import (
        "testing"
 )
 
-var TEST_BLOCK = []byte("The quick brown fox jumps over the lazy dog.")
-var TEST_HASH = "e4d909c290d0fb1ca068ffaddf22cbd0"
-var TEST_HASH_PUT_RESPONSE = "e4d909c290d0fb1ca068ffaddf22cbd0+44\n"
+var TestBlock = []byte("The quick brown fox jumps over the lazy dog.")
+var TestHash = "e4d909c290d0fb1ca068ffaddf22cbd0"
+var TestHashPutResp = "e4d909c290d0fb1ca068ffaddf22cbd0+44\n"
 
-var TEST_BLOCK_2 = []byte("Pack my box with five dozen liquor jugs.")
-var TEST_HASH_2 = "f15ac516f788aec4f30932ffb6395c39"
+var TestBlock2 = []byte("Pack my box with five dozen liquor jugs.")
+var TestHash2 = "f15ac516f788aec4f30932ffb6395c39"
 
-var TEST_BLOCK_3 = []byte("Now is the time for all good men to come to the aid of their country.")
-var TEST_HASH_3 = "eed29bbffbc2dbe5e5ee0bb71888e61f"
+var TestBlock3 = []byte("Now is the time for all good men to come to the aid of their country.")
+var TestHash3 = "eed29bbffbc2dbe5e5ee0bb71888e61f"
 
-// BAD_BLOCK is used to test collisions and corruption.
+// BadBlock is used to test collisions and corruption.
 // It must not match any test hashes.
-var BAD_BLOCK = []byte("The magic words are squeamish ossifrage.")
+var BadBlock = []byte("The magic words are squeamish ossifrage.")
+
+// Empty block
+var EmptyHash = "d41d8cd98f00b204e9800998ecf8427e"
+var EmptyBlock = []byte("")
 
 // TODO(twp): Tests still to be written
 //
@@ -55,17 +59,17 @@ func TestGetBlock(t *testing.T) {
        defer KeepVM.Close()
 
        vols := KeepVM.AllReadable()
-       if err := vols[1].Put(TEST_HASH, TEST_BLOCK); err != nil {
+       if err := vols[1].Put(TestHash, TestBlock); err != nil {
                t.Error(err)
        }
 
        // Check that GetBlock returns success.
-       result, err := GetBlock(TEST_HASH)
+       result, err := GetBlock(TestHash)
        if err != nil {
                t.Errorf("GetBlock error: %s", err)
        }
-       if fmt.Sprint(result) != fmt.Sprint(TEST_BLOCK) {
-               t.Errorf("expected %s, got %s", TEST_BLOCK, result)
+       if fmt.Sprint(result) != fmt.Sprint(TestBlock) {
+               t.Errorf("expected %s, got %s", TestBlock, result)
        }
 }
 
@@ -80,7 +84,7 @@ func TestGetBlockMissing(t *testing.T) {
        defer KeepVM.Close()
 
        // Check that GetBlock returns failure.
-       result, err := GetBlock(TEST_HASH)
+       result, err := GetBlock(TestHash)
        if err != NotFoundError {
                t.Errorf("Expected NotFoundError, got %v", result)
        }
@@ -98,10 +102,10 @@ func TestGetBlockCorrupt(t *testing.T) {
        defer KeepVM.Close()
 
        vols := KeepVM.AllReadable()
-       vols[0].Put(TEST_HASH, BAD_BLOCK)
+       vols[0].Put(TestHash, BadBlock)
 
        // Check that GetBlock returns failure.
-       result, err := GetBlock(TEST_HASH)
+       result, err := GetBlock(TestHash)
        if err != DiskHashError {
                t.Errorf("Expected DiskHashError, got %v (buf: %v)", err, result)
        }
@@ -122,18 +126,18 @@ func TestPutBlockOK(t *testing.T) {
        defer KeepVM.Close()
 
        // Check that PutBlock stores the data as expected.
-       if err := PutBlock(TEST_BLOCK, TEST_HASH); err != nil {
-               t.Fatalf("PutBlock: %v", err)
+       if n, err := PutBlock(TestBlock, TestHash); err != nil || n < 1 {
+               t.Fatalf("PutBlock: n %d err %v", n, err)
        }
 
        vols := KeepVM.AllReadable()
-       result, err := vols[1].Get(TEST_HASH)
+       result, err := vols[1].Get(TestHash)
        if err != nil {
                t.Fatalf("Volume #0 Get returned error: %v", err)
        }
-       if string(result) != string(TEST_BLOCK) {
+       if string(result) != string(TestBlock) {
                t.Fatalf("PutBlock stored '%s', Get retrieved '%s'",
-                       string(TEST_BLOCK), string(result))
+                       string(TestBlock), string(result))
        }
 }
 
@@ -152,18 +156,18 @@ func TestPutBlockOneVol(t *testing.T) {
        vols[0].(*MockVolume).Bad = true
 
        // Check that PutBlock stores the data as expected.
-       if err := PutBlock(TEST_BLOCK, TEST_HASH); err != nil {
-               t.Fatalf("PutBlock: %v", err)
+       if n, err := PutBlock(TestBlock, TestHash); err != nil || n < 1 {
+               t.Fatalf("PutBlock: n %d err %v", n, err)
        }
 
-       result, err := GetBlock(TEST_HASH)
+       result, err := GetBlock(TestHash)
        if err != nil {
                t.Fatalf("GetBlock: %v", err)
        }
-       if string(result) != string(TEST_BLOCK) {
+       if string(result) != string(TestBlock) {
                t.Error("PutBlock/GetBlock mismatch")
                t.Fatalf("PutBlock stored '%s', GetBlock retrieved '%s'",
-                       string(TEST_BLOCK), string(result))
+                       string(TestBlock), string(result))
        }
 }
 
@@ -180,12 +184,12 @@ func TestPutBlockMD5Fail(t *testing.T) {
 
        // Check that PutBlock returns the expected error when the hash does
        // not match the block.
-       if err := PutBlock(BAD_BLOCK, TEST_HASH); err != RequestHashError {
-               t.Error("Expected RequestHashError, got %v", err)
+       if _, err := PutBlock(BadBlock, TestHash); err != RequestHashError {
+               t.Errorf("Expected RequestHashError, got %v", err)
        }
 
        // Confirm that GetBlock fails to return anything.
-       if result, err := GetBlock(TEST_HASH); err != NotFoundError {
+       if result, err := GetBlock(TestHash); err != NotFoundError {
                t.Errorf("GetBlock succeeded after a corrupt block store (result = %s, err = %v)",
                        string(result), err)
        }
@@ -202,17 +206,17 @@ func TestPutBlockCorrupt(t *testing.T) {
        KeepVM = MakeTestVolumeManager(2)
        defer KeepVM.Close()
 
-       // Store a corrupted block under TEST_HASH.
+       // Store a corrupted block under TestHash.
        vols := KeepVM.AllWritable()
-       vols[0].Put(TEST_HASH, BAD_BLOCK)
-       if err := PutBlock(TEST_BLOCK, TEST_HASH); err != nil {
-               t.Errorf("PutBlock: %v", err)
+       vols[0].Put(TestHash, BadBlock)
+       if n, err := PutBlock(TestBlock, TestHash); err != nil || n < 1 {
+               t.Errorf("PutBlock: n %d err %v", n, err)
        }
 
-       // The block on disk should now match TEST_BLOCK.
-       if block, err := GetBlock(TEST_HASH); err != nil {
+       // The block on disk should now match TestBlock.
+       if block, err := GetBlock(TestHash); err != nil {
                t.Errorf("GetBlock: %v", err)
-       } else if bytes.Compare(block, TEST_BLOCK) != 0 {
+       } else if bytes.Compare(block, TestBlock) != 0 {
                t.Errorf("GetBlock returned: '%s'", string(block))
        }
 }
@@ -235,10 +239,10 @@ func TestPutBlockCollision(t *testing.T) {
 
        // Store one block, then attempt to store the other. Confirm that
        // PutBlock reported a CollisionError.
-       if err := PutBlock(b1, locator); err != nil {
+       if _, err := PutBlock(b1, locator); err != nil {
                t.Error(err)
        }
-       if err := PutBlock(b2, locator); err == nil {
+       if _, err := PutBlock(b2, locator); err == nil {
                t.Error("PutBlock did not report a collision")
        } else if err != CollisionError {
                t.Errorf("PutBlock returned %v", err)
@@ -260,35 +264,35 @@ func TestPutBlockTouchFails(t *testing.T) {
        // Store a block and then make the underlying volume bad,
        // so a subsequent attempt to update the file timestamp
        // will fail.
-       vols[0].Put(TEST_HASH, BAD_BLOCK)
-       old_mtime, err := vols[0].Mtime(TEST_HASH)
+       vols[0].Put(TestHash, BadBlock)
+       oldMtime, err := vols[0].Mtime(TestHash)
        if err != nil {
-               t.Fatalf("vols[0].Mtime(%s): %s\n", TEST_HASH, err)
+               t.Fatalf("vols[0].Mtime(%s): %s\n", TestHash, err)
        }
 
        // vols[0].Touch will fail on the next call, so the volume
        // manager will store a copy on vols[1] instead.
        vols[0].(*MockVolume).Touchable = false
-       if err := PutBlock(TEST_BLOCK, TEST_HASH); err != nil {
-               t.Fatalf("PutBlock: %v", err)
+       if n, err := PutBlock(TestBlock, TestHash); err != nil || n < 1 {
+               t.Fatalf("PutBlock: n %d err %v", n, err)
        }
        vols[0].(*MockVolume).Touchable = true
 
        // Now the mtime on the block on vols[0] should be unchanged, and
        // there should be a copy of the block on vols[1].
-       new_mtime, err := vols[0].Mtime(TEST_HASH)
+       newMtime, err := vols[0].Mtime(TestHash)
        if err != nil {
-               t.Fatalf("vols[0].Mtime(%s): %s\n", TEST_HASH, err)
+               t.Fatalf("vols[0].Mtime(%s): %s\n", TestHash, err)
        }
-       if !new_mtime.Equal(old_mtime) {
-               t.Errorf("mtime was changed on vols[0]:\nold_mtime = %v\nnew_mtime = %v\n",
-                       old_mtime, new_mtime)
+       if !newMtime.Equal(oldMtime) {
+               t.Errorf("mtime was changed on vols[0]:\noldMtime = %v\nnewMtime = %v\n",
+                       oldMtime, newMtime)
        }
-       result, err := vols[1].Get(TEST_HASH)
+       result, err := vols[1].Get(TestHash)
        if err != nil {
                t.Fatalf("vols[1]: %v", err)
        }
-       if bytes.Compare(result, TEST_BLOCK) != 0 {
+       if bytes.Compare(result, TestBlock) != 0 {
                t.Errorf("new block does not match test block\nnew block = %v\n", result)
        }
 }
@@ -309,7 +313,7 @@ func TestDiscoverTmpfs(t *testing.T) {
                }
        }
 
-       // Set up a bogus PROC_MOUNTS file.
+       // Set up a bogus ProcMounts file.
        f, err := ioutil.TempFile("", "keeptest")
        if err != nil {
                t.Fatal(err)
@@ -327,10 +331,10 @@ func TestDiscoverTmpfs(t *testing.T) {
                fmt.Fprintf(f, "tmpfs %s tmpfs %s 0 0\n", path.Dir(vol), opts)
        }
        f.Close()
-       PROC_MOUNTS = f.Name()
+       ProcMounts = f.Name()
 
-       var resultVols volumeSet
-       added := resultVols.Discover()
+       resultVols := volumeSet{}
+       added := (&unixVolumeAdder{&resultVols}).Discover()
 
        if added != len(resultVols) {
                t.Errorf("Discover returned %d, but added %d volumes",
@@ -355,7 +359,7 @@ func TestDiscoverTmpfs(t *testing.T) {
 func TestDiscoverNone(t *testing.T) {
        defer teardown()
 
-       // Set up a bogus PROC_MOUNTS file with no Keep vols.
+       // Set up a bogus ProcMounts file with no Keep vols.
        f, err := ioutil.TempFile("", "keeptest")
        if err != nil {
                t.Fatal(err)
@@ -367,10 +371,10 @@ func TestDiscoverNone(t *testing.T) {
        fmt.Fprintln(f, "udev /dev devtmpfs opts 0 0")
        fmt.Fprintln(f, "devpts /dev/pts devpts opts 0 0")
        f.Close()
-       PROC_MOUNTS = f.Name()
+       ProcMounts = f.Name()
 
-       var resultVols volumeSet
-       added := resultVols.Discover()
+       resultVols := volumeSet{}
+       added := (&unixVolumeAdder{&resultVols}).Discover()
        if added != 0 || len(resultVols) != 0 {
                t.Fatalf("got %d, %v; expected 0, []", added, resultVols)
        }
@@ -388,23 +392,23 @@ func TestIndex(t *testing.T) {
        defer KeepVM.Close()
 
        vols := KeepVM.AllReadable()
-       vols[0].Put(TEST_HASH, TEST_BLOCK)
-       vols[1].Put(TEST_HASH_2, TEST_BLOCK_2)
-       vols[0].Put(TEST_HASH_3, TEST_BLOCK_3)
-       vols[0].Put(TEST_HASH+".meta", []byte("metadata"))
-       vols[1].Put(TEST_HASH_2+".meta", []byte("metadata"))
+       vols[0].Put(TestHash, TestBlock)
+       vols[1].Put(TestHash2, TestBlock2)
+       vols[0].Put(TestHash3, TestBlock3)
+       vols[0].Put(TestHash+".meta", []byte("metadata"))
+       vols[1].Put(TestHash2+".meta", []byte("metadata"))
 
        buf := new(bytes.Buffer)
        vols[0].IndexTo("", buf)
        vols[1].IndexTo("", buf)
-       index_rows := strings.Split(string(buf.Bytes()), "\n")
-       sort.Strings(index_rows)
-       sorted_index := strings.Join(index_rows, "\n")
-       expected := `^\n` + TEST_HASH + `\+\d+ \d+\n` +
-               TEST_HASH_3 + `\+\d+ \d+\n` +
-               TEST_HASH_2 + `\+\d+ \d+$`
-
-       match, err := regexp.MatchString(expected, sorted_index)
+       indexRows := strings.Split(string(buf.Bytes()), "\n")
+       sort.Strings(indexRows)
+       sortedIndex := strings.Join(indexRows, "\n")
+       expected := `^\n` + TestHash + `\+\d+ \d+\n` +
+               TestHash3 + `\+\d+ \d+\n` +
+               TestHash2 + `\+\d+ \d+$`
+
+       match, err := regexp.MatchString(expected, sortedIndex)
        if err == nil {
                if !match {
                        t.Errorf("IndexLocators returned:\n%s", string(buf.Bytes()))
@@ -420,8 +424,8 @@ func TestIndex(t *testing.T) {
 
 // MakeTestVolumeManager returns a RRVolumeManager with the specified
 // number of MockVolumes.
-func MakeTestVolumeManager(num_volumes int) VolumeManager {
-       vols := make([]Volume, num_volumes)
+func MakeTestVolumeManager(numVolumes int) VolumeManager {
+       vols := make([]Volume, numVolumes)
        for i := range vols {
                vols[i] = CreateMockVolume()
        }
@@ -430,8 +434,8 @@ func MakeTestVolumeManager(num_volumes int) VolumeManager {
 
 // teardown cleans up after each test.
 func teardown() {
-       data_manager_token = ""
-       enforce_permissions = false
+       dataManagerToken = ""
+       enforcePermissions = false
        PermissionSecret = nil
        KeepVM = nil
 }
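
The tests above exercise the changed PutBlock contract: it now returns the
number of replicas written along with any error, instead of an error alone.
A minimal sketch of a caller under the new contract, assuming the
package-internal PutBlock/GetBlock and test fixtures shown in this diff
(KeepVM setup and imports omitted):

	// Store a block and require at least one replica.
	n, err := PutBlock(TestBlock, TestHash)
	if err != nil {
		t.Fatalf("PutBlock: %v", err)
	}
	if n < 1 {
		t.Fatalf("PutBlock stored %d replicas, expected at least 1", n)
	}
	// Read it back and compare.
	if buf, err := GetBlock(TestHash); err != nil {
		t.Fatalf("GetBlock: %v", err)
	} else if !bytes.Equal(buf, TestBlock) {
		t.Fatalf("GetBlock returned %q, expected %q", buf, TestBlock)
	}
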
index b622d1d3eefd2eb3bedfb5e4260976cb2075f85a..47bb6d77179f226dc20d84a525fa5a0de3a5f7af 100644 (file)
@@ -11,6 +11,7 @@ import (
        "time"
 )
 
+// LoggingResponseWriter embeds http.ResponseWriter and records the response status, length, and body
 type LoggingResponseWriter struct {
        Status int
        Length int
@@ -18,6 +19,7 @@ type LoggingResponseWriter struct {
        ResponseBody string
 }
 
+// WriteHeader records the status code and forwards it to the wrapped ResponseWriter
 func (loggingWriter *LoggingResponseWriter) WriteHeader(code int) {
        loggingWriter.Status = code
        loggingWriter.ResponseWriter.WriteHeader(code)
@@ -31,10 +33,12 @@ func (loggingWriter *LoggingResponseWriter) Write(data []byte) (int, error) {
        return loggingWriter.ResponseWriter.Write(data)
 }
 
+// LoggingRESTRouter is used to add logging capabilities to mux.Router
 type LoggingRESTRouter struct {
        router *mux.Router
 }
 
+// MakeLoggingRESTRouter initializes LoggingRESTRouter
 func MakeLoggingRESTRouter() *LoggingRESTRouter {
        router := MakeRESTRouter()
        return (&LoggingRESTRouter{router})
index e75d91058785e018a064f3255d2f27b4d005ca41..24b549cd656d5eb28cd720be6810ba6c5d2a0192 100644 (file)
@@ -7,17 +7,17 @@ type MockMutex struct {
 
 func NewMockMutex() *MockMutex {
        return &MockMutex{
-               AllowLock: make(chan struct{}),
+               AllowLock:   make(chan struct{}),
                AllowUnlock: make(chan struct{}),
        }
 }
 
 // Lock waits for someone to send to AllowLock.
 func (m *MockMutex) Lock() {
-       <- m.AllowLock
+       <-m.AllowLock
 }
 
 // Unlock waits for someone to send to AllowUnlock.
 func (m *MockMutex) Unlock() {
-       <- m.AllowUnlock
+       <-m.AllowUnlock
 }
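
MockMutex lets a test decide exactly when a worker goroutine may acquire or
release the lock: Lock and Unlock block until the test sends on AllowLock
and AllowUnlock respectively. A minimal usage sketch (the worker body is
hypothetical):

	m := NewMockMutex()
	done := make(chan struct{})
	go func() {
		m.Lock()   // blocks until the test sends to AllowLock
		// ... code under test that must hold the lock ...
		m.Unlock() // blocks until the test sends to AllowUnlock
		close(done)
	}()
	m.AllowLock <- struct{}{}   // let the worker acquire the lock
	m.AllowUnlock <- struct{}{} // let it release and finish
	<-done
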
index 65160b1868913638e8315a266e0b3736ecfbe14c..6168a321c27e464fff5d0555ed363b2636331c76 100644 (file)
@@ -1,47 +1,7 @@
-/*
-Permissions management on Arvados locator hashes.
-
-The permissions structure for Arvados is as follows (from
-https://arvados.org/issues/2328)
-
-A Keep locator string has the following format:
-
-    [hash]+[size]+A[signature]@[timestamp]
-
-The "signature" string here is a cryptographic hash, expressed as a
-string of hexadecimal digits, and timestamp is a 32-bit Unix timestamp
-expressed as a hexadecimal number.  e.g.:
-
-    acbd18db4cc2f85cedef654fccc4a4d8+3+A257f3f5f5f0a4e4626a18fc74bd42ec34dcb228a@7fffffff
-
-The signature represents a guarantee that this locator was generated
-by either Keep or the API server for use with the supplied API token.
-If a request to Keep includes a locator with a valid signature and is
-accompanied by the proper API token, the user has permission to GET
-that object.
-
-The signature may be generated either by Keep (after the user writes a
-block) or by the API server (if the user has can_read permission on
-the specified object). Keep and API server share a secret that is used
-to generate signatures.
-
-To verify a permission hint, Keep generates a new hint for the
-requested object (using the locator string, the timestamp, the
-permission secret and the user's API token, which must appear in the
-request headers) and compares it against the hint included in the
-request. If the permissions do not match, or if the API token is not
-present, Keep returns a 401 error.
-*/
-
 package main
 
 import (
-       "crypto/hmac"
-       "crypto/sha1"
-       "fmt"
-       "regexp"
-       "strconv"
-       "strings"
+       "git.curoverse.com/arvados.git/sdk/go/keepclient"
        "time"
 )
 
@@ -50,68 +10,23 @@ import (
 // key.
 var PermissionSecret []byte
 
-// MakePermSignature returns a string representing the signed permission
-// hint for the blob identified by blob_hash, api_token and expiration timestamp.
-func MakePermSignature(blob_hash string, api_token string, expiry string) string {
-       hmac := hmac.New(sha1.New, PermissionSecret)
-       hmac.Write([]byte(blob_hash))
-       hmac.Write([]byte("@"))
-       hmac.Write([]byte(api_token))
-       hmac.Write([]byte("@"))
-       hmac.Write([]byte(expiry))
-       digest := hmac.Sum(nil)
-       return fmt.Sprintf("%x", digest)
-}
-
-// SignLocator takes a blob_locator, an api_token and an expiry time, and
+// SignLocator takes a blobLocator, an apiToken and an expiry time, and
 // returns a signed locator string.
-func SignLocator(blob_locator string, api_token string, expiry time.Time) string {
-       // If no permission secret or API token is available,
-       // return an unsigned locator.
-       if PermissionSecret == nil || api_token == "" {
-               return blob_locator
-       }
-       // Extract the hash from the blob locator, omitting any size hint that may be present.
-       blob_hash := strings.Split(blob_locator, "+")[0]
-       // Return the signed locator string.
-       timestamp_hex := fmt.Sprintf("%08x", expiry.Unix())
-       return blob_locator +
-               "+A" + MakePermSignature(blob_hash, api_token, timestamp_hex) +
-               "@" + timestamp_hex
+func SignLocator(blobLocator, apiToken string, expiry time.Time) string {
+       return keepclient.SignLocator(blobLocator, apiToken, expiry, PermissionSecret)
 }
 
-var signedLocatorRe = regexp.MustCompile(`^([[:xdigit:]]{32}).*\+A([[:xdigit:]]{40})@([[:xdigit:]]{8})`)
-
-// VerifySignature returns nil if the signature on the signed_locator
-// can be verified using the given api_token. Otherwise it returns
+// VerifySignature returns nil if the signature on the signedLocator
+// can be verified using the given apiToken. Otherwise it returns
 // either ExpiredError (if the timestamp has expired, which is
 // something the client could have figured out independently) or
 // PermissionError.
-func VerifySignature(signed_locator string, api_token string) error {
-       matches := signedLocatorRe.FindStringSubmatch(signed_locator)
-       if matches == nil {
-               // Could not find a permission signature at all
-               return PermissionError
-       }
-       blob_hash := matches[1]
-       sig_hex := matches[2]
-       exp_hex := matches[3]
-       if exp_time, err := ParseHexTimestamp(exp_hex); err != nil {
-               return PermissionError
-       } else if exp_time.Before(time.Now()) {
+func VerifySignature(signedLocator, apiToken string) error {
+       err := keepclient.VerifySignature(signedLocator, apiToken, PermissionSecret)
+       if err == keepclient.ErrSignatureExpired {
                return ExpiredError
-       }
-       if sig_hex != MakePermSignature(blob_hash, api_token, exp_hex) {
+       } else if err != nil {
                return PermissionError
        }
        return nil
 }
-
-func ParseHexTimestamp(timestamp_hex string) (ts time.Time, err error) {
-       if ts_int, e := strconv.ParseInt(timestamp_hex, 16, 0); e == nil {
-               ts = time.Unix(ts_int, 0)
-       } else {
-               err = e
-       }
-       return ts, err
-}
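
The helpers removed here now live in sdk/go/keepclient, but the scheme the
deleted comment described is unchanged: the signature is an HMAC-SHA1 over
the blob hash, the API token, and the hex expiry timestamp, joined by "@".
A self-contained sketch of that computation (token and secret are
hypothetical placeholders, not real credentials):

	package main

	import (
		"crypto/hmac"
		"crypto/sha1"
		"fmt"
	)

	func main() {
		blobHash := "acbd18db4cc2f85cedef654fccc4a4d8" // MD5 of "foo"
		apiToken := "exampletoken"                     // hypothetical
		expiryHex := "7fffffff"                        // hex Unix timestamp
		secret := []byte("examplepermissionsecret")    // hypothetical

		mac := hmac.New(sha1.New, secret)
		mac.Write([]byte(blobHash))
		mac.Write([]byte("@"))
		mac.Write([]byte(apiToken))
		mac.Write([]byte("@"))
		mac.Write([]byte(expiryHex))

		// Signed locator: [hash]+[size]+A[signature]@[timestamp]
		fmt.Printf("%s+3+A%x@%s\n", blobHash, mac.Sum(nil), expiryHex)
	}
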
index e43cb8dcd99bf39d4318153525b4f46c660239ce..f4443fc7be1b423c4f535cccae66f0de32e71648 100644 (file)
 package main
 
 import (
+       "strconv"
        "testing"
        "time"
 )
 
 const (
-       known_hash    = "acbd18db4cc2f85cedef654fccc4a4d8"
-       known_locator = known_hash + "+3"
-       known_token   = "hocfupkn2pjhrpgp2vxv8rsku7tvtx49arbc9s4bvu7p7wxqvk"
-       known_key     = "13u9fkuccnboeewr0ne3mvapk28epf68a3bhj9q8sb4l6e4e5mkk" +
+       knownHash    = "acbd18db4cc2f85cedef654fccc4a4d8"
+       knownLocator = knownHash + "+3"
+       knownToken   = "hocfupkn2pjhrpgp2vxv8rsku7tvtx49arbc9s4bvu7p7wxqvk"
+       knownKey     = "13u9fkuccnboeewr0ne3mvapk28epf68a3bhj9q8sb4l6e4e5mkk" +
                "p6nhj2mmpscgu1zze5h5enydxfe3j215024u16ij4hjaiqs5u4pzsl3nczmaoxnc" +
                "ljkm4875xqn4xv058koz3vkptmzhyheiy6wzevzjmdvxhvcqsvr5abhl15c2d4o4" +
                "jhl0s91lojy1mtrzqqvprqcverls0xvy9vai9t1l1lvvazpuadafm71jl4mrwq2y" +
                "gokee3eamvjy8qq1fvy238838enjmy5wzy2md7yvsitp5vztft6j4q866efym7e6" +
                "vu5wm9fpnwjyxfldw3vbo01mgjs75rgo7qioh8z8ij7jpyp8508okhgbbex3ceei" +
                "786u5rw2a9gx743dj3fgq2irk"
-       known_signature      = "257f3f5f5f0a4e4626a18fc74bd42ec34dcb228a"
-       known_timestamp      = "7fffffff"
-       known_sig_hint       = "+A" + known_signature + "@" + known_timestamp
-       known_signed_locator = known_locator + known_sig_hint
+       knownSignature     = "257f3f5f5f0a4e4626a18fc74bd42ec34dcb228a"
+       knownTimestamp     = "7fffffff"
+       knownSigHint       = "+A" + knownSignature + "@" + knownTimestamp
+       knownSignedLocator = knownLocator + knownSigHint
 )
 
 func TestSignLocator(t *testing.T) {
-       PermissionSecret = []byte(known_key)
-       defer func() { PermissionSecret = nil }()
+       defer func(b []byte) {
+               PermissionSecret = b
+       }(PermissionSecret)
 
-       if ts, err := ParseHexTimestamp(known_timestamp); err != nil {
-               t.Errorf("bad known_timestamp %s", known_timestamp)
-       } else {
-               if known_signed_locator != SignLocator(known_locator, known_token, ts) {
-                       t.Fail()
-               }
+       tsInt, err := strconv.ParseInt(knownTimestamp, 16, 0)
+       if err != nil {
+               t.Fatal(err)
        }
-}
-
-func TestVerifySignature(t *testing.T) {
-       PermissionSecret = []byte(known_key)
-       defer func() { PermissionSecret = nil }()
-
-       if VerifySignature(known_signed_locator, known_token) != nil {
-               t.Fail()
-       }
-}
-
-func TestVerifySignatureExtraHints(t *testing.T) {
-       PermissionSecret = []byte(known_key)
-       defer func() { PermissionSecret = nil }()
-
-       if VerifySignature(known_locator+"+K@xyzzy"+known_sig_hint, known_token) != nil {
-               t.Fatal("Verify cannot handle hint before permission signature")
-       }
-
-       if VerifySignature(known_locator+known_sig_hint+"+Zfoo", known_token) != nil {
-               t.Fatal("Verify cannot handle hint after permission signature")
-       }
-
-       if VerifySignature(known_locator+"+K@xyzzy"+known_sig_hint+"+Zfoo", known_token) != nil {
-               t.Fatal("Verify cannot handle hints around permission signature")
-       }
-}
-
-// The size hint on the locator string should not affect signature validation.
-func TestVerifySignatureWrongSize(t *testing.T) {
-       PermissionSecret = []byte(known_key)
-       defer func() { PermissionSecret = nil }()
+       t0 := time.Unix(tsInt, 0)
 
-       if VerifySignature(known_hash+"+999999"+known_sig_hint, known_token) != nil {
-               t.Fatal("Verify cannot handle incorrect size hint")
+       PermissionSecret = []byte(knownKey)
+       if x := SignLocator(knownLocator, knownToken, t0); x != knownSignedLocator {
+               t.Fatalf("Got %+q, expected %+q", x, knownSignedLocator)
        }
 
-       if VerifySignature(known_hash+known_sig_hint, known_token) != nil {
-               t.Fatal("Verify cannot handle missing size hint")
+       PermissionSecret = []byte("arbitrarykey")
+       if x := SignLocator(knownLocator, knownToken, t0); x == knownSignedLocator {
+               t.Fatalf("Got same signature %+q, even though PermissionSecret changed", x)
        }
 }
 
-func TestVerifySignatureBadSig(t *testing.T) {
-       PermissionSecret = []byte(known_key)
-       defer func() { PermissionSecret = nil }()
+func TestVerifyLocator(t *testing.T) {
+       defer func(b []byte) {
+               PermissionSecret = b
+       }(PermissionSecret)
 
-       bad_locator := known_locator + "+Aaaaaaaaaaaaaaaa@" + known_timestamp
-       if VerifySignature(bad_locator, known_token) != PermissionError {
-               t.Fail()
+       PermissionSecret = []byte(knownKey)
+       if err := VerifySignature(knownSignedLocator, knownToken); err != nil {
+               t.Fatal(err)
        }
-}
-
-func TestVerifySignatureBadTimestamp(t *testing.T) {
-       PermissionSecret = []byte(known_key)
-       defer func() { PermissionSecret = nil }()
-
-       bad_locator := known_locator + "+A" + known_signature + "@OOOOOOOl"
-       if VerifySignature(bad_locator, known_token) != PermissionError {
-               t.Fail()
-       }
-}
-
-func TestVerifySignatureBadSecret(t *testing.T) {
-       PermissionSecret = []byte("00000000000000000000")
-       defer func() { PermissionSecret = nil }()
-
-       if VerifySignature(known_signed_locator, known_token) != PermissionError {
-               t.Fail()
-       }
-}
-
-func TestVerifySignatureBadToken(t *testing.T) {
-       PermissionSecret = []byte(known_key)
-       defer func() { PermissionSecret = nil }()
-
-       if VerifySignature(known_signed_locator, "00000000") != PermissionError {
-               t.Fail()
-       }
-}
-
-func TestVerifySignatureExpired(t *testing.T) {
-       PermissionSecret = []byte(known_key)
-       defer func() { PermissionSecret = nil }()
 
-       yesterday := time.Now().AddDate(0, 0, -1)
-       expired_locator := SignLocator(known_hash, known_token, yesterday)
-       if VerifySignature(expired_locator, known_token) != ExpiredError {
-               t.Fail()
+       PermissionSecret = []byte("arbitrarykey")
+       if err := VerifySignature(knownSignedLocator, knownToken); err == nil {
+               t.Fatal("Verified signature even with wrong PermissionSecret")
        }
 }
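
Code outside keepstore can do the same round trip through the keepclient
package directly, matching the wrappers tested above. A minimal sketch
(token and secret are hypothetical):

	package main

	import (
		"fmt"
		"time"

		"git.curoverse.com/arvados.git/sdk/go/keepclient"
	)

	func main() {
		secret := []byte("examplepermissionsecret")
		token := "exampletoken"
		signed := keepclient.SignLocator(
			"acbd18db4cc2f85cedef654fccc4a4d8+3", token,
			time.Now().Add(time.Hour), secret)
		fmt.Println(signed)
		if err := keepclient.VerifySignature(signed, token, secret); err != nil {
			fmt.Println("verification failed:", err)
		}
	}
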
index acf861119f47fd1b765bcad461d826c369151968..2626d4bf68e1594f394ad4539f0f32a90fe00339 100644 (file)
@@ -11,19 +11,18 @@ import (
        "time"
 )
 
-/*
-       Keepstore initiates pull worker channel goroutine.
-       The channel will process pull list.
-               For each (next) pull request:
-                       For each locator listed, execute Pull on the server(s) listed
-                       Skip the rest of the servers if no errors
-               Repeat
-*/
+// RunPullWorker is used by Keepstore to initiate the pull worker channel goroutine.
+//     The channel will process the pull list.
+//             For each (next) pull request:
+//                     For each locator listed, execute Pull on the server(s) listed
+//                     Skip the rest of the servers if no errors occur
+//             Repeat
+//
 func RunPullWorker(pullq *WorkQueue, keepClient *keepclient.KeepClient) {
        nextItem := pullq.NextItem
        for item := range nextItem {
                pullRequest := item.(PullRequest)
-               err := PullItemAndProcess(item.(PullRequest), GenerateRandomApiToken(), keepClient)
+               err := PullItemAndProcess(item.(PullRequest), GenerateRandomAPIToken(), keepClient)
                pullq.DoneItem <- struct{}{}
                if err == nil {
                        log.Printf("Pull %s success", pullRequest)
@@ -33,25 +32,25 @@ func RunPullWorker(pullq *WorkQueue, keepClient *keepclient.KeepClient) {
        }
 }
 
-/*
-       For each Pull request:
-               Generate a random API token.
-               Generate a permission signature using this token, timestamp ~60 seconds in the future, and desired block hash.
-               Using this token & signature, retrieve the given block.
-               Write to storage
-*/
+// PullItemAndProcess processes a single pull request from the pull queue.
+//     For each Pull request:
+//             Generate a random API token.
+//             Generate a permission signature using this token, timestamp ~60 seconds in the future, and desired block hash.
+//             Using this token & signature, retrieve the given block.
+//             Write to storage
+//
 func PullItemAndProcess(pullRequest PullRequest, token string, keepClient *keepclient.KeepClient) (err error) {
        keepClient.Arvados.ApiToken = token
 
-       service_roots := make(map[string]string)
+       serviceRoots := make(map[string]string)
        for _, addr := range pullRequest.Servers {
-               service_roots[addr] = addr
+               serviceRoots[addr] = addr
        }
-       keepClient.SetServiceRoots(service_roots, nil, nil)
+       keepClient.SetServiceRoots(serviceRoots, nil, nil)
 
        // Generate signature with a random token
-       expires_at := time.Now().Add(60 * time.Second)
-       signedLocator := SignLocator(pullRequest.Locator, token, expires_at)
+       expiresAt := time.Now().Add(60 * time.Second)
+       signedLocator := SignLocator(pullRequest.Locator, token, expiresAt)
 
        reader, contentLen, _, err := GetContent(signedLocator, keepClient)
        if err != nil {
@@ -62,16 +61,16 @@ func PullItemAndProcess(pullRequest PullRequest, token string, keepClient *keepc
        }
        defer reader.Close()
 
-       read_content, err := ioutil.ReadAll(reader)
+       readContent, err := ioutil.ReadAll(reader)
        if err != nil {
                return err
        }
 
-       if (read_content == nil) || (int64(len(read_content)) != contentLen) {
+       if (readContent == nil) || (int64(len(readContent)) != contentLen) {
                return errors.New(fmt.Sprintf("Content not found for: %s", signedLocator))
        }
 
-       err = PutContent(read_content, pullRequest.Locator)
+       err = PutContent(readContent, pullRequest.Locator)
        return
 }
 
@@ -82,19 +81,20 @@ var GetContent = func(signedLocator string, keepClient *keepclient.KeepClient) (
        return reader, blocklen, url, err
 }
 
-const ALPHA_NUMERIC = "0123456789abcdefghijklmnopqrstuvwxyz"
+const alphaNumeric = "0123456789abcdefghijklmnopqrstuvwxyz"
 
-func GenerateRandomApiToken() string {
+// GenerateRandomAPIToken generates a random API token
+func GenerateRandomAPIToken() string {
        var bytes = make([]byte, 36)
        rand.Read(bytes)
        for i, b := range bytes {
-               bytes[i] = ALPHA_NUMERIC[b%byte(len(ALPHA_NUMERIC))]
+               bytes[i] = alphaNumeric[b%byte(len(alphaNumeric))]
        }
        return (string(bytes))
 }
 
 // Put block
 var PutContent = func(content []byte, locator string) (err error) {
-       err = PutBlock(content, locator)
+       _, err = PutBlock(content, locator)
        return
 }
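
Putting the pieces together, each queue item is handled roughly as follows;
a sketch mirroring RunPullWorker's body (keepClient construction omitted,
server addresses hypothetical):

	pr := PullRequest{
		Locator: "acbd18db4cc2f85cedef654fccc4a4d8+3",
		Servers: []string{"keep0.example:25107", "keep1.example:25107"},
	}
	if err := PullItemAndProcess(pr, GenerateRandomAPIToken(), keepClient); err != nil {
		log.Printf("Pull %s error: %s", pr, err)
	} else {
		log.Printf("Pull %s success", pr)
	}
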
index 3e57407369c0dccf8216e3e0835820c0bc419b55..3a3069ab7745c1efc0a21fd8c706565f65c525e0 100644 (file)
@@ -27,7 +27,7 @@ func SetupPullWorkerIntegrationTest(t *testing.T, testData PullWorkIntegrationTe
 
        // start api and keep servers
        arvadostest.StartAPI()
-       arvadostest.StartKeep()
+       arvadostest.StartKeep(2, false)
 
        // make arvadosclient
        arv, err := arvadosclient.MakeArvadosClient()
@@ -128,7 +128,7 @@ func performPullWorkerIntegrationTest(testData PullWorkIntegrationTestData, pull
                return rdr, int64(len(testData.Content)), "", nil
        }
 
-       keepClient.Arvados.ApiToken = GenerateRandomApiToken()
+       keepClient.Arvados.ApiToken = GenerateRandomAPIToken()
        err := PullItemAndProcess(pullRequest, keepClient.Arvados.ApiToken, keepClient)
 
        if len(testData.GetError) > 0 {
index 37d83b32802af1432bf7ed8f2af5826a3d757914..c6a41953842b6c2ab9403c84c04d17cee839fabd 100644 (file)
@@ -37,7 +37,7 @@ func (s *PullWorkerTestSuite) SetUpTest(c *C) {
 
        // When a new pull request arrives, the old one will be overwritten.
        // This behavior is verified using these two maps in the
-       // "TestPullWorker_pull_list_with_two_items_latest_replacing_old"
+       // "TestPullWorkerPullList_with_two_items_latest_replacing_old"
        testPullLists = make(map[string]string)
 }
 
@@ -53,7 +53,7 @@ func RunTestPullWorker(c *C) {
        go RunPullWorker(pullq, keepClient)
 }
 
-var first_pull_list = []byte(`[
+var firstPullList = []byte(`[
                {
                        "locator":"acbd18db4cc2f85cedef654fccc4a4d8+3",
                        "servers":[
@@ -68,7 +68,7 @@ var first_pull_list = []byte(`[
                }
        ]`)
 
-var second_pull_list = []byte(`[
+var secondPullList = []byte(`[
                {
                        "locator":"73feffa4b7f6bb68e44cf984c85f6e88+3",
                        "servers":[
@@ -79,44 +79,44 @@ var second_pull_list = []byte(`[
        ]`)
 
 type PullWorkerTestData struct {
-       name          string
-       req           RequestTester
-       response_code int
-       response_body string
-       read_content  string
-       read_error    bool
-       put_error     bool
+       name         string
+       req          RequestTester
+       responseCode int
+       responseBody string
+       readContent  string
+       readError    bool
+       putError     bool
 }
 
-func (s *PullWorkerTestSuite) TestPullWorker_pull_list_with_two_locators(c *C) {
+func (s *PullWorkerTestSuite) TestPullWorkerPullList_with_two_locators(c *C) {
        defer teardown()
 
-       data_manager_token = "DATA MANAGER TOKEN"
+       dataManagerToken = "DATA MANAGER TOKEN"
        testData := PullWorkerTestData{
-               name:          "TestPullWorker_pull_list_with_two_locators",
-               req:           RequestTester{"/pull", data_manager_token, "PUT", first_pull_list},
-               response_code: http.StatusOK,
-               response_body: "Received 2 pull requests\n",
-               read_content:  "hello",
-               read_error:    false,
-               put_error:     false,
+               name:         "TestPullWorkerPullList_with_two_locators",
+               req:          RequestTester{"/pull", dataManagerToken, "PUT", firstPullList},
+               responseCode: http.StatusOK,
+               responseBody: "Received 2 pull requests\n",
+               readContent:  "hello",
+               readError:    false,
+               putError:     false,
        }
 
        performTest(testData, c)
 }
 
-func (s *PullWorkerTestSuite) TestPullWorker_pull_list_with_one_locator(c *C) {
+func (s *PullWorkerTestSuite) TestPullWorkerPullList_with_one_locator(c *C) {
        defer teardown()
 
-       data_manager_token = "DATA MANAGER TOKEN"
+       dataManagerToken = "DATA MANAGER TOKEN"
        testData := PullWorkerTestData{
-               name:          "TestPullWorker_pull_list_with_one_locator",
-               req:           RequestTester{"/pull", data_manager_token, "PUT", second_pull_list},
-               response_code: http.StatusOK,
-               response_body: "Received 1 pull requests\n",
-               read_content:  "hola",
-               read_error:    false,
-               put_error:     false,
+               name:         "TestPullWorkerPullList_with_one_locator",
+               req:          RequestTester{"/pull", dataManagerToken, "PUT", secondPullList},
+               responseCode: http.StatusOK,
+               responseBody: "Received 1 pull requests\n",
+               readContent:  "hola",
+               readError:    false,
+               putError:     false,
        }
 
        performTest(testData, c)
@@ -125,15 +125,15 @@ func (s *PullWorkerTestSuite) TestPullWorker_pull_list_with_one_locator(c *C) {
 func (s *PullWorkerTestSuite) TestPullWorker_error_on_get_one_locator(c *C) {
        defer teardown()
 
-       data_manager_token = "DATA MANAGER TOKEN"
+       dataManagerToken = "DATA MANAGER TOKEN"
        testData := PullWorkerTestData{
-               name:          "TestPullWorker_error_on_get_one_locator",
-               req:           RequestTester{"/pull", data_manager_token, "PUT", second_pull_list},
-               response_code: http.StatusOK,
-               response_body: "Received 1 pull requests\n",
-               read_content:  "unused",
-               read_error:    true,
-               put_error:     false,
+               name:         "TestPullWorker_error_on_get_one_locator",
+               req:          RequestTester{"/pull", dataManagerToken, "PUT", secondPullList},
+               responseCode: http.StatusOK,
+               responseBody: "Received 1 pull requests\n",
+               readContent:  "unused",
+               readError:    true,
+               putError:     false,
        }
 
        performTest(testData, c)
@@ -142,15 +142,15 @@ func (s *PullWorkerTestSuite) TestPullWorker_error_on_get_one_locator(c *C) {
 func (s *PullWorkerTestSuite) TestPullWorker_error_on_get_two_locators(c *C) {
        defer teardown()
 
-       data_manager_token = "DATA MANAGER TOKEN"
+       dataManagerToken = "DATA MANAGER TOKEN"
        testData := PullWorkerTestData{
-               name:          "TestPullWorker_error_on_get_two_locators",
-               req:           RequestTester{"/pull", data_manager_token, "PUT", first_pull_list},
-               response_code: http.StatusOK,
-               response_body: "Received 2 pull requests\n",
-               read_content:  "unused",
-               read_error:    true,
-               put_error:     false,
+               name:         "TestPullWorker_error_on_get_two_locators",
+               req:          RequestTester{"/pull", dataManagerToken, "PUT", firstPullList},
+               responseCode: http.StatusOK,
+               responseBody: "Received 2 pull requests\n",
+               readContent:  "unused",
+               readError:    true,
+               putError:     false,
        }
 
        performTest(testData, c)
@@ -159,15 +159,15 @@ func (s *PullWorkerTestSuite) TestPullWorker_error_on_get_two_locators(c *C) {
 func (s *PullWorkerTestSuite) TestPullWorker_error_on_put_one_locator(c *C) {
        defer teardown()
 
-       data_manager_token = "DATA MANAGER TOKEN"
+       dataManagerToken = "DATA MANAGER TOKEN"
        testData := PullWorkerTestData{
-               name:          "TestPullWorker_error_on_put_one_locator",
-               req:           RequestTester{"/pull", data_manager_token, "PUT", second_pull_list},
-               response_code: http.StatusOK,
-               response_body: "Received 1 pull requests\n",
-               read_content:  "hello hello",
-               read_error:    false,
-               put_error:     true,
+               name:         "TestPullWorker_error_on_put_one_locator",
+               req:          RequestTester{"/pull", dataManagerToken, "PUT", secondPullList},
+               responseCode: http.StatusOK,
+               responseBody: "Received 1 pull requests\n",
+               readContent:  "hello hello",
+               readError:    false,
+               putError:     true,
        }
 
        performTest(testData, c)
@@ -176,15 +176,15 @@ func (s *PullWorkerTestSuite) TestPullWorker_error_on_put_one_locator(c *C) {
 func (s *PullWorkerTestSuite) TestPullWorker_error_on_put_two_locators(c *C) {
        defer teardown()
 
-       data_manager_token = "DATA MANAGER TOKEN"
+       dataManagerToken = "DATA MANAGER TOKEN"
        testData := PullWorkerTestData{
-               name:          "TestPullWorker_error_on_put_two_locators",
-               req:           RequestTester{"/pull", data_manager_token, "PUT", first_pull_list},
-               response_code: http.StatusOK,
-               response_body: "Received 2 pull requests\n",
-               read_content:  "hello again",
-               read_error:    false,
-               put_error:     true,
+               name:         "TestPullWorker_error_on_put_two_locators",
+               req:          RequestTester{"/pull", dataManagerToken, "PUT", firstPullList},
+               responseCode: http.StatusOK,
+               responseBody: "Received 2 pull requests\n",
+               readContent:  "hello again",
+               readError:    false,
+               putError:     true,
        }
 
        performTest(testData, c)
@@ -194,7 +194,7 @@ func (s *PullWorkerTestSuite) TestPullWorker_error_on_put_two_locators(c *C) {
 // is used to check that behavior by first putting an item on the queue,
 // and then performing the test. Thus the "testPullLists" has two entries;
 // however, processedPullLists will see only the newest item in the list.
-func (s *PullWorkerTestSuite) TestPullWorker_pull_list_with_two_items_latest_replacing_old(c *C) {
+func (s *PullWorkerTestSuite) TestPullWorkerPullList_with_two_items_latest_replacing_old(c *C) {
        defer teardown()
 
        var firstInput = []int{1}
@@ -202,34 +202,34 @@ func (s *PullWorkerTestSuite) TestPullWorker_pull_list_with_two_items_latest_rep
        pullq.ReplaceQueue(makeTestWorkList(firstInput))
        testPullLists["Added_before_actual_test_item"] = string(1)
 
-       data_manager_token = "DATA MANAGER TOKEN"
+       dataManagerToken = "DATA MANAGER TOKEN"
        testData := PullWorkerTestData{
-               name:          "TestPullWorker_pull_list_with_two_items_latest_replacing_old",
-               req:           RequestTester{"/pull", data_manager_token, "PUT", second_pull_list},
-               response_code: http.StatusOK,
-               response_body: "Received 1 pull requests\n",
-               read_content:  "hola de nuevo",
-               read_error:    false,
-               put_error:     false,
+               name:         "TestPullWorkerPullList_with_two_items_latest_replacing_old",
+               req:          RequestTester{"/pull", dataManagerToken, "PUT", secondPullList},
+               responseCode: http.StatusOK,
+               responseBody: "Received 1 pull requests\n",
+               readContent:  "hola de nuevo",
+               readError:    false,
+               putError:     false,
        }
 
        performTest(testData, c)
 }
 
 // In this case, the item will not be placed on pullq
-func (s *PullWorkerTestSuite) TestPullWorker_invalid_data_manager_token(c *C) {
+func (s *PullWorkerTestSuite) TestPullWorker_invalid_dataManagerToken(c *C) {
        defer teardown()
 
-       data_manager_token = "DATA MANAGER TOKEN"
+       dataManagerToken = "DATA MANAGER TOKEN"
 
        testData := PullWorkerTestData{
-               name:          "TestPullWorker_pull_list_with_two_locators",
-               req:           RequestTester{"/pull", "invalid_data_manager_token", "PUT", first_pull_list},
-               response_code: http.StatusUnauthorized,
-               response_body: "Unauthorized\n",
-               read_content:  "hello",
-               read_error:    false,
-               put_error:     false,
+               name:         "TestPullWorkerPullList_with_two_locators",
+               req:          RequestTester{"/pull", "invalid_dataManagerToken", "PUT", firstPullList},
+               responseCode: http.StatusUnauthorized,
+               responseBody: "Unauthorized\n",
+               readContent:  "hello",
+               readError:    false,
+               putError:     false,
        }
 
        performTest(testData, c)
@@ -243,7 +243,7 @@ func performTest(testData PullWorkerTestData, c *C) {
        defer pullq.Close()
 
        currentTestData = testData
-       testPullLists[testData.name] = testData.response_body
+       testPullLists[testData.name] = testData.responseBody
 
        processedPullLists := make(map[string]string)
 
@@ -253,53 +253,51 @@ func performTest(testData PullWorkerTestData, c *C) {
        }(GetContent)
        GetContent = func(signedLocator string, keepClient *keepclient.KeepClient) (reader io.ReadCloser, contentLength int64, url string, err error) {
                c.Assert(getStatusItem("PullQueue", "InProgress"), Equals, float64(1))
-               processedPullLists[testData.name] = testData.response_body
-               if testData.read_error {
+               processedPullLists[testData.name] = testData.responseBody
+               if testData.readError {
                        err = errors.New("Error getting data")
                        readError = err
                        return nil, 0, "", err
-               } else {
-                       readContent = testData.read_content
-                       cb := &ClosingBuffer{bytes.NewBufferString(testData.read_content)}
-                       var rc io.ReadCloser
-                       rc = cb
-                       return rc, int64(len(testData.read_content)), "", nil
                }
+               readContent = testData.readContent
+               cb := &ClosingBuffer{bytes.NewBufferString(testData.readContent)}
+               var rc io.ReadCloser
+               rc = cb
+               return rc, int64(len(testData.readContent)), "", nil
        }
 
        // Override PutContent to mock PutBlock functionality
        defer func(orig func([]byte, string) error) { PutContent = orig }(PutContent)
        PutContent = func(content []byte, locator string) (err error) {
-               if testData.put_error {
+               if testData.putError {
                        err = errors.New("Error putting data")
                        putError = err
                        return err
-               } else {
-                       putContent = content
-                       return nil
                }
+               putContent = content
+               return nil
        }
 
        c.Assert(getStatusItem("PullQueue", "InProgress"), Equals, float64(0))
        c.Assert(getStatusItem("PullQueue", "Queued"), Equals, float64(0))
 
        response := IssueRequest(&testData.req)
-       c.Assert(response.Code, Equals, testData.response_code)
-       c.Assert(response.Body.String(), Equals, testData.response_body)
+       c.Assert(response.Code, Equals, testData.responseCode)
+       c.Assert(response.Body.String(), Equals, testData.responseBody)
 
        expectEqualWithin(c, time.Second, 0, func() interface{} {
                st := pullq.Status()
                return st.InProgress + st.Queued
        })
 
-       if testData.name == "TestPullWorker_pull_list_with_two_items_latest_replacing_old" {
+       if testData.name == "TestPullWorkerPullList_with_two_items_latest_replacing_old" {
                c.Assert(len(testPullLists), Equals, 2)
                c.Assert(len(processedPullLists), Equals, 1)
                c.Assert(testPullLists["Added_before_actual_test_item"], NotNil)
-               c.Assert(testPullLists["TestPullWorker_pull_list_with_two_items_latest_replacing_old"], NotNil)
-               c.Assert(processedPullLists["TestPullWorker_pull_list_with_two_items_latest_replacing_old"], NotNil)
+               c.Assert(testPullLists["TestPullWorkerPullList_with_two_items_latest_replacing_old"], NotNil)
+               c.Assert(processedPullLists["TestPullWorkerPullList_with_two_items_latest_replacing_old"], NotNil)
        } else {
-               if testData.response_code == http.StatusOK {
+               if testData.responseCode == http.StatusOK {
                        c.Assert(len(testPullLists), Equals, 1)
                        c.Assert(len(processedPullLists), Equals, 1)
                        c.Assert(testPullLists[testData.name], NotNil)
@@ -309,16 +307,16 @@ func performTest(testData PullWorkerTestData, c *C) {
                }
        }
 
-       if testData.read_error {
+       if testData.readError {
                c.Assert(readError, NotNil)
-       } else if testData.response_code == http.StatusOK {
+       } else if testData.responseCode == http.StatusOK {
                c.Assert(readError, IsNil)
-               c.Assert(readContent, Equals, testData.read_content)
-               if testData.put_error {
+               c.Assert(readContent, Equals, testData.readContent)
+               if testData.putError {
                        c.Assert(putError, NotNil)
                } else {
                        c.Assert(putError, IsNil)
-                       c.Assert(string(putContent), Equals, testData.read_content)
+                       c.Assert(string(putContent), Equals, testData.readContent)
                }
        }
 
index 8f78658c3a7496473c2d81a7f0d7b13213ef9d5f..65e3fbd2849593e44be94921cb7073a5aba3adaa 100644 (file)
@@ -6,14 +6,12 @@ import (
        "time"
 )
 
-/*
-       Keepstore initiates trash worker channel goroutine.
-       The channel will process trash list.
-               For each (next) trash request:
-      Delete the block indicated by the trash request Locator
-               Repeat
-*/
-
+// RunTrashWorker is used by Keepstore to initiate the trash worker channel goroutine.
+//     The channel will process the trash list.
+//             For each (next) trash request:
+//                     Delete the block indicated by the trash request Locator
+//             Repeat
+//
 func RunTrashWorker(trashq *WorkQueue) {
        for item := range trashq.NextItem {
                trashRequest := item.(TrashRequest)
@@ -25,13 +23,13 @@ func RunTrashWorker(trashq *WorkQueue) {
 // TrashItem deletes the indicated block from every writable volume.
 func TrashItem(trashRequest TrashRequest) {
        reqMtime := time.Unix(trashRequest.BlockMtime, 0)
-       if time.Since(reqMtime) < blob_signature_ttl {
-               log.Printf("WARNING: data manager asked to delete a %v old block %v (BlockMtime %d = %v), but my blob_signature_ttl is %v! Skipping.",
+       if time.Since(reqMtime) < blobSignatureTTL {
+               log.Printf("WARNING: data manager asked to delete a %v old block %v (BlockMtime %d = %v), but my blobSignatureTTL is %v! Skipping.",
                        time.Since(reqMtime),
                        trashRequest.Locator,
                        trashRequest.BlockMtime,
                        reqMtime,
-                       blob_signature_ttl)
+                       blobSignatureTTL)
                return
        }
 
@@ -46,8 +44,8 @@ func TrashItem(trashRequest TrashRequest) {
                        continue
                }
 
-               if never_delete {
-                       err = errors.New("did not delete block because never_delete is true")
+               if neverDelete {
+                       err = errors.New("did not delete block because neverDelete is true")
                } else {
                        err = volume.Delete(trashRequest.Locator)
                }
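Taken together, the two guards in TrashItem mean a block is only removed when it is both older than blobSignatureTTL and neverDelete is off. A minimal sketch of that decision as a predicate, assuming the TrashRequest type shown in this diff and the standard time package (shouldTrash is a hypothetical helper, not part of this change):

    // shouldTrash reports whether TrashItem's guards would allow a
    // delete: the block must be older than the signature TTL, and
    // neverDelete must be off.
    func shouldTrash(req TrashRequest, ttl time.Duration, neverDelete bool) bool {
            reqMtime := time.Unix(req.BlockMtime, 0)
            if time.Since(reqMtime) < ttl {
                    // Clients may still hold valid signed locators
                    // for this block; deleting it now would break them.
                    return false
            }
            return !neverDelete
    }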
index a626d9be9b67aff2369d6e4399f90f81b806d99b..1d3063a9de10651cf675062a439403372d39f49f 100644 (file)
@@ -31,7 +31,7 @@ type TrashWorkerTestData struct {
    Expect no errors.
 */
 func TestTrashWorkerIntegration_GetNonExistingLocator(t *testing.T) {
-       never_delete = false
+       neverDelete = false
        testData := TrashWorkerTestData{
                Locator1: "5d41402abc4b2a76b9719d911017c592",
                Block1:   []byte("hello"),
@@ -53,17 +53,17 @@ func TestTrashWorkerIntegration_GetNonExistingLocator(t *testing.T) {
    Expect the second locator in volume 2 to be unaffected.
 */
 func TestTrashWorkerIntegration_LocatorInVolume1(t *testing.T) {
-       never_delete = false
+       neverDelete = false
        testData := TrashWorkerTestData{
-               Locator1: TEST_HASH,
-               Block1:   TEST_BLOCK,
+               Locator1: TestHash,
+               Block1:   TestBlock,
 
-               Locator2: TEST_HASH_2,
-               Block2:   TEST_BLOCK_2,
+               Locator2: TestHash2,
+               Block2:   TestBlock2,
 
                CreateData: true,
 
-               DeleteLocator: TEST_HASH, // first locator
+               DeleteLocator: TestHash, // first locator
 
                ExpectLocator1: false,
                ExpectLocator2: true,
@@ -75,17 +75,17 @@ func TestTrashWorkerIntegration_LocatorInVolume1(t *testing.T) {
    Expect the first locator in volume 1 to be unaffected.
 */
 func TestTrashWorkerIntegration_LocatorInVolume2(t *testing.T) {
-       never_delete = false
+       neverDelete = false
        testData := TrashWorkerTestData{
-               Locator1: TEST_HASH,
-               Block1:   TEST_BLOCK,
+               Locator1: TestHash,
+               Block1:   TestBlock,
 
-               Locator2: TEST_HASH_2,
-               Block2:   TEST_BLOCK_2,
+               Locator2: TestHash2,
+               Block2:   TestBlock2,
 
                CreateData: true,
 
-               DeleteLocator: TEST_HASH_2, // locator 2
+               DeleteLocator: TestHash2, // locator 2
 
                ExpectLocator1: true,
                ExpectLocator2: false,
@@ -97,17 +97,17 @@ func TestTrashWorkerIntegration_LocatorInVolume2(t *testing.T) {
    Expect locator to be deleted from both volumes.
 */
 func TestTrashWorkerIntegration_LocatorInBothVolumes(t *testing.T) {
-       never_delete = false
+       neverDelete = false
        testData := TrashWorkerTestData{
-               Locator1: TEST_HASH,
-               Block1:   TEST_BLOCK,
+               Locator1: TestHash,
+               Block1:   TestBlock,
 
-               Locator2: TEST_HASH,
-               Block2:   TEST_BLOCK,
+               Locator2: TestHash,
+               Block2:   TestBlock,
 
                CreateData: true,
 
-               DeleteLocator: TEST_HASH,
+               DeleteLocator: TestHash,
 
                ExpectLocator1: false,
                ExpectLocator2: false,
@@ -119,18 +119,18 @@ func TestTrashWorkerIntegration_LocatorInBothVolumes(t *testing.T) {
    Delete the second and expect the first to be still around.
 */
 func TestTrashWorkerIntegration_MtimeMatchesForLocator1ButNotForLocator2(t *testing.T) {
-       never_delete = false
+       neverDelete = false
        testData := TrashWorkerTestData{
-               Locator1: TEST_HASH,
-               Block1:   TEST_BLOCK,
+               Locator1: TestHash,
+               Block1:   TestBlock,
 
-               Locator2: TEST_HASH,
-               Block2:   TEST_BLOCK,
+               Locator2: TestHash,
+               Block2:   TestBlock,
 
                CreateData:      true,
                DifferentMtimes: true,
 
-               DeleteLocator: TEST_HASH,
+               DeleteLocator: TestHash,
 
                ExpectLocator1: true,
                ExpectLocator2: false,
@@ -143,18 +143,18 @@ func TestTrashWorkerIntegration_MtimeMatchesForLocator1ButNotForLocator2(t *test
    Expect the other unaffected.
 */
 func TestTrashWorkerIntegration_TwoDifferentLocatorsInVolume1(t *testing.T) {
-       never_delete = false
+       neverDelete = false
        testData := TrashWorkerTestData{
-               Locator1: TEST_HASH,
-               Block1:   TEST_BLOCK,
+               Locator1: TestHash,
+               Block1:   TestBlock,
 
-               Locator2: TEST_HASH_2,
-               Block2:   TEST_BLOCK_2,
+               Locator2: TestHash2,
+               Block2:   TestBlock2,
 
                CreateData:      true,
                CreateInVolume1: true,
 
-               DeleteLocator: TEST_HASH, // locator 1
+               DeleteLocator: TestHash, // locator 1
 
                ExpectLocator1: false,
                ExpectLocator2: true,
@@ -166,20 +166,20 @@ func TestTrashWorkerIntegration_TwoDifferentLocatorsInVolume1(t *testing.T) {
    will not be deleted because its Mtime is within the trash lifetime.
 */
 func TestTrashWorkerIntegration_SameLocatorInTwoVolumesWithDefaultTrashLifeTime(t *testing.T) {
-       never_delete = false
+       neverDelete = false
        testData := TrashWorkerTestData{
-               Locator1: TEST_HASH,
-               Block1:   TEST_BLOCK,
+               Locator1: TestHash,
+               Block1:   TestBlock,
 
-               Locator2: TEST_HASH_2,
-               Block2:   TEST_BLOCK_2,
+               Locator2: TestHash2,
+               Block2:   TestBlock2,
 
                CreateData:      true,
                CreateInVolume1: true,
 
                UseTrashLifeTime: true,
 
-               DeleteLocator: TEST_HASH, // locator 1
+               DeleteLocator: TestHash, // locator 1
 
                // Since trash life time is in effect, block won't be deleted.
                ExpectLocator1: true,
@@ -188,21 +188,21 @@ func TestTrashWorkerIntegration_SameLocatorInTwoVolumesWithDefaultTrashLifeTime(
        performTrashWorkerTest(testData, t)
 }
 
-/* Delete a block with matching mtime for locator in both volumes, but never_delete is true,
+/* Delete a block with matching mtime for locator in both volumes, but neverDelete is true,
    so block won't be deleted.
 */
 func TestTrashWorkerIntegration_NeverDelete(t *testing.T) {
-       never_delete = true
+       neverDelete = true
        testData := TrashWorkerTestData{
-               Locator1: TEST_HASH,
-               Block1:   TEST_BLOCK,
+               Locator1: TestHash,
+               Block1:   TestBlock,
 
-               Locator2: TEST_HASH,
-               Block2:   TEST_BLOCK,
+               Locator2: TestHash,
+               Block2:   TestBlock,
 
                CreateData: true,
 
-               DeleteLocator: TEST_HASH,
+               DeleteLocator: TestHash,
 
                ExpectLocator1: true,
                ExpectLocator2: true,
@@ -231,7 +231,7 @@ func performTrashWorkerTest(testData TrashWorkerTestData, t *testing.T) {
                }
        }
 
-       oldBlockTime := time.Now().Add(-blob_signature_ttl - time.Minute)
+       oldBlockTime := time.Now().Add(-blobSignatureTTL - time.Minute)
 
        // Create TrashRequest for the test
        trashRequest := TrashRequest{
index f57df2486b01d8c0c11c9dc5707907e3c3e304c3..7966c41b92bd89958308ec77765f0b7a5a1f0fd9 100644 (file)
@@ -1,7 +1,3 @@
-// A Volume is an interface representing a Keep back-end storage unit:
-// for example, a single mounted disk, a RAID array, an Amazon S3 volume,
-// etc.
-
 package main
 
 import (
@@ -10,6 +6,9 @@ import (
        "time"
 )
 
+// A Volume is an interface representing a Keep back-end storage unit:
+// for example, a single mounted disk, a RAID array, an Amazon S3 volume,
+// etc.
 type Volume interface {
        // Get a block. IFF the returned error is nil, the caller must
        // put the returned slice back into the buffer pool when it's
@@ -37,7 +36,7 @@ type Volume interface {
        // access log if the block is not found on any other volumes
        // either).
        //
-       // If the data in the backing store is bigger than BLOCKSIZE,
+       // If the data in the backing store is bigger than BlockSize,
        // Get is permitted to return an error without reading any of
        // the data.
        Get(loc string) ([]byte, error)
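This buffer-pool contract is easy to violate: the slice must go back to the pool only when err is nil, and only after the caller has finished reading it. A usage sketch, assuming the package-level bufs pool used elsewhere in keepstore (readBlock and its log.Printf line are illustrative stand-ins, not part of this change):

    func readBlock(v Volume, loc string) error {
            buf, err := v.Get(loc)
            if err != nil {
                    // On error there is nothing to return to the pool.
                    return err
            }
            // The pool may hand this slice to another caller as soon
            // as we Put it back, so finish with the data first.
            defer bufs.Put(buf)
            log.Printf("read %d bytes for %s", len(buf), loc)
            return nil
    }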
@@ -53,11 +52,17 @@ type Volume interface {
        //
        // loc is as described in Get.
        //
-       // len(block) is guaranteed to be between 0 and BLOCKSIZE.
+       // len(block) is guaranteed to be between 0 and BlockSize.
        //
        // If a block is already stored under the same name (loc) with
        // different content, Put must either overwrite the existing
-       // data with the new data or return a non-nil error.
+       // data with the new data or return a non-nil error. When
+       // overwriting existing data, it must never leave the storage
+       // device in an inconsistent state: a subsequent call to Get
+       // must return either the entire old block, the entire new
+       // block, or an error. (An implementation that cannot perform
+       // atomic updates must leave the old data alone and return an
+       // error.)
        //
        // Put also sets the timestamp for the given locator to the
        // current time.
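A common way to meet this atomic-overwrite requirement on POSIX filesystems is to write a temporary file and rename it into place, since rename(2) replaces the target atomically. A sketch of that pattern, assuming the usual os, io/ioutil, and path/filepath imports (an illustration of one strategy, not the UnixVolume implementation):

    func atomicPut(path string, block []byte) error {
            tmp, err := ioutil.TempFile(filepath.Dir(path), "put")
            if err != nil {
                    return err
            }
            defer os.Remove(tmp.Name()) // harmless after a successful rename
            if _, err := tmp.Write(block); err != nil {
                    tmp.Close()
                    return err
            }
            if err := tmp.Close(); err != nil {
                    return err
            }
            // rename(2) is atomic: a concurrent Get sees either the
            // entire old block or the entire new one, never a mix.
            return os.Rename(tmp.Name(), path)
    }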
@@ -122,7 +127,7 @@ type Volume interface {
        //
        //   - size is the number of bytes of content, given as a
        //     decimal number with one or more digits
-       //     
+       //
        //   - timestamp is the timestamp stored for the locator,
        //     given as a decimal number of seconds after January 1,
        //     1970 UTC.
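Concretely, each index entry is the locator name, a plus sign and the size, then a space and the Unix timestamp, so a line looks like this (illustrative values):

    e4d909c290d0fb1ca068ffaddf22cbd0+2310 1445349876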
@@ -145,7 +150,7 @@ type Volume interface {
        // loc is as described in Get.
        //
        // If the timestamp for the given locator is newer than
-       // blob_signature_ttl, Delete must not delete the data.
+       // blobSignatureTTL, Delete must not delete the data.
        //
        // If a Delete operation overlaps with any Touch or Put
        // operations on the same locator, the implementation must
@@ -166,7 +171,7 @@ type Volume interface {
        // reliably or fail outright.
        //
        // Corollary: A successful Touch or Put guarantees a block
-       // will not be deleted for at least blob_signature_ttl
+       // will not be deleted for at least blobSignatureTTL
        // seconds.
        Delete(loc string) error
 
@@ -190,6 +195,11 @@ type Volume interface {
        // will fail because it is full, but Mtime or Delete can
        // succeed -- then Writable should return false.
        Writable() bool
+
+       // Replication returns the storage redundancy of the
+       // underlying device. It will be passed on to clients in
+       // responses to PUT requests.
+       Replication() int
 }
 
 // A VolumeManager tells callers which volumes can read, which volumes
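Replication() gives keepstore a per-volume redundancy figure to report to clients in PUT responses. The MockVolume and UnixVolume implementations later in this diff both return 1, the natural answer for a single local disk; a device with built-in redundancy could report more. A sketch using a hypothetical mirrored-array volume type (raidVolume is illustrative, not part of this change):

    // raidVolume is a hypothetical Volume backed by an N-way mirror.
    type raidVolume struct {
            UnixVolume
            mirrors int
    }

    // Replication reports the device-level redundancy so keepstore
    // can pass it on to clients in PUT responses.
    func (v *raidVolume) Replication() int {
            return v.mirrors
    }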
@@ -222,6 +232,7 @@ type RRVolumeManager struct {
        counter   uint32
 }
 
+// MakeRRVolumeManager initializes an RRVolumeManager, dividing the given volumes into readable and writable sets
 func MakeRRVolumeManager(volumes []Volume) *RRVolumeManager {
        vm := &RRVolumeManager{}
        for _, v := range volumes {
@@ -233,14 +244,17 @@ func MakeRRVolumeManager(volumes []Volume) *RRVolumeManager {
        return vm
 }
 
+// AllReadable returns an array of all readable volumes
 func (vm *RRVolumeManager) AllReadable() []Volume {
        return vm.readables
 }
 
+// AllWritable returns an array of all writable volumes
 func (vm *RRVolumeManager) AllWritable() []Volume {
        return vm.writables
 }
 
+// NextWritable returns the next writable volume in round-robin order
 func (vm *RRVolumeManager) NextWritable() Volume {
        if len(vm.writables) == 0 {
                return nil
@@ -249,5 +263,18 @@ func (vm *RRVolumeManager) NextWritable() Volume {
        return vm.writables[i%uint32(len(vm.writables))]
 }
 
+// Close is a no-op for RRVolumeManager
 func (vm *RRVolumeManager) Close() {
 }
+
+// VolumeStatus provides status information of the volume consisting of:
+//   * mount_point
+//   * device_num (an integer identifying the underlying storage system)
+//   * bytes_free
+//   * bytes_used
+type VolumeStatus struct {
+       MountPoint string `json:"mount_point"`
+       DeviceNum  uint64 `json:"device_num"`
+       BytesFree  uint64 `json:"bytes_free"`
+       BytesUsed  uint64 `json:"bytes_used"`
+}
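With those struct tags, one volume's status serializes to snake_case JSON via encoding/json; an illustrative rendering (hypothetical values):

    {"mount_point":"/mnt/keep0","device_num":2049,"bytes_free":1073741824,"bytes_used":536870912}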
index 0c0629c2a1b0c6ec6812112e4a9955513261d940..61088f10fa2d4ef30e969a77e107b824073684e6 100644 (file)
@@ -2,54 +2,687 @@ package main
 
 import (
        "bytes"
+       "crypto/md5"
+       "fmt"
        "os"
+       "regexp"
+       "sort"
+       "strings"
        "testing"
        "time"
 )
 
 // A TestableVolumeFactory returns a new TestableVolume. The factory
-// function, and the TestableVolume it returns, can use t to write
+// function, and the TestableVolume it returns, can use "t" to write
 // logs, fail the current test, etc.
 type TestableVolumeFactory func(t *testing.T) TestableVolume
 
 // DoGenericVolumeTests runs a set of tests that every TestableVolume
-// is expected to pass. It calls factory to create a new
-// TestableVolume for each test case, to avoid leaking state between
-// tests.
+// is expected to pass. It calls factory to create a new TestableVolume
+// for each test case, to avoid leaking state between tests.
 func DoGenericVolumeTests(t *testing.T, factory TestableVolumeFactory) {
+       testGet(t, factory)
+       testGetNoSuchBlock(t, factory)
+
+       testCompareNonexistent(t, factory)
+       testCompareSameContent(t, factory, TestHash, TestBlock)
+       testCompareSameContent(t, factory, EmptyHash, EmptyBlock)
+       testCompareWithCollision(t, factory, TestHash, TestBlock, []byte("baddata"))
+       testCompareWithCollision(t, factory, TestHash, TestBlock, EmptyBlock)
+       testCompareWithCollision(t, factory, EmptyHash, EmptyBlock, TestBlock)
+       testCompareWithCorruptStoredData(t, factory, TestHash, TestBlock, []byte("baddata"))
+       testCompareWithCorruptStoredData(t, factory, TestHash, TestBlock, EmptyBlock)
+       testCompareWithCorruptStoredData(t, factory, EmptyHash, EmptyBlock, []byte("baddata"))
+
+       testPutBlockWithSameContent(t, factory, TestHash, TestBlock)
+       testPutBlockWithSameContent(t, factory, EmptyHash, EmptyBlock)
+       testPutBlockWithDifferentContent(t, factory, TestHash, TestBlock, TestBlock2)
+       testPutBlockWithDifferentContent(t, factory, TestHash, EmptyBlock, TestBlock)
+       testPutBlockWithDifferentContent(t, factory, TestHash, TestBlock, EmptyBlock)
+       testPutBlockWithDifferentContent(t, factory, EmptyHash, EmptyBlock, TestBlock)
+       testPutMultipleBlocks(t, factory)
+
+       testPutAndTouch(t, factory)
+       testTouchNoSuchBlock(t, factory)
+
+       testMtimeNoSuchBlock(t, factory)
+
+       testIndexTo(t, factory)
+
        testDeleteNewBlock(t, factory)
        testDeleteOldBlock(t, factory)
+       testDeleteNoSuchBlock(t, factory)
+
+       testStatus(t, factory)
+
+       testString(t, factory)
+
+       testUpdateReadOnly(t, factory)
+
+       testGetConcurrent(t, factory)
+       testPutConcurrent(t, factory)
+
+       testPutFullBlock(t, factory)
+}
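Any volume implementation can opt into this whole suite by supplying a factory, exactly as the UnixVolume tests later in this diff do. A sketch for a hypothetical in-memory volume (inMemoryVolume and newInMemoryVolume are illustrative names, assuming the type implements TestableVolume):

    func TestInMemoryVolumeWithGenericTests(t *testing.T) {
            DoGenericVolumeTests(t, func(t *testing.T) TestableVolume {
                    // A fresh volume per test case keeps state from
                    // leaking between tests.
                    return newInMemoryVolume(t)
            })
    }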
+
+// Put a test block, get it and verify content
+// Test should pass for both writable and read-only volumes
+func testGet(t *testing.T, factory TestableVolumeFactory) {
+       v := factory(t)
+       defer v.Teardown()
+
+       v.PutRaw(TestHash, TestBlock)
+
+       buf, err := v.Get(TestHash)
+       if err != nil {
+               t.Fatal(err)
+       }
+
+       if bytes.Compare(buf, TestBlock) != 0 {
+               t.Errorf("expected %s, got %s", string(TestBlock), string(buf))
+       }
+
+       bufs.Put(buf)
+}
+
+// Invoke get on a block that does not exist in volume; should result in error
+// Test should pass for both writable and read-only volumes
+func testGetNoSuchBlock(t *testing.T, factory TestableVolumeFactory) {
+       v := factory(t)
+       defer v.Teardown()
+
+       if _, err := v.Get(TestHash2); err == nil {
+               t.Errorf("Expected error while getting non-existing block %v", TestHash2)
+       }
+}
+
+// Compare() should return os.ErrNotExist if the block does not exist.
+// Otherwise, writing new data causes CompareAndTouch() to generate
+// error logs even though everything is working fine.
+func testCompareNonexistent(t *testing.T, factory TestableVolumeFactory) {
+       v := factory(t)
+       defer v.Teardown()
+
+       err := v.Compare(TestHash, TestBlock)
+       if err != os.ErrNotExist {
+               t.Errorf("Got err %T %q, expected os.ErrNotExist", err, err)
+       }
+}
+
+// Put a test block and compare the locator with same content
+// Test should pass for both writable and read-only volumes
+func testCompareSameContent(t *testing.T, factory TestableVolumeFactory, testHash string, testData []byte) {
+       v := factory(t)
+       defer v.Teardown()
+
+       v.PutRaw(testHash, testData)
+
+       // Compare the block locator with same content
+       err := v.Compare(testHash, testData)
+       if err != nil {
+               t.Errorf("Got err %q, expected nil", err)
+       }
+}
+
+// Test behavior of Compare() when stored data matches expected
+// checksum but differs from new data we need to store. Requires
+// testHash = md5(testDataA).
+//
+// Test should pass for both writable and read-only volumes
+func testCompareWithCollision(t *testing.T, factory TestableVolumeFactory, testHash string, testDataA, testDataB []byte) {
+       v := factory(t)
+       defer v.Teardown()
+
+       v.PutRaw(testHash, testDataA)
+
+       // Compare the block locator with different content; collision
+       err := v.Compare(testHash, testDataB)
+       if err == nil {
+               t.Errorf("Got err nil, expected error due to collision")
+       }
+}
+
+// Test behavior of Compare() when stored data has become
+// corrupted. Requires testHash = md5(testDataA) != md5(testDataB).
+//
+// Test should pass for both writable and read-only volumes
+func testCompareWithCorruptStoredData(t *testing.T, factory TestableVolumeFactory, testHash string, testDataA, testDataB []byte) {
+       v := factory(t)
+       defer v.Teardown()
+
+       v.PutRaw(testHash, testDataB)
+
+       err := v.Compare(testHash, testDataA)
+       if err == nil || err == CollisionError {
+               t.Errorf("Got err %+v, expected non-collision error", err)
+       }
+}
+
+// Put a block and put again with same content
+// Test is intended for only writable volumes
+func testPutBlockWithSameContent(t *testing.T, factory TestableVolumeFactory, testHash string, testData []byte) {
+       v := factory(t)
+       defer v.Teardown()
+
+       if v.Writable() == false {
+               return
+       }
+
+       err := v.Put(testHash, testData)
+       if err != nil {
+               t.Errorf("Got err putting block %q: %q, expected nil", TestBlock, err)
+       }
+
+       err = v.Put(testHash, testData)
+       if err != nil {
+               t.Errorf("Got err putting block second time %q: %q, expected nil", TestBlock, err)
+       }
+}
+
+// Put a block and put again with different content
+// Test is intended for only writable volumes
+func testPutBlockWithDifferentContent(t *testing.T, factory TestableVolumeFactory, testHash string, testDataA, testDataB []byte) {
+       v := factory(t)
+       defer v.Teardown()
+
+       if v.Writable() == false {
+               return
+       }
+
+       err := v.Put(testHash, testDataA)
+       if err != nil {
+               t.Errorf("Got err putting block %q: %q, expected nil", testDataA, err)
+       }
+
+       putErr := v.Put(testHash, testDataB)
+       buf, getErr := v.Get(testHash)
+       if putErr == nil {
+               // Put must not return a nil error unless it has
+               // overwritten the existing data.
+               if bytes.Compare(buf, testDataB) != 0 {
+                       t.Errorf("Put succeeded but Get returned %+q, expected %+q", buf, testDataB)
+               }
+       } else {
+               // It is permissible for Put to fail, but it must
+               // leave us with either the original data, the new
+               // data, or nothing at all.
+               if getErr == nil && bytes.Compare(buf, testDataA) != 0 && bytes.Compare(buf, testDataB) != 0 {
+                       t.Errorf("Put failed but Get returned %+q, which is neither %+q nor %+q", buf, testDataA, testDataB)
+               }
+       }
+       if getErr == nil {
+               bufs.Put(buf)
+       }
+}
+
+// Put and get multiple blocks
+// Test is intended for only writable volumes
+func testPutMultipleBlocks(t *testing.T, factory TestableVolumeFactory) {
+       v := factory(t)
+       defer v.Teardown()
+
+       if v.Writable() == false {
+               return
+       }
+
+       err := v.Put(TestHash, TestBlock)
+       if err != nil {
+               t.Errorf("Got err putting block %q: %q, expected nil", TestBlock, err)
+       }
+
+       err = v.Put(TestHash2, TestBlock2)
+       if err != nil {
+               t.Errorf("Got err putting block %q: %q, expected nil", TestBlock2, err)
+       }
+
+       err = v.Put(TestHash3, TestBlock3)
+       if err != nil {
+               t.Errorf("Got err putting block %q: %q, expected nil", TestBlock3, err)
+       }
+
+       data, err := v.Get(TestHash)
+       if err != nil {
+               t.Error(err)
+       } else {
+               if bytes.Compare(data, TestBlock) != 0 {
+                       t.Errorf("Block present, but got %+q, expected %+q", data, TestBlock)
+               }
+               bufs.Put(data)
+       }
+
+       data, err = v.Get(TestHash2)
+       if err != nil {
+               t.Error(err)
+       } else {
+               if bytes.Compare(data, TestBlock2) != 0 {
+                       t.Errorf("Block present, but got %+q, expected %+q", data, TestBlock2)
+               }
+               bufs.Put(data)
+       }
+
+       data, err = v.Get(TestHash3)
+       if err != nil {
+               t.Error(err)
+       } else {
+               if bytes.Compare(data, TestBlock3) != 0 {
+                       t.Errorf("Block present, but to %+q, expected %+q", data, TestBlock3)
+               }
+               bufs.Put(data)
+       }
+}
+
+// testPutAndTouch checks that when applying PUT to a block that
+// already exists, the block's modification time is updated.
+// Test is intended for only writable volumes
+func testPutAndTouch(t *testing.T, factory TestableVolumeFactory) {
+       v := factory(t)
+       defer v.Teardown()
+
+       if v.Writable() == false {
+               return
+       }
+
+       if err := v.Put(TestHash, TestBlock); err != nil {
+               t.Error(err)
+       }
+
+       // We'll verify { t0 < threshold < t1 }, where t0 is the
+       // existing block's timestamp on disk before Put() and t1 is
+       // its timestamp after Put().
+       threshold := time.Now().Add(-time.Second)
+
+       // Set the stored block's mtime far enough in the past that we
+       // can see the difference between "timestamp didn't change"
+       // and "timestamp granularity is too low".
+       v.TouchWithDate(TestHash, time.Now().Add(-20*time.Second))
+
+       // Make sure v.Mtime() agrees that the above TouchWithDate really worked.
+       if t0, err := v.Mtime(TestHash); err != nil || t0.IsZero() || !t0.Before(threshold) {
+               t.Errorf("Setting mtime failed: %v, %v", t0, err)
+       }
+
+       // Write the same block again.
+       if err := v.Put(TestHash, TestBlock); err != nil {
+               t.Error(err)
+       }
+
+       // Verify threshold < t1
+       if t1, err := v.Mtime(TestHash); err != nil {
+               t.Error(err)
+       } else if t1.Before(threshold) {
+               t.Errorf("t1 %v should be >= threshold %v after v.Put ", t1, threshold)
+       }
+}
+
+// Touching a non-existing block should result in error.
+// Test should pass for both writable and read-only volumes
+func testTouchNoSuchBlock(t *testing.T, factory TestableVolumeFactory) {
+       v := factory(t)
+       defer v.Teardown()
+
+       if err := v.Touch(TestHash); err == nil {
+               t.Error("Expected error when attempted to touch a non-existing block")
+       }
 }
 
-// Calling Delete() for a block immediately after writing it should
-// neither delete the data nor return an error.
+// Invoking Mtime on a non-existing block should result in error.
+// Test should pass for both writable and read-only volumes
+func testMtimeNoSuchBlock(t *testing.T, factory TestableVolumeFactory) {
+       v := factory(t)
+       defer v.Teardown()
+
+       if _, err := v.Mtime("12345678901234567890123456789012"); err == nil {
+               t.Error("Expected error when updating Mtime on a non-existing block")
+       }
+}
+
+// Put a few blocks and invoke IndexTo with:
+// * no prefix
+// * a prefix
+// * a nonexistent prefix
+// Test should pass for both writable and read-only volumes
+func testIndexTo(t *testing.T, factory TestableVolumeFactory) {
+       v := factory(t)
+       defer v.Teardown()
+
+       v.PutRaw(TestHash, TestBlock)
+       v.PutRaw(TestHash2, TestBlock2)
+       v.PutRaw(TestHash3, TestBlock3)
+
+       // Blocks whose names aren't Keep hashes should be omitted from
+       // index
+       v.PutRaw("fffffffffnotreallyahashfffffffff", nil)
+       v.PutRaw("FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF", nil)
+       v.PutRaw("f0000000000000000000000000000000f", nil)
+       v.PutRaw("f00", nil)
+
+       buf := new(bytes.Buffer)
+       v.IndexTo("", buf)
+       indexRows := strings.Split(string(buf.Bytes()), "\n")
+       sort.Strings(indexRows)
+       sortedIndex := strings.Join(indexRows, "\n")
+       m, err := regexp.MatchString(
+               `^\n`+TestHash+`\+\d+ \d+\n`+
+                       TestHash3+`\+\d+ \d+\n`+
+                       TestHash2+`\+\d+ \d+$`,
+               sortedIndex)
+       if err != nil {
+               t.Error(err)
+       } else if !m {
+               t.Errorf("Got index %q for empty prefix", sortedIndex)
+       }
+
+       for _, prefix := range []string{"f", "f15", "f15ac"} {
+               buf = new(bytes.Buffer)
+               v.IndexTo(prefix, buf)
+
+               m, err := regexp.MatchString(`^`+TestHash2+`\+\d+ \d+\n$`, string(buf.Bytes()))
+               if err != nil {
+                       t.Error(err)
+               } else if !m {
+                       t.Errorf("Got index %q for prefix %s", string(buf.Bytes()), prefix)
+               }
+       }
+
+       for _, prefix := range []string{"zero", "zip", "zilch"} {
+               buf = new(bytes.Buffer)
+               err := v.IndexTo(prefix, buf)
+               if err != nil {
+                       t.Errorf("Got error on IndexTo with no such prefix %v", err.Error())
+               } else if buf.Len() != 0 {
+                       t.Errorf("Expected empty list for IndexTo with no such prefix %s", prefix)
+               }
+       }
+}
+
+// Calling Delete() for a block immediately after writing it (not old enough)
+// should neither delete the data nor return an error.
+// Test is intended for only writable volumes
 func testDeleteNewBlock(t *testing.T, factory TestableVolumeFactory) {
        v := factory(t)
        defer v.Teardown()
-       v.Put(TEST_HASH, TEST_BLOCK)
+       blobSignatureTTL = 300 * time.Second
+
+       if v.Writable() == false {
+               return
+       }
 
-       if err := v.Delete(TEST_HASH); err != nil {
+       v.Put(TestHash, TestBlock)
+
+       if err := v.Delete(TestHash); err != nil {
                t.Error(err)
        }
-       if data, err := v.Get(TEST_HASH); err != nil {
+       data, err := v.Get(TestHash)
+       if err != nil {
                t.Error(err)
-       } else if bytes.Compare(data, TEST_BLOCK) != 0 {
-               t.Error("Block still present, but content is incorrect: %+v != %+v", data, TEST_BLOCK)
+       } else {
+               if bytes.Compare(data, TestBlock) != 0 {
+                       t.Errorf("Got data %+q, expected %+q", data, TestBlock)
+               }
+               bufs.Put(data)
        }
 }
 
 // Calling Delete() for a block with a timestamp older than
-// blob_signature_ttl seconds in the past should delete the data.
+// blobSignatureTTL seconds in the past should delete the data.
+// Test is intended for only writable volumes
 func testDeleteOldBlock(t *testing.T, factory TestableVolumeFactory) {
        v := factory(t)
        defer v.Teardown()
-       v.Put(TEST_HASH, TEST_BLOCK)
-       v.TouchWithDate(TEST_HASH, time.Now().Add(-2*blob_signature_ttl*time.Second))
+       blobSignatureTTL = 300 * time.Second
+
+       if v.Writable() == false {
+               return
+       }
+
+       v.Put(TestHash, TestBlock)
+       v.TouchWithDate(TestHash, time.Now().Add(-2*blobSignatureTTL))
+
+       if err := v.Delete(TestHash); err != nil {
+               t.Error(err)
+       }
+       if _, err := v.Get(TestHash); err == nil || !os.IsNotExist(err) {
+               t.Errorf("os.IsNotExist(%v) should have been true", err)
+       }
+}
+
+// Calling Delete() for a block that does not exist should result in error.
+// Test should pass for both writable and read-only volumes
+func testDeleteNoSuchBlock(t *testing.T, factory TestableVolumeFactory) {
+       v := factory(t)
+       defer v.Teardown()
+
+       if err := v.Delete(TestHash2); err == nil {
+               t.Errorf("Expected error when attempting to delete a non-existing block")
+       }
+}
+
+// Invoke Status and verify that VolumeStatus is returned
+// Test should pass for both writable and read-only volumes
+func testStatus(t *testing.T, factory TestableVolumeFactory) {
+       v := factory(t)
+       defer v.Teardown()
+
+       // Get node status and make a basic sanity check.
+       status := v.Status()
+       if status.DeviceNum == 0 {
+               t.Errorf("uninitialized device_num in %v", status)
+       }
+
+       if status.BytesFree == 0 {
+               t.Errorf("uninitialized bytes_free in %v", status)
+       }
+
+       if status.BytesUsed == 0 {
+               t.Errorf("uninitialized bytes_used in %v", status)
+       }
+}
+
+// Invoke String for the volume; expect non-empty result
+// Test should pass for both writable and read-only volumes
+func testString(t *testing.T, factory TestableVolumeFactory) {
+       v := factory(t)
+       defer v.Teardown()
 
-       if err := v.Delete(TEST_HASH); err != nil {
+       if id := v.String(); len(id) == 0 {
+               t.Error("Got empty string for v.String()")
+       }
+}
+
+// Putting, updating, touching, and deleting blocks on a read-only volume should all result in errors.
+// Test is intended for only read-only volumes
+func testUpdateReadOnly(t *testing.T, factory TestableVolumeFactory) {
+       v := factory(t)
+       defer v.Teardown()
+
+       if v.Writable() == true {
+               return
+       }
+
+       v.PutRaw(TestHash, TestBlock)
+
+       // Get from read-only volume should succeed
+       _, err := v.Get(TestHash)
+       if err != nil {
+               t.Errorf("got err %v, expected nil", err)
+       }
+
+       // Put a new block to read-only volume should result in error
+       err = v.Put(TestHash2, TestBlock2)
+       if err == nil {
+               t.Errorf("Expected error when putting block in a read-only volume")
+       }
+       _, err = v.Get(TestHash2)
+       if err == nil {
+               t.Errorf("Expected error when getting block whose put in read-only volume failed")
+       }
+
+       // Touch a block in read-only volume should result in error
+       err = v.Touch(TestHash)
+       if err == nil {
+               t.Errorf("Expected error when touching block in a read-only volume")
+       }
+
+       // Delete a block from a read-only volume should result in error
+       err = v.Delete(TestHash)
+       if err == nil {
+               t.Errorf("Expected error when deleting block from a read-only volume")
+       }
+
+       // Overwriting an existing block in read-only volume should result in error
+       err = v.Put(TestHash, TestBlock)
+       if err == nil {
+               t.Errorf("Expected error when putting block in a read-only volume")
+       }
+}
+
+// Launch concurrent Gets
+// Test should pass for both writable and read-only volumes
+func testGetConcurrent(t *testing.T, factory TestableVolumeFactory) {
+       v := factory(t)
+       defer v.Teardown()
+
+       v.PutRaw(TestHash, TestBlock)
+       v.PutRaw(TestHash2, TestBlock2)
+       v.PutRaw(TestHash3, TestBlock3)
+
+       sem := make(chan int)
+       go func(sem chan int) {
+               buf, err := v.Get(TestHash)
+               if err != nil {
+                       t.Errorf("err1: %v", err)
+               }
+               if bytes.Compare(buf, TestBlock) != 0 {
+                       t.Errorf("buf should be %s, is %s", string(TestBlock), string(buf))
+               }
+               bufs.Put(buf)
+               sem <- 1
+       }(sem)
+
+       go func(sem chan int) {
+               buf, err := v.Get(TestHash2)
+               if err != nil {
+                       t.Errorf("err2: %v", err)
+               }
+               if bytes.Compare(buf, TestBlock2) != 0 {
+                       t.Errorf("buf should be %s, is %s", string(TestBlock2), string(buf))
+               }
+               bufs.Put(buf)
+               sem <- 1
+       }(sem)
+
+       go func(sem chan int) {
+               buf, err := v.Get(TestHash3)
+               if err != nil {
+                       t.Errorf("err3: %v", err)
+               }
+               if bytes.Compare(buf, TestBlock3) != 0 {
+                       t.Errorf("buf should be %s, is %s", string(TestBlock3), string(buf))
+               }
+               bufs.Put(buf)
+               sem <- 1
+       }(sem)
+
+       // Wait for all goroutines to finish
+       for done := 0; done < 3; {
+               done += <-sem
+       }
+}
+
+// Launch concurrent Puts
+// Test is intended for only writable volumes
+func testPutConcurrent(t *testing.T, factory TestableVolumeFactory) {
+       v := factory(t)
+       defer v.Teardown()
+
+       if v.Writable() == false {
+               return
+       }
+
+       sem := make(chan int)
+       go func(sem chan int) {
+               err := v.Put(TestHash, TestBlock)
+               if err != nil {
+                       t.Errorf("err1: %v", err)
+               }
+               sem <- 1
+       }(sem)
+
+       go func(sem chan int) {
+               err := v.Put(TestHash2, TestBlock2)
+               if err != nil {
+                       t.Errorf("err2: %v", err)
+               }
+               sem <- 1
+       }(sem)
+
+       go func(sem chan int) {
+               err := v.Put(TestHash3, TestBlock3)
+               if err != nil {
+                       t.Errorf("err3: %v", err)
+               }
+               sem <- 1
+       }(sem)
+
+       // Wait for all goroutines to finish
+       for done := 0; done < 3; {
+               done += <-sem
+       }
+
+       // Double check that we actually wrote the blocks we expected to write.
+       buf, err := v.Get(TestHash)
+       if err != nil {
+               t.Errorf("Get #1: %v", err)
+       }
+       if bytes.Compare(buf, TestBlock) != 0 {
+               t.Errorf("Get #1: expected %s, got %s", string(TestBlock), string(buf))
+       }
+       bufs.Put(buf)
+
+       buf, err = v.Get(TestHash2)
+       if err != nil {
+               t.Errorf("Get #2: %v", err)
+       }
+       if bytes.Compare(buf, TestBlock2) != 0 {
+               t.Errorf("Get #2: expected %s, got %s", string(TestBlock2), string(buf))
+       }
+       bufs.Put(buf)
+
+       buf, err = v.Get(TestHash3)
+       if err != nil {
+               t.Errorf("Get #3: %v", err)
+       }
+       if bytes.Compare(buf, TestBlock3) != 0 {
+               t.Errorf("Get #3: expected %s, got %s", string(TestBlock3), string(buf))
+       }
+       bufs.Put(buf)
+}
+
+// Write and read back a full size block
+func testPutFullBlock(t *testing.T, factory TestableVolumeFactory) {
+       v := factory(t)
+       defer v.Teardown()
+
+       if !v.Writable() {
+               return
+       }
+
+       wdata := make([]byte, BlockSize)
+       wdata[0] = 'a'
+       wdata[BlockSize-1] = 'z'
+       hash := fmt.Sprintf("%x", md5.Sum(wdata))
+       err := v.Put(hash, wdata)
+       if err != nil {
+               t.Fatal(err)
+       }
+       rdata, err := v.Get(hash)
+       if err != nil {
                t.Error(err)
+       } else {
+               defer bufs.Put(rdata)
        }
-       if _, err := v.Get(TEST_HASH); err == nil || !os.IsNotExist(err) {
-               t.Errorf("os.IsNotExist(%v) should have been true", err.Error())
+       if bytes.Compare(rdata, wdata) != 0 {
+               t.Error("rdata != wdata")
        }
 }
index 290722557598fd64118feac5eb567192cdbc08c2..d6714365de5bef98ad082b93f595231993bafa48 100644 (file)
@@ -52,7 +52,7 @@ type MockVolume struct {
        // channel unblocks all operations. By default, Gate is a
        // closed channel, so all operations proceed without
        // blocking. See trash_worker_test.go for an example.
-       Gate   chan struct{}
+       Gate chan struct{}
 
        called map[string]int
        mutex  sync.Mutex
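The Gate channel gives tests fine-grained control over timing: receives on a closed channel return immediately, so by default nothing blocks, while swapping in an open channel parks every operation until the test closes it. A usage sketch (a hypothetical test body, following the pattern the comment attributes to trash_worker_test.go):

    v := CreateMockVolume()
    v.Gate = make(chan struct{}) // open channel: operations now block

    go v.Put(TestHash, TestBlock) // parked on Gate

    // ... trigger the competing operation under test ...

    close(v.Gate) // release everything that was blocked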
@@ -78,11 +78,11 @@ func CreateMockVolume() *MockVolume {
 func (v *MockVolume) CallCount(method string) int {
        v.mutex.Lock()
        defer v.mutex.Unlock()
-       if c, ok := v.called[method]; !ok {
+       c, ok := v.called[method]
+       if !ok {
                return 0
-       } else {
-               return c
        }
+       return c
 }
 
 func (v *MockVolume) gotCall(method string) {
@@ -190,7 +190,7 @@ func (v *MockVolume) Delete(loc string) error {
                return MethodDisabledError
        }
        if _, ok := v.Store[loc]; ok {
-               if time.Since(v.Timestamps[loc]) < blob_signature_ttl {
+               if time.Since(v.Timestamps[loc]) < blobSignatureTTL {
                        return nil
                }
                delete(v.Store, loc)
@@ -214,3 +214,7 @@ func (v *MockVolume) String() string {
 func (v *MockVolume) Writable() bool {
        return !v.Readonly
 }
+
+func (v *MockVolume) Replication() int {
+       return 1
+}
index 74bee52387c5f291f7edc8cc0c36e7ef5b48dd9e..910cc25d613cb7690f944b418aebf5c205c7aced 100644 (file)
@@ -1,7 +1,9 @@
 package main
 
 import (
-       "bytes"
+       "bufio"
+       "errors"
+       "flag"
        "fmt"
        "io"
        "io/ioutil"
@@ -16,6 +18,97 @@ import (
        "time"
 )
 
+type unixVolumeAdder struct {
+       *volumeSet
+}
+
+func (vs *unixVolumeAdder) Set(value string) error {
+       if dirs := strings.Split(value, ","); len(dirs) > 1 {
+               log.Print("DEPRECATED: using comma-separated volume list.")
+               for _, dir := range dirs {
+                       if err := vs.Set(dir); err != nil {
+                               return err
+                       }
+               }
+               return nil
+       }
+       if len(value) == 0 || value[0] != '/' {
+               return errors.New("Invalid volume: must begin with '/'.")
+       }
+       if _, err := os.Stat(value); err != nil {
+               return err
+       }
+       var locker sync.Locker
+       if flagSerializeIO {
+               locker = &sync.Mutex{}
+       }
+       *vs.volumeSet = append(*vs.volumeSet, &UnixVolume{
+               root:     value,
+               locker:   locker,
+               readonly: flagReadonly,
+       })
+       return nil
+}
+
+func init() {
+       flag.Var(
+               &unixVolumeAdder{&volumes},
+               "volumes",
+               "Deprecated synonym for -volume.")
+       flag.Var(
+               &unixVolumeAdder{&volumes},
+               "volume",
+               "Local storage directory. Can be given more than once to add multiple directories. If none are supplied, the default is to use all directories named \"keep\" that exist in the top level directory of a mount point at startup time. Can be a comma-separated list, but this is deprecated: use multiple -volume arguments instead.")
+}
+
+// Discover adds a UnixVolume for every directory named "keep" that is
+// located at the top level of a device- or tmpfs-backed mount point
+// other than "/". It returns the number of volumes added.
+func (vs *unixVolumeAdder) Discover() int {
+       added := 0
+       f, err := os.Open(ProcMounts)
+       if err != nil {
+               log.Fatalf("opening %s: %s", ProcMounts, err)
+       }
+       scanner := bufio.NewScanner(f)
+       for scanner.Scan() {
+               args := strings.Fields(scanner.Text())
+               if err := scanner.Err(); err != nil {
+                       log.Fatalf("reading %s: %s", ProcMounts, err)
+               }
+               dev, mount := args[0], args[1]
+               if mount == "/" {
+                       continue
+               }
+               if dev != "tmpfs" && !strings.HasPrefix(dev, "/dev/") {
+                       continue
+               }
+               keepdir := mount + "/keep"
+               if st, err := os.Stat(keepdir); err != nil || !st.IsDir() {
+                       continue
+               }
+               // Set the -readonly flag (but only for this volume)
+               // if the filesystem is mounted readonly.
+               flagReadonlyWas := flagReadonly
+               for _, fsopt := range strings.Split(args[3], ",") {
+                       if fsopt == "ro" {
+                               flagReadonly = true
+                               break
+                       }
+                       if fsopt == "rw" {
+                               break
+                       }
+               }
+               if err := vs.Set(keepdir); err != nil {
+                       log.Printf("adding %q: %s", keepdir, err)
+               } else {
+                       added++
+               }
+               flagReadonly = flagReadonlyWas
+       }
+       return added
+}
+
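Discover's filter is easiest to follow against sample ProcMounts (normally /proc/mounts) input, where the fields are device, mount point, filesystem type, and mount options (illustrative lines, not from a real host):

    /dev/sdb1 /mnt/keep0 ext4 rw,relatime 0 0
    tmpfs /fast tmpfs rw,nosuid 0 0
    /dev/sda1 / ext4 rw,errors=remount-ro 0 0

The first two mounts are candidates (device- or tmpfs-backed and not "/") and are added if a top-level "keep" directory exists under them; an "ro" in the options field would mark that one volume read-only. The third line is skipped because its mount point is "/".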
 // A UnixVolume stores and retrieves blocks in a local directory.
 type UnixVolume struct {
        // path to the volume's root directory
@@ -26,6 +119,7 @@ type UnixVolume struct {
        readonly bool
 }
 
+// Touch sets the timestamp for the given locator to the current time
 func (v *UnixVolume) Touch(loc string) error {
        if v.readonly {
                return MethodDisabledError
@@ -49,13 +143,14 @@ func (v *UnixVolume) Touch(loc string) error {
        return syscall.Utime(p, &utime)
 }
 
+// Mtime returns the stored timestamp for the given locator.
 func (v *UnixVolume) Mtime(loc string) (time.Time, error) {
        p := v.blockPath(loc)
-       if fi, err := os.Stat(p); err != nil {
+       fi, err := os.Stat(p)
+       if err != nil {
                return time.Time{}, err
-       } else {
-               return fi.ModTime(), nil
        }
+       return fi.ModTime(), nil
 }
 
 // Lock the locker (if one is in use), open the file for reading, and
@@ -79,7 +174,7 @@ func (v *UnixVolume) stat(path string) (os.FileInfo, error) {
        if err == nil {
                if stat.Size() < 0 {
                        err = os.ErrInvalid
-               } else if stat.Size() > BLOCKSIZE {
+               } else if stat.Size() > BlockSize {
                        err = TooLongError
                }
        }
@@ -94,7 +189,7 @@ func (v *UnixVolume) Get(loc string) ([]byte, error) {
        path := v.blockPath(loc)
        stat, err := v.stat(path)
        if err != nil {
-               return nil, err
+               return nil, v.translateError(err)
        }
        buf := bufs.Get(int(stat.Size()))
        err = v.getFunc(path, func(rdr io.Reader) error {
@@ -113,36 +208,11 @@ func (v *UnixVolume) Get(loc string) ([]byte, error) {
 // bytes.Compare(), but uses less memory.
 func (v *UnixVolume) Compare(loc string, expect []byte) error {
        path := v.blockPath(loc)
-       stat, err := v.stat(path)
-       if err != nil {
-               return err
-       }
-       bufLen := 1 << 20
-       if int64(bufLen) > stat.Size() {
-               bufLen = int(stat.Size())
+       if _, err := v.stat(path); err != nil {
+               return v.translateError(err)
        }
-       cmp := expect
-       buf := make([]byte, bufLen)
        return v.getFunc(path, func(rdr io.Reader) error {
-               // Loop invariants: all data read so far matched what
-               // we expected, and the first N bytes of cmp are
-               // expected to equal the next N bytes read from
-               // reader.
-               for {
-                       n, err := rdr.Read(buf)
-                       if n > len(cmp) || bytes.Compare(cmp[:n], buf[:n]) != 0 {
-                               return collisionOrCorrupt(loc[:32], expect[:len(expect)-len(cmp)], buf[:n], rdr)
-                       }
-                       cmp = cmp[n:]
-                       if err == io.EOF {
-                               if len(cmp) != 0 {
-                                       return collisionOrCorrupt(loc[:32], expect[:len(expect)-len(cmp)], nil, nil)
-                               }
-                               return nil
-                       } else if err != nil {
-                               return err
-                       }
-               }
+               return compareReaderWithBuf(rdr, expect, loc[:32])
        })
 }
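The inline loop removed above moves into a shared compareReaderWithBuf helper. Based on that removed code, a sketch of the streaming comparison it performs, assuming the bytes and io imports and the existing collisionOrCorrupt error-classification helper (the Sketch suffix marks this as illustrative):

    func compareReaderWithBufSketch(rdr io.Reader, expect []byte, hash string) error {
            buf := make([]byte, 1<<20)
            cmp := expect
            // Invariant: everything read so far matched, and cmp holds
            // the bytes we still expect to read.
            for {
                    n, err := rdr.Read(buf)
                    if n > len(cmp) || bytes.Compare(cmp[:n], buf[:n]) != 0 {
                            return collisionOrCorrupt(hash, expect[:len(expect)-len(cmp)], buf[:n], rdr)
                    }
                    cmp = cmp[n:]
                    if err == io.EOF {
                            if len(cmp) != 0 {
                                    return collisionOrCorrupt(hash, expect[:len(expect)-len(cmp)], nil, nil)
                            }
                            return nil
                    } else if err != nil {
                            return err
                    }
            }
    }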
 
@@ -222,6 +292,7 @@ func (v *UnixVolume) Status() *VolumeStatus {
 }
 
 var blockDirRe = regexp.MustCompile(`^[0-9a-f]+$`)
+var blockFileRe = regexp.MustCompile(`^[0-9a-f]{32}$`)
 
 // IndexTo writes (to the given Writer) a list of blocks found on this
 // volume which begin with the specified prefix. If the prefix is an
@@ -278,6 +349,9 @@ func (v *UnixVolume) IndexTo(prefix string, w io.Writer) error {
                        if !strings.HasPrefix(name, prefix) {
                                continue
                        }
+                       if !blockFileRe.MatchString(name) {
+                               continue
+                       }
                        _, err = fmt.Fprint(w,
                                name,
                                "+", fileInfo[0].Size(),
@@ -288,6 +362,7 @@ func (v *UnixVolume) IndexTo(prefix string, w io.Writer) error {
        }
 }
 
+// Delete deletes the block data from the unix storage
 func (v *UnixVolume) Delete(loc string) error {
        // Touch() must be called before calling Write() on a block.  Touch()
        // also uses lockfile().  This avoids a race condition between Write()
@@ -315,7 +390,7 @@ func (v *UnixVolume) Delete(loc string) error {
        }
        defer unlockfile(f)
 
-       // If the block has been PUT in the last blob_signature_ttl
+       // If the block has been PUT in the last blobSignatureTTL
        // seconds, return success without removing the block. This
        // protects data from garbage collection until it is no longer
        // possible for clients to retrieve the unreferenced blocks
@@ -323,7 +398,7 @@ func (v *UnixVolume) Delete(loc string) error {
        if fi, err := os.Stat(p); err != nil {
                return err
        } else {
-               if time.Since(fi.ModTime()) < blob_signature_ttl {
+               if time.Since(fi.ModTime()) < blobSignatureTTL {
                        return nil
                }
        }
@@ -343,7 +418,7 @@ func (v *UnixVolume) blockPath(loc string) string {
 }
 
 // IsFull returns true if the free space on the volume is less than
-// MIN_FREE_KILOBYTES.
+// MinFreeKilobytes.
 //
 func (v *UnixVolume) IsFull() (isFull bool) {
        fullSymlink := v.root + "/full"
@@ -359,7 +434,7 @@ func (v *UnixVolume) IsFull() (isFull bool) {
        }
 
        if avail, err := v.FreeDiskSpace(); err == nil {
-               isFull = avail < MIN_FREE_KILOBYTES
+               isFull = avail < MinFreeKilobytes
        } else {
                log.Printf("%s: FreeDiskSpace: %s\n", v, err)
                isFull = false
@@ -391,10 +466,15 @@ func (v *UnixVolume) String() string {
        return fmt.Sprintf("[UnixVolume %s]", v.root)
 }
 
+// Writable returns false if all future Put, Mtime, and Delete calls are expected to fail.
 func (v *UnixVolume) Writable() bool {
        return !v.readonly
 }
 
+func (v *UnixVolume) Replication() int {
+       return 1
+}
+
 // lockfile and unlockfile use flock(2) to manage kernel file locks.
 func lockfile(f *os.File) error {
        return syscall.Flock(int(f.Fd()), syscall.LOCK_EX)
@@ -403,3 +483,16 @@ func lockfile(f *os.File) error {
 func unlockfile(f *os.File) error {
        return syscall.Flock(int(f.Fd()), syscall.LOCK_UN)
 }
+
+// Where appropriate, translate a more specific filesystem error to an
+// error recognized by handlers, like os.ErrNotExist.
+func (v *UnixVolume) translateError(err error) error {
+       switch err.(type) {
+       case *os.PathError:
+               // stat() returns a PathError if the parent directory
+               // (not just the file itself) is missing
+               return os.ErrNotExist
+       default:
+               return err
+       }
+}
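translateError is what keeps handler-level checks working when the miss happens at the directory level rather than the file level. A usage sketch:

    buf, err := v.Get(loc) // Get's stat errors pass through translateError
    if os.IsNotExist(err) {
            // A missing parent directory now looks the same as a
            // missing block file: "not found".
    }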
index 9f370420b8d848c177faa998400db918c7cc99fc..924637f58e5004f1cec307266c87c0b53ae81d03 100644 (file)
@@ -7,8 +7,6 @@ import (
        "io"
        "io/ioutil"
        "os"
-       "regexp"
-       "sort"
        "strings"
        "sync"
        "syscall"
@@ -66,32 +64,49 @@ func (v *TestableUnixVolume) Teardown() {
        }
 }
 
+// serialize = false; readonly = false
 func TestUnixVolumeWithGenericTests(t *testing.T) {
        DoGenericVolumeTests(t, func(t *testing.T) TestableVolume {
                return NewTestableUnixVolume(t, false, false)
        })
 }
 
-func TestGet(t *testing.T) {
-       v := NewTestableUnixVolume(t, false, false)
-       defer v.Teardown()
-       v.Put(TEST_HASH, TEST_BLOCK)
+// serialize = false; readonly = true
+func TestUnixVolumeWithGenericTestsReadOnly(t *testing.T) {
+       DoGenericVolumeTests(t, func(t *testing.T) TestableVolume {
+               return NewTestableUnixVolume(t, false, true)
+       })
+}
 
-       buf, err := v.Get(TEST_HASH)
-       if err != nil {
-               t.Error(err)
-       }
-       if bytes.Compare(buf, TEST_BLOCK) != 0 {
-               t.Errorf("expected %s, got %s", string(TEST_BLOCK), string(buf))
-       }
+// serialize = true; readonly = false
+func TestUnixVolumeWithGenericTestsSerialized(t *testing.T) {
+       DoGenericVolumeTests(t, func(t *testing.T) TestableVolume {
+               return NewTestableUnixVolume(t, true, false)
+       })
+}
+
+// serialize = false; readonly = false
+func TestUnixVolumeHandlersWithGenericVolumeTests(t *testing.T) {
+       DoHandlersWithGenericVolumeTests(t, func(t *testing.T) (*RRVolumeManager, []TestableVolume) {
+               vols := make([]Volume, 2)
+               testableUnixVols := make([]TestableVolume, 2)
+
+               for i := range vols {
+                       v := NewTestableUnixVolume(t, false, false)
+                       vols[i] = v
+                       testableUnixVols[i] = v
+               }
+
+               return MakeRRVolumeManager(vols), testableUnixVols
+       })
 }
 
 func TestGetNotFound(t *testing.T) {
        v := NewTestableUnixVolume(t, false, false)
        defer v.Teardown()
-       v.Put(TEST_HASH, TEST_BLOCK)
+       v.Put(TestHash, TestBlock)
 
-       buf, err := v.Get(TEST_HASH_2)
+       buf, err := v.Get(TestHash2)
        switch {
        case os.IsNotExist(err):
                break
@@ -102,56 +117,20 @@ func TestGetNotFound(t *testing.T) {
        }
 }
 
-func TestIndexTo(t *testing.T) {
-       v := NewTestableUnixVolume(t, false, false)
-       defer v.Teardown()
-
-       v.Put(TEST_HASH, TEST_BLOCK)
-       v.Put(TEST_HASH_2, TEST_BLOCK_2)
-       v.Put(TEST_HASH_3, TEST_BLOCK_3)
-
-       buf := new(bytes.Buffer)
-       v.IndexTo("", buf)
-       index_rows := strings.Split(string(buf.Bytes()), "\n")
-       sort.Strings(index_rows)
-       sorted_index := strings.Join(index_rows, "\n")
-       m, err := regexp.MatchString(
-               `^\n`+TEST_HASH+`\+\d+ \d+\n`+
-                       TEST_HASH_3+`\+\d+ \d+\n`+
-                       TEST_HASH_2+`\+\d+ \d+$`,
-               sorted_index)
-       if err != nil {
-               t.Error(err)
-       } else if !m {
-               t.Errorf("Got index %q for empty prefix", sorted_index)
-       }
-
-       for _, prefix := range []string{"f", "f15", "f15ac"} {
-               buf = new(bytes.Buffer)
-               v.IndexTo(prefix, buf)
-               m, err := regexp.MatchString(`^`+TEST_HASH_2+`\+\d+ \d+\n$`, string(buf.Bytes()))
-               if err != nil {
-                       t.Error(err)
-               } else if !m {
-                       t.Errorf("Got index %q for prefix %q", string(buf.Bytes()), prefix)
-               }
-       }
-}
-
 func TestPut(t *testing.T) {
        v := NewTestableUnixVolume(t, false, false)
        defer v.Teardown()
 
-       err := v.Put(TEST_HASH, TEST_BLOCK)
+       err := v.Put(TestHash, TestBlock)
        if err != nil {
                t.Error(err)
        }
-       p := fmt.Sprintf("%s/%s/%s", v.root, TEST_HASH[:3], TEST_HASH)
+       p := fmt.Sprintf("%s/%s/%s", v.root, TestHash[:3], TestHash)
        if buf, err := ioutil.ReadFile(p); err != nil {
                t.Error(err)
-       } else if bytes.Compare(buf, TEST_BLOCK) != 0 {
+       } else if bytes.Compare(buf, TestBlock) != 0 {
                t.Errorf("Write should have stored %s, did store %s",
-                       string(TEST_BLOCK), string(buf))
+                       string(TestBlock), string(buf))
        }
 }
 
@@ -160,7 +139,7 @@ func TestPutBadVolume(t *testing.T) {
        defer v.Teardown()
 
        os.Chmod(v.root, 000)
-       err := v.Put(TEST_HASH, TEST_BLOCK)
+       err := v.Put(TestHash, TestBlock)
        if err == nil {
                t.Error("Write should have failed")
        }
@@ -170,207 +149,44 @@ func TestUnixVolumeReadonly(t *testing.T) {
        v := NewTestableUnixVolume(t, false, true)
        defer v.Teardown()
 
-       v.PutRaw(TEST_HASH, TEST_BLOCK)
+       v.PutRaw(TestHash, TestBlock)
 
-       _, err := v.Get(TEST_HASH)
+       _, err := v.Get(TestHash)
        if err != nil {
                t.Errorf("got err %v, expected nil", err)
        }
 
-       err = v.Put(TEST_HASH, TEST_BLOCK)
+       err = v.Put(TestHash, TestBlock)
        if err != MethodDisabledError {
                t.Errorf("got err %v, expected MethodDisabledError", err)
        }
 
-       err = v.Touch(TEST_HASH)
+       err = v.Touch(TestHash)
        if err != MethodDisabledError {
                t.Errorf("got err %v, expected MethodDisabledError", err)
        }
 
-       err = v.Delete(TEST_HASH)
+       err = v.Delete(TestHash)
        if err != MethodDisabledError {
                t.Errorf("got err %v, expected MethodDisabledError", err)
        }
 }
 
-// TestPutTouch
-//     Test that when applying PUT to a block that already exists,
-//     the block's modification time is updated.
-func TestPutTouch(t *testing.T) {
-       v := NewTestableUnixVolume(t, false, false)
-       defer v.Teardown()
-
-       if err := v.Put(TEST_HASH, TEST_BLOCK); err != nil {
-               t.Error(err)
-       }
-
-       // We'll verify { t0 < threshold < t1 }, where t0 is the
-       // existing block's timestamp on disk before Put() and t1 is
-       // its timestamp after Put().
-       threshold := time.Now().Add(-time.Second)
-
-       // Set the stored block's mtime far enough in the past that we
-       // can see the difference between "timestamp didn't change"
-       // and "timestamp granularity is too low".
-       v.TouchWithDate(TEST_HASH, time.Now().Add(-20*time.Second))
-
-       // Make sure v.Mtime() agrees the above Utime really worked.
-       if t0, err := v.Mtime(TEST_HASH); err != nil || t0.IsZero() || !t0.Before(threshold) {
-               t.Errorf("Setting mtime failed: %v, %v", t0, err)
-       }
-
-       // Write the same block again.
-       if err := v.Put(TEST_HASH, TEST_BLOCK); err != nil {
-               t.Error(err)
-       }
-
-       // Verify threshold < t1
-       if t1, err := v.Mtime(TEST_HASH); err != nil {
-               t.Error(err)
-       } else if t1.Before(threshold) {
-               t.Errorf("t1 %v should be >= threshold %v after v.Put ", t1, threshold)
-       }
-}
-
-// Serialization tests: launch a bunch of concurrent
-//
-// TODO(twp): show that the underlying Read/Write operations executed
-// serially and not concurrently. The easiest way to do this is
-// probably to activate verbose or debug logging, capture log output
-// and examine it to confirm that Reads and Writes did not overlap.
-//
-// TODO(twp): a proper test of I/O serialization requires that a
-// second request start while the first one is still underway.
-// Guaranteeing that the test behaves this way requires some tricky
-// synchronization and mocking.  For now we'll just launch a bunch of
-// requests simultaenously in goroutines and demonstrate that they
-// return accurate results.
-//
-func TestGetSerialized(t *testing.T) {
-       // Create a volume with I/O serialization enabled.
-       v := NewTestableUnixVolume(t, true, false)
-       defer v.Teardown()
-
-       v.Put(TEST_HASH, TEST_BLOCK)
-       v.Put(TEST_HASH_2, TEST_BLOCK_2)
-       v.Put(TEST_HASH_3, TEST_BLOCK_3)
-
-       sem := make(chan int)
-       go func(sem chan int) {
-               buf, err := v.Get(TEST_HASH)
-               if err != nil {
-                       t.Errorf("err1: %v", err)
-               }
-               if bytes.Compare(buf, TEST_BLOCK) != 0 {
-                       t.Errorf("buf should be %s, is %s", string(TEST_BLOCK), string(buf))
-               }
-               sem <- 1
-       }(sem)
-
-       go func(sem chan int) {
-               buf, err := v.Get(TEST_HASH_2)
-               if err != nil {
-                       t.Errorf("err2: %v", err)
-               }
-               if bytes.Compare(buf, TEST_BLOCK_2) != 0 {
-                       t.Errorf("buf should be %s, is %s", string(TEST_BLOCK_2), string(buf))
-               }
-               sem <- 1
-       }(sem)
-
-       go func(sem chan int) {
-               buf, err := v.Get(TEST_HASH_3)
-               if err != nil {
-                       t.Errorf("err3: %v", err)
-               }
-               if bytes.Compare(buf, TEST_BLOCK_3) != 0 {
-                       t.Errorf("buf should be %s, is %s", string(TEST_BLOCK_3), string(buf))
-               }
-               sem <- 1
-       }(sem)
-
-       // Wait for all goroutines to finish
-       for done := 0; done < 3; {
-               done += <-sem
-       }
-}
-
-func TestPutSerialized(t *testing.T) {
-       // Create a volume with I/O serialization enabled.
-       v := NewTestableUnixVolume(t, true, false)
-       defer v.Teardown()
-
-       sem := make(chan int)
-       go func(sem chan int) {
-               err := v.Put(TEST_HASH, TEST_BLOCK)
-               if err != nil {
-                       t.Errorf("err1: %v", err)
-               }
-               sem <- 1
-       }(sem)
-
-       go func(sem chan int) {
-               err := v.Put(TEST_HASH_2, TEST_BLOCK_2)
-               if err != nil {
-                       t.Errorf("err2: %v", err)
-               }
-               sem <- 1
-       }(sem)
-
-       go func(sem chan int) {
-               err := v.Put(TEST_HASH_3, TEST_BLOCK_3)
-               if err != nil {
-                       t.Errorf("err3: %v", err)
-               }
-               sem <- 1
-       }(sem)
-
-       // Wait for all goroutines to finish
-       for done := 0; done < 3; {
-               done += <-sem
-       }
-
-       // Double check that we actually wrote the blocks we expected to write.
-       buf, err := v.Get(TEST_HASH)
-       if err != nil {
-               t.Errorf("Get #1: %v", err)
-       }
-       if bytes.Compare(buf, TEST_BLOCK) != 0 {
-               t.Errorf("Get #1: expected %s, got %s", string(TEST_BLOCK), string(buf))
-       }
-
-       buf, err = v.Get(TEST_HASH_2)
-       if err != nil {
-               t.Errorf("Get #2: %v", err)
-       }
-       if bytes.Compare(buf, TEST_BLOCK_2) != 0 {
-               t.Errorf("Get #2: expected %s, got %s", string(TEST_BLOCK_2), string(buf))
-       }
-
-       buf, err = v.Get(TEST_HASH_3)
-       if err != nil {
-               t.Errorf("Get #3: %v", err)
-       }
-       if bytes.Compare(buf, TEST_BLOCK_3) != 0 {
-               t.Errorf("Get #3: expected %s, got %s", string(TEST_BLOCK_3), string(buf))
-       }
-}
-
 func TestIsFull(t *testing.T) {
        v := NewTestableUnixVolume(t, false, false)
        defer v.Teardown()
 
-       full_path := v.root + "/full"
+       fullPath := v.root + "/full"
        now := fmt.Sprintf("%d", time.Now().Unix())
-       os.Symlink(now, full_path)
+       os.Symlink(now, fullPath)
        if !v.IsFull() {
                t.Errorf("%s: claims not to be full", v)
        }
-       os.Remove(full_path)
+       os.Remove(fullPath)
 
        // Test with an expired /full link.
        expired := fmt.Sprintf("%d", time.Now().Unix()-3605)
-       os.Symlink(expired, full_path)
+       os.Symlink(expired, fullPath)
        if v.IsFull() {
                t.Errorf("%s: should no longer be full", v)
        }
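
TestIsFull depends on a convention the diff only implies: a volume advertises fullness with a symlink named "full" whose target encodes a Unix timestamp, and the marker expires after an hour (hence the 3605-second offset above). A hedged sketch of that check; the real keepstore implementation may differ in detail:

package sketch

import (
        "os"
        "strconv"
        "time"
)

// isFullSketch reports whether root carries an unexpired "full" marker.
func isFullSketch(root string) bool {
        target, err := os.Readlink(root + "/full")
        if err != nil {
                return false // no marker: not full
        }
        ts, err := strconv.ParseInt(target, 10, 64)
        if err != nil {
                return false // unreadable marker: treat as not full
        }
        // Markers older than an hour no longer count, which is why the
        // test uses now-3605 to land just past the boundary.
        return time.Since(time.Unix(ts, 0)) < time.Hour
}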
@@ -400,9 +216,9 @@ func TestUnixVolumeGetFuncWorkerError(t *testing.T) {
        v := NewTestableUnixVolume(t, false, false)
        defer v.Teardown()
 
-       v.Put(TEST_HASH, TEST_BLOCK)
+       v.Put(TestHash, TestBlock)
        mockErr := errors.New("Mock error")
-       err := v.getFunc(v.blockPath(TEST_HASH), func(rdr io.Reader) error {
+       err := v.getFunc(v.blockPath(TestHash), func(rdr io.Reader) error {
                return mockErr
        })
        if err != mockErr {
@@ -415,7 +231,7 @@ func TestUnixVolumeGetFuncFileError(t *testing.T) {
        defer v.Teardown()
 
        funcCalled := false
-       err := v.getFunc(v.blockPath(TEST_HASH), func(rdr io.Reader) error {
+       err := v.getFunc(v.blockPath(TestHash), func(rdr io.Reader) error {
                funcCalled = true
                return nil
        })
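
The two getFunc tests above pin down one contract: if the file cannot be opened, the worker function never runs and the file error is returned; if the worker runs and fails, its error comes back unchanged. A minimal sketch of that contract (the locking detail is an assumption, informed by the mutex test below):

package sketch

import (
        "io"
        "os"
        "sync"
)

// getFuncSketch opens path, optionally serializes on locker, and runs fn.
func getFuncSketch(path string, locker sync.Locker, fn func(io.Reader) error) error {
        if locker != nil {
                locker.Lock()
                defer locker.Unlock()
        }
        f, err := os.Open(path)
        if err != nil {
                return err // file error wins; fn is never called
        }
        defer f.Close()
        return fn(f) // worker error propagates unchanged
}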
@@ -431,13 +247,13 @@ func TestUnixVolumeGetFuncWorkerWaitsOnMutex(t *testing.T) {
        v := NewTestableUnixVolume(t, false, false)
        defer v.Teardown()
 
-       v.Put(TEST_HASH, TEST_BLOCK)
+       v.Put(TestHash, TestBlock)
 
        mtx := NewMockMutex()
        v.locker = mtx
 
        funcCalled := make(chan struct{})
-       go v.getFunc(v.blockPath(TEST_HASH), func(rdr io.Reader) error {
+       go v.getFunc(v.blockPath(TestHash), func(rdr io.Reader) error {
                funcCalled <- struct{}{}
                return nil
        })
@@ -466,27 +282,39 @@ func TestUnixVolumeCompare(t *testing.T) {
        v := NewTestableUnixVolume(t, false, false)
        defer v.Teardown()
 
-       v.Put(TEST_HASH, TEST_BLOCK)
-       err := v.Compare(TEST_HASH, TEST_BLOCK)
+       v.Put(TestHash, TestBlock)
+       err := v.Compare(TestHash, TestBlock)
        if err != nil {
                t.Errorf("Got err %q, expected nil", err)
        }
 
-       err = v.Compare(TEST_HASH, []byte("baddata"))
+       err = v.Compare(TestHash, []byte("baddata"))
        if err != CollisionError {
                t.Errorf("Got err %q, expected %q", err, CollisionError)
        }
 
-       v.Put(TEST_HASH, []byte("baddata"))
-       err = v.Compare(TEST_HASH, TEST_BLOCK)
+       v.Put(TestHash, []byte("baddata"))
+       err = v.Compare(TestHash, TestBlock)
        if err != DiskHashError {
                t.Errorf("Got err %q, expected %q", err, DiskHashError)
        }
 
-       p := fmt.Sprintf("%s/%s/%s", v.root, TEST_HASH[:3], TEST_HASH)
+       p := fmt.Sprintf("%s/%s/%s", v.root, TestHash[:3], TestHash)
        os.Chmod(p, 000)
-       err = v.Compare(TEST_HASH, TEST_BLOCK)
+       err = v.Compare(TestHash, TestBlock)
        if err == nil || strings.Index(err.Error(), "permission denied") < 0 {
                t.Errorf("Got err %q, expected %q", err, "permission denied")
        }
 }
+
+// TODO(twp): show that the underlying Read/Write operations executed
+// serially and not concurrently. The easiest way to do this is
+// probably to activate verbose or debug logging, capture log output
+// and examine it to confirm that Reads and Writes did not overlap.
+//
+// TODO(twp): a proper test of I/O serialization requires that a
+// second request start while the first one is still underway.
+// Guaranteeing that the test behaves this way requires some tricky
+// synchronization and mocking.  For now we'll just launch a bunch of
+// requests simultaneously in goroutines and demonstrate that they
+// return accurate results.
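
TestUnixVolumeCompare separates two failure modes that are easy to conflate: data that matches the locator but not the caller's bytes (a genuine MD5 collision) versus data that no longer matches its own locator (corruption on disk). A hedged reconstruction of that rule, with stand-in sentinels for keepstore's CollisionError and DiskHashError:

package sketch

import (
        "bytes"
        "crypto/md5"
        "errors"
        "fmt"
)

// Stand-ins for keepstore's real sentinel errors.
var (
        errCollision = errors.New("hash collision")
        errDiskHash  = errors.New("hash mismatch on disk")
)

// compareSketch checks the caller's expected bytes against the data
// actually stored under locator hash loc.
func compareSketch(loc string, onDisk, expect []byte) error {
        if bytes.Equal(onDisk, expect) {
                return nil
        }
        if fmt.Sprintf("%x", md5.Sum(onDisk)) == loc {
                // Stored data matches its locator but not the caller's
                // bytes: the caller found an MD5 collision.
                return errCollision
        }
        // Stored data no longer matches its own locator: disk corruption.
        return errDiskHash
}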
index f1878ffbbc550250ab88c5ea9a4a694d12d63132..27646ad3d8d98c9b58e8693c079e3b40a14d1e1e 100644 (file)
@@ -84,6 +84,7 @@ package main
 
 import "container/list"
 
+// WorkQueue definition
 type WorkQueue struct {
        getStatus chan WorkQueueStatus
        newlist   chan *list.List
@@ -96,6 +97,7 @@ type WorkQueue struct {
        DoneItem chan<- struct{}
 }
 
+// WorkQueueStatus reflects the queue status.
 type WorkQueueStatus struct {
        InProgress int
        Queued     int
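
The newly documented WorkQueue hands a *list.List to a single owner goroutine and serves items back over channels. The sketch below shows the underlying pattern, not the actual keepstore API: the nil-channel trick disables the send case whenever the queue is empty, so the owner blocks until either a new list arrives or a consumer takes an item.

package sketch

import "container/list"

// runQueue owns q outright; callers replace the queue via newlist and
// consume items via next, with no shared mutable state or locks.
func runQueue(newlist <-chan *list.List, next chan<- interface{}) {
        var q *list.List
        for {
                var out chan<- interface{} // nil: send case disabled
                var head interface{}
                if q != nil && q.Len() > 0 {
                        out = next
                        head = q.Front().Value
                }
                select {
                case q = <-newlist: // replace the whole queue
                case out <- head: // hand one item to a consumer
                        q.Remove(q.Front())
                }
        }
}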
index fc11e2776b93c62d4a6580e24a5b92e027f963e0..c92fc9b00dd0cc723e4fd64bd2b001daa8bce60c 100755 (executable)
@@ -100,6 +100,7 @@ begin
     FileUtils.chown_R(l[:username], l[:username], userdotssh)
     File.chmod(0700, userdotssh)
     File.chmod(0750, @homedir)
+    File.chmod(0600, userauthkeys)
   end
 
   devnull.close
index f5186074c6bc30c9d2230e91ea12764daddb797b..6e46bc0f4c6283ab0d73da212529299bdec4ba10 100644 (file)
@@ -30,6 +30,19 @@ def arvados_timestamp(timestr):
 def timestamp_fresh(timestamp, fresh_time):
     return (time.time() - timestamp) < fresh_time
 
+def arvados_node_missing(arvados_node, fresh_time):
+    """Indicate whether the cloud node corresponding to the Arvados
+    node is "missing".
+
+    If True, the node has not pinged the API server within the timeout
+    period.  If False, the ping is up to date.  If the node has never
+    pinged, returns None.
+    """
+    if arvados_node["last_ping_at"] is None:
+        return None
+    else:
+        return not timestamp_fresh(arvados_timestamp(arvados_node["last_ping_at"]), fresh_time)
+
 class ShutdownTimer(object):
     """Keep track of a cloud node's shutdown windows.
 
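
Note that arvados_node_missing is deliberately tri-state: None means the node has never pinged, True means the last ping is stale, and False means it is fresh; the shutdown logic further down handles each case differently. The same rule sketched in Go, with illustrative types that are not part of the node manager:

package sketch

import "time"

// nodeMissing returns nil for "never pinged"; otherwise it points to
// whether the last ping is older than freshFor.
func nodeMissing(lastPingAt *time.Time, freshFor time.Duration) *bool {
        if lastPingAt == nil {
                return nil
        }
        missing := time.Since(*lastPingAt) >= freshFor
        return &missing
}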
index 6d5c223fac15d6e25a95f44446eb88f4b54a6f42..1c828c13c3b5033aa92353c2b403c179e00120c6 100644 (file)
@@ -10,7 +10,7 @@ import libcloud.common.types as cloud_types
 import pykka
 
 from .. import \
-    arvados_node_fqdn, arvados_node_mtime, arvados_timestamp, timestamp_fresh
+    arvados_node_fqdn, arvados_node_mtime, arvados_timestamp, timestamp_fresh, arvados_node_missing
 from ...clientactor import _notify_subscribers
 from ... import config
 
@@ -273,8 +273,10 @@ class ComputeNodeMonitorActor(config.actor_class):
     for shutdown.
     """
     def __init__(self, cloud_node, cloud_node_start_time, shutdown_timer,
-                 cloud_fqdn_func, timer_actor, update_actor, arvados_node=None,
-                 poll_stale_after=600, node_stale_after=3600):
+                 cloud_fqdn_func, timer_actor, update_actor, cloud_client,
+                 arvados_node=None, poll_stale_after=600, node_stale_after=3600,
+                 boot_fail_after=1800):
         super(ComputeNodeMonitorActor, self).__init__()
         self._later = self.actor_ref.proxy()
         self._logger = logging.getLogger('arvnodeman.computenode')
@@ -283,10 +285,12 @@ class ComputeNodeMonitorActor(config.actor_class):
         self._cloud_node_fqdn = cloud_fqdn_func
         self._timer = timer_actor
         self._update = update_actor
+        self._cloud = cloud_client
         self.cloud_node = cloud_node
         self.cloud_node_start_time = cloud_node_start_time
         self.poll_stale_after = poll_stale_after
         self.node_stale_after = node_stale_after
+        self.boot_fail_after = boot_fail_after
         self.subscribers = set()
         self.arvados_node = None
         self._later.update_arvados_node(arvados_node)
@@ -321,13 +325,21 @@ class ComputeNodeMonitorActor(config.actor_class):
     def shutdown_eligible(self):
         if not self._shutdowns.window_open():
             return False
-        elif self.arvados_node is None:
-            # If this is a new, unpaired node, it's eligible for
-            # shutdown--we figure there was an error during bootstrap.
-            return timestamp_fresh(self.cloud_node_start_time,
-                                   self.node_stale_after)
-        else:
-            return self.in_state('idle')
+        if self.arvados_node is None:
+            # Node is unpaired.
+            # If it hasn't pinged Arvados after boot_fail seconds, shut it down
+            return not timestamp_fresh(self.cloud_node_start_time, self.boot_fail_after)
+        missing = arvados_node_missing(self.arvados_node, self.node_stale_after)
+        if missing and self._cloud.broken(self.cloud_node):
+            # Node is paired, but Arvados says it is missing and the cloud says the node
+            # is in an error state, so shut it down.
+            return True
+        if missing is None and self._cloud.broken(self.cloud_node):
+            self._logger.warning(
+                "cloud reports broken node, but paired node %s never pinged "
+                "(bug?) -- skipped check for node_stale_after",
+                self.arvados_node['uuid'])
+        return self.in_state('idle')
 
     def consider_shutdown(self):
         next_opening = self._shutdowns.next_opening()
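
Flattening the rewritten shutdown_eligible into a decision table makes the new behavior easier to audit: closed window, never; unpaired, only after boot_fail_after; paired but missing and broken in the cloud, always; otherwise, only when idle. A sketch of that table in Go, reusing the tri-state "missing" convention above (parameter names are illustrative):

package sketch

// shutdownEligible mirrors the branch order of the Python method above.
func shutdownEligible(windowOpen, paired, bootedRecently bool,
        missing *bool, brokenInCloud, idle bool) bool {
        if !windowOpen {
                return false
        }
        if !paired {
                // Unpaired node: if boot_fail_after has elapsed without a
                // ping, assume bootstrap failed and shut it down.
                return !bootedRecently
        }
        if missing != nil && *missing && brokenInCloud {
                // Arvados lost track of it and the cloud reports an error
                // state: shut it down.
                return true
        }
        // A broken cloud node that never pinged only logs a warning.
        return idle
}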
index 71e73f17f64ffbfc16f5b6485622b23fd764cdbf..ec5014e9f9cf1e8848353cf3c755e22875227850 100644 (file)
@@ -10,11 +10,15 @@ from . import \
 from . import ComputeNodeShutdownActor as ShutdownActorBase
 
 class ComputeNodeShutdownActor(ShutdownActorBase):
-    SLURM_END_STATES = frozenset(['down\n', 'down*\n', 'drain\n', 'fail\n'])
+    SLURM_END_STATES = frozenset(['down\n', 'down*\n',
+                                  'drain\n', 'drain*\n',
+                                  'fail\n', 'fail*\n'])
+    SLURM_DRAIN_STATES = frozenset(['drain\n', 'drng\n'])
 
     def on_start(self):
         arv_node = self._arvados_node()
         if arv_node is None:
+            self._nodename = None
             return super(ComputeNodeShutdownActor, self).on_start()
         else:
             self._nodename = arv_node['hostname']
@@ -27,23 +31,40 @@ class ComputeNodeShutdownActor(ShutdownActorBase):
         cmd.extend(args)
         subprocess.check_output(cmd)
 
-    @ShutdownActorBase._retry((subprocess.CalledProcessError,))
+    def _get_slurm_state(self):
+        return subprocess.check_output(['sinfo', '--noheader', '-o', '%t', '-n', self._nodename])
+
+    # The following methods retry on OSError.  This is intended to mitigate bug
+    # #6321, where fork() of node manager raises "OSError: [Errno 12] Cannot
+    # allocate memory", killing the shutdown actor prematurely and tending to
+    # leave node manager wedged in a state where it won't allocate new nodes
+    # or shut down gracefully.  The underlying causes of the excessive memory
+    # usage that results in the "Cannot allocate memory" error are still being
+    # investigated.
+
+    @ShutdownActorBase._retry((subprocess.CalledProcessError, OSError))
     def cancel_shutdown(self):
-        self._set_node_state('RESUME')
+        if self._nodename:
+            if self._get_slurm_state() in self.SLURM_DRAIN_STATES:
+                # Resume from "drng" or "drain"
+                self._set_node_state('RESUME')
+            else:
+                # Node is in a state such as 'idle' or 'alloc' so don't
+                # try to resume it because that will just raise an error.
+                pass
         return super(ComputeNodeShutdownActor, self).cancel_shutdown()
 
+    @ShutdownActorBase._retry((subprocess.CalledProcessError, OSError))
     @ShutdownActorBase._stop_if_window_closed
-    @ShutdownActorBase._retry((subprocess.CalledProcessError,))
     def issue_slurm_drain(self):
         self._set_node_state('DRAIN', 'Reason=Node Manager shutdown')
         self._logger.info("Waiting for SLURM node %s to drain", self._nodename)
         self._later.await_slurm_drain()
 
+    @ShutdownActorBase._retry((subprocess.CalledProcessError, OSError))
     @ShutdownActorBase._stop_if_window_closed
-    @ShutdownActorBase._retry((subprocess.CalledProcessError,))
     def await_slurm_drain(self):
-        output = subprocess.check_output(
-            ['sinfo', '--noheader', '-o', '%t', '-n', self._nodename])
+        output = self._get_slurm_state()
         if output in self.SLURM_END_STATES:
             self._later.shutdown_node()
         else:
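
Stripped of the decorator shuffle, the retry behavior added above amounts to: run the operation, and if it fails with a whitelisted error (CalledProcessError or, now, OSError), try again instead of letting the actor die. A minimal sketch of the idea; the real _retry reschedules through the actor's timer rather than looping inline:

package sketch

// withRetry re-runs op while it fails with a retryable error, up to
// the given number of attempts.
func withRetry(attempts int, retryable func(error) bool, op func() error) error {
        var err error
        for i := 0; i < attempts; i++ {
                if err = op(); err == nil || !retryable(err) {
                        return err
                }
        }
        return err
}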
index 724c772733ae0ed1479e7b31d24238a87b591b1f..14e804f385dc3e957fd707c34e7e4504ed1951e7 100644 (file)
@@ -103,6 +103,10 @@ class BaseComputeNodeDriver(object):
         """
         raise NotImplementedError("BaseComputeNodeDriver.arvados_create_kwargs")
 
+    def broken(self, cloud_node):
+        """Return true if libcloud has indicated the node is in a "broken" state."""
+        return False
+
     def _make_ping_url(self, arvados_node):
         return 'https://{}/arvados/v1/nodes/{}/ping?ping_secret={}'.format(
             self.ping_host, arvados_node['uuid'],
index b1494d02851f0f78b85ddc070a16211983e98b2f..b1ec5e6abc95e923e1b6583d3c33e49a3247368d 100644 (file)
@@ -16,6 +16,7 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
 
     DEFAULT_DRIVER = cloud_provider.get_driver(cloud_types.Provider.AZURE_ARM)
     SEARCH_CACHE = {}
+    CLOUD_ERRORS = BaseComputeNodeDriver.CLOUD_ERRORS + (BaseHTTPError,)
 
     def __init__(self, auth_kwargs, list_kwargs, create_kwargs,
                  driver_class=DEFAULT_DRIVER):
@@ -81,6 +82,12 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
                 super(ComputeNodeDriver, self).list_nodes()
                 if node.extra["tags"].get("arvados-class") == self.tags["arvados-class"]]
 
+    def broken(self, cloud_node):
+        """Return true if libcloud has indicated the node is in a "broken" state."""
+    # UNKNOWN means the node state is unrecognized, which in practice means some
+    # failure mode that the Azure libcloud driver doesn't know how to interpret.
+        return (cloud_node.state in (cloud_types.NodeState.ERROR, cloud_types.NodeState.UNKNOWN))
+
     @classmethod
     def node_fqdn(cls, node):
         return node.extra["tags"].get("hostname")
index 44f15132643f558e3ed3591935ac1f517de589d5..1d52073ce5ff7a362e0b8e9cc13b12c0f2d3b6a2 100644 (file)
@@ -159,7 +159,9 @@ class NodeManagerDaemonActor(actor_class):
             timer_actor=self._timer,
             arvados_node=None,
             poll_stale_after=self.poll_stale_after,
-            node_stale_after=self.node_stale_after).proxy()
+            node_stale_after=self.node_stale_after,
+            cloud_client=self._cloud_driver,
+            boot_fail_after=self.boot_fail_after).proxy()
         actor.subscribe(self._later.node_can_shutdown)
         self._cloud_nodes_actor.subscribe_to(cloud_node.id,
                                              actor.update_cloud_node)
@@ -180,9 +182,14 @@ class NodeManagerDaemonActor(actor_class):
                     self._pair_nodes(record, arv_rec.arvados_node)
                     break
         for key, record in self.cloud_nodes.orphans.iteritems():
+            if key in self.shutdowns:
+                try:
+                    self.shutdowns[key].stop().get()
+                except pykka.ActorDeadError:
+                    pass
+                del self.shutdowns[key]
             record.actor.stop()
             record.cloud_node = None
-            self.shutdowns.pop(key, None)
 
     def update_arvados_nodes(self, nodelist):
         self._update_poll_time('arvados_nodes')
@@ -207,6 +214,13 @@ class NodeManagerDaemonActor(actor_class):
                                  self.cloud_nodes.nodes.itervalues())
                    if busy)
 
+    def _nodes_missing(self):
+        return sum(1 for arv_node in
+                   pykka.get_all(rec.actor.arvados_node for rec in
+                                 self.cloud_nodes.nodes.itervalues()
+                                 if rec.actor.cloud_node.get().id not in self.shutdowns)
+                   if arv_node and cnode.arvados_node_missing(arv_node, self.node_stale_after))
+
     def _nodes_wanted(self):
         up_count = self._nodes_up()
         under_min = self.min_nodes - up_count
@@ -216,7 +230,7 @@ class NodeManagerDaemonActor(actor_class):
         elif under_min > 0:
             return under_min
         else:
-            up_count -= len(self.shutdowns) + self._nodes_busy()
+            up_count -= len(self.shutdowns) + self._nodes_busy() + self._nodes_missing()
             return len(self.last_wishlist) - up_count
 
     def _nodes_excess(self):
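
With _nodes_missing in the mix, the wishlist arithmetic changes meaning: a node that is shutting down, busy, or missing no longer counts as usable capacity, so the daemon will boot a replacement for a missing node instead of waiting on it. The same arithmetic as a worked sketch (names are illustrative, not the daemon's attributes):

package sketch

// nodesWanted mirrors the updated _nodes_wanted: enforce max and min
// first, then compare the wishlist against genuinely usable capacity.
func nodesWanted(wishlist, up, minNodes, maxNodes, shutdowns, busy, missing int) int {
        if over := up - maxNodes; over >= 0 {
                return -over
        }
        if under := minNodes - up; under > 0 {
                return under
        }
        usable := up - (shutdowns + busy + missing)
        return wishlist - usable
}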
index 6d85e86d3274d4e6f6f7dc1fcb6935e5e5597f2b..e74fb8eedfd1f23b7e7d40185f6496d3d498e533 100644 (file)
@@ -31,11 +31,11 @@ setup(name='arvados-node-manager',
         'python-daemon',
         ],
       dependency_links = [
-          "https://github.com/curoverse/libcloud/archive/apache-libcloud-0.18.1.dev3.zip"
+          "https://github.com/curoverse/libcloud/archive/apache-libcloud-0.18.1.dev4.zip"
       ],
       scripts=['bin/arvados-node-manager'],
       test_suite='tests',
-      tests_require=['mock>=1.0', "apache-libcloud==0.18.1.dev3"],
+      tests_require=['pbr<1.7.0', 'mock>=1.0', "apache-libcloud==0.18.1.dev4"],
       zip_safe=False,
       cmdclass={'egg_info': tagger},
       )
index c22e7a0e0b8d16a0d55b782df27e1c0bfe3ecacb..e718fc134b7a20723f7f33ef5aeefd5095763eb6 100644 (file)
@@ -128,12 +128,14 @@ class ComputeNodeShutdownActorMixin(testutil.ActorTestMixin):
         self.cloud_node = cloud_node
         self.arvados_node = arvados_node
 
-    def make_actor(self, cancellable=True):
+    def make_actor(self, cancellable=True, start_time=None):
         if not hasattr(self, 'timer'):
             self.make_mocks()
+        if start_time is None:
+            start_time = time.time()
         monitor_actor = dispatch.ComputeNodeMonitorActor.start(
-            self.cloud_node, time.time(), self.shutdowns,
-            testutil.cloud_node_fqdn, self.timer, self.updates,
+            self.cloud_node, start_time, self.shutdowns,
+            testutil.cloud_node_fqdn, self.timer, self.updates, self.cloud_client,
             self.arvados_node)
         self.shutdown_actor = self.ACTOR_CLASS.start(
             self.timer, self.cloud_client, self.arvados_client, monitor_actor,
@@ -190,7 +192,7 @@ class ComputeNodeShutdownActorTestCase(ComputeNodeShutdownActorMixin,
     ACTOR_CLASS = dispatch.ComputeNodeShutdownActor
 
     def test_easy_shutdown(self):
-        self.make_actor()
+        self.make_actor(start_time=0)
         self.check_success_flag(True)
         self.assertTrue(self.cloud_client.destroy_node.called)
 
@@ -203,7 +205,7 @@ class ComputeNodeShutdownActorTestCase(ComputeNodeShutdownActorMixin,
     def test_shutdown_retries_when_cloud_fails(self):
         self.make_mocks()
         self.cloud_client.destroy_node.return_value = False
-        self.make_actor()
+        self.make_actor(start_time=0)
         self.assertIsNone(self.shutdown_actor.success.get(self.TIMEOUT))
         self.cloud_client.destroy_node.return_value = True
         self.check_success_flag(True)
@@ -241,6 +243,8 @@ class ComputeNodeMonitorActorTestCase(testutil.ActorTestMixin,
         self.updates = mock.MagicMock(name='update_mock')
         self.cloud_mock = testutil.cloud_node_mock(node_num)
         self.subscriber = mock.Mock(name='subscriber_mock')
+        self.cloud_client = mock.MagicMock(name='cloud_client')
+        self.cloud_client.broken.return_value = False
 
     def make_actor(self, node_num=1, arv_node=None, start_time=None):
         if not hasattr(self, 'cloud_mock'):
@@ -249,8 +253,8 @@ class ComputeNodeMonitorActorTestCase(testutil.ActorTestMixin,
             start_time = time.time()
         self.node_actor = dispatch.ComputeNodeMonitorActor.start(
             self.cloud_mock, start_time, self.shutdowns,
-            testutil.cloud_node_fqdn, self.timer, self.updates,
-            arv_node).proxy()
+            testutil.cloud_node_fqdn, self.timer, self.updates, self.cloud_client,
+            arv_node, boot_fail_after=300).proxy()
         self.node_actor.subscribe(self.subscriber).get(self.TIMEOUT)
 
     def node_state(self, *states):
@@ -298,23 +302,48 @@ class ComputeNodeMonitorActorTestCase(testutil.ActorTestMixin,
         self.assertFalse(self.subscriber.called)
 
     def test_shutdown_subscription(self):
-        self.make_actor()
+        self.make_actor(start_time=0)
         self.shutdowns._set_state(True, 600)
         self.node_actor.consider_shutdown().get(self.TIMEOUT)
         self.assertTrue(self.subscriber.called)
         self.assertEqual(self.node_actor.actor_ref.actor_urn,
                          self.subscriber.call_args[0][0].actor_ref.actor_urn)
 
-    def test_shutdown_without_arvados_node(self):
+    def test_no_shutdown_booting(self):
         self.make_actor()
         self.shutdowns._set_state(True, 600)
-        self.assertTrue(self.node_actor.shutdown_eligible().get(self.TIMEOUT))
+        self.assertFalse(self.node_actor.shutdown_eligible().get(self.TIMEOUT))
 
-    def test_no_shutdown_without_arvados_node_and_old_cloud_node(self):
+    def test_shutdown_without_arvados_node(self):
         self.make_actor(start_time=0)
         self.shutdowns._set_state(True, 600)
+        self.assertTrue(self.node_actor.shutdown_eligible().get(self.TIMEOUT))
+
+    def test_no_shutdown_missing(self):
+        arv_node = testutil.arvados_node_mock(10, job_uuid=None,
+                                              crunch_worker_state="down",
+                                              last_ping_at='1970-01-01T01:02:03.04050607Z')
+        self.make_actor(10, arv_node)
+        self.shutdowns._set_state(True, 600)
         self.assertFalse(self.node_actor.shutdown_eligible().get(self.TIMEOUT))
 
+    def test_no_shutdown_running_broken(self):
+        arv_node = testutil.arvados_node_mock(12, job_uuid=None,
+                                              crunch_worker_state="down")
+        self.make_actor(12, arv_node)
+        self.shutdowns._set_state(True, 600)
+        self.cloud_client.broken.return_value = True
+        self.assertFalse(self.node_actor.shutdown_eligible().get(self.TIMEOUT))
+
+    def test_shutdown_missing_broken(self):
+        arv_node = testutil.arvados_node_mock(11, job_uuid=None,
+                                              crunch_worker_state="down",
+                                              last_ping_at='1970-01-01T01:02:03.04050607Z')
+        self.make_actor(11, arv_node)
+        self.shutdowns._set_state(True, 600)
+        self.cloud_client.broken.return_value = True
+        self.assertTrue(self.node_actor.shutdown_eligible().get(self.TIMEOUT))
+
     def test_no_shutdown_when_window_closed(self):
         self.make_actor(3, testutil.arvados_node_mock(3, job_uuid=None))
         self.assertFalse(self.node_actor.shutdown_eligible().get(self.TIMEOUT))
index ac3ebf0435ac4e8a780cf0399b5015d51001bbba..8648783bac5889f11a328af3b277bd1f21da5665 100644 (file)
@@ -55,17 +55,33 @@ class SLURMComputeNodeShutdownActorTestCase(ComputeNodeShutdownActorMixin,
     def test_slurm_bypassed_when_no_arvados_node(self, proc_mock):
         # Test we correctly handle a node that failed to bootstrap.
         proc_mock.return_value = 'idle\n'
-        self.make_actor()
+        self.make_actor(start_time=0)
         self.check_success_flag(True)
         self.assertFalse(proc_mock.called)
 
     def test_node_undrained_when_shutdown_window_closes(self, proc_mock):
-        proc_mock.return_value = 'alloc\n'
+        proc_mock.side_effect = iter(['drng\n', 'idle\n'])
+        self.make_mocks(arvados_node=testutil.arvados_node_mock(job_uuid=True))
+        self.make_actor()
+        self.check_success_flag(False, 2)
+        self.check_slurm_got_args(proc_mock, 'NodeName=compute99', 'State=RESUME')
+
+    def test_alloc_node_undrained_when_shutdown_window_closes(self, proc_mock):
+        proc_mock.side_effect = iter(['alloc\n'])
         self.make_mocks(arvados_node=testutil.arvados_node_mock(job_uuid=True))
         self.make_actor()
         self.check_success_flag(False, 2)
-        self.check_slurm_got_args(proc_mock, 'NodeName=compute99',
-                                  'State=RESUME')
+        self.check_slurm_got_args(proc_mock, 'sinfo', '--noheader', '-o', '%t', '-n', 'compute99')
+
+    def test_cancel_shutdown_retry(self, proc_mock):
+        proc_mock.side_effect = iter([OSError, 'drain\n', OSError, 'idle\n'])
+        self.make_mocks(arvados_node=testutil.arvados_node_mock(job_uuid=True))
+        self.make_actor()
+        self.check_success_flag(False, 2)
+
+    def test_issue_slurm_drain_retry(self, proc_mock):
+        proc_mock.side_effect = iter([OSError, '', OSError, 'drng\n'])
+        self.check_success_after_reset(proc_mock)
 
     def test_arvados_node_cleaned_after_shutdown(self, proc_mock):
         proc_mock.return_value = 'drain\n'
index b406f1357671f0efe85813004ff2ddb0629584a1..16f560457765e3878e8e4c2a1bae6d46f8615a43 100644 (file)
@@ -123,7 +123,53 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
         self.make_daemon([testutil.cloud_node_mock()],
                          want_sizes=[testutil.MockSize(1)])
         self.stop_proxy(self.daemon)
-        self.assertFalse(self.node_setup.called)
+        self.assertFalse(self.node_setup.start.called)
+
+    def test_dont_count_missing_as_busy(self):
+        size = testutil.MockSize(1)
+        self.make_daemon(cloud_nodes=[testutil.cloud_node_mock(1),
+                                      testutil.cloud_node_mock(2)],
+                         arvados_nodes=[testutil.arvados_node_mock(1),
+                                      testutil.arvados_node_mock(2, last_ping_at='1970-01-01T01:02:03.04050607Z')],
+                         want_sizes=[size, size])
+        self.stop_proxy(self.daemon)
+        self.assertTrue(self.node_setup.start.called)
+
+    def test_missing_counts_towards_max(self):
+        size = testutil.MockSize(1)
+        self.make_daemon(cloud_nodes=[testutil.cloud_node_mock(1),
+                                      testutil.cloud_node_mock(2)],
+                         arvados_nodes=[testutil.arvados_node_mock(1),
+                                        testutil.arvados_node_mock(2, last_ping_at='1970-01-01T01:02:03.04050607Z')],
+                         want_sizes=[size, size],
+                         max_nodes=2)
+        self.stop_proxy(self.daemon)
+        self.assertFalse(self.node_setup.start.called)
+
+    def test_excess_counts_missing(self):
+        size = testutil.MockSize(1)
+        cloud_nodes = [testutil.cloud_node_mock(1), testutil.cloud_node_mock(2)]
+        self.make_daemon(cloud_nodes=cloud_nodes,
+                         arvados_nodes=[testutil.arvados_node_mock(1),
+                                        testutil.arvados_node_mock(2, last_ping_at='1970-01-01T01:02:03.04050607Z')],
+                         want_sizes=[size])
+        self.assertEqual(2, self.alive_monitor_count())
+        for mon_ref in self.monitor_list():
+            self.daemon.node_can_shutdown(mon_ref.proxy()).get(self.TIMEOUT)
+        self.assertEqual(1, self.node_shutdown.start.call_count)
+
+    def test_missing_shutdown_not_excess(self):
+        size = testutil.MockSize(1)
+        cloud_nodes = [testutil.cloud_node_mock(1), testutil.cloud_node_mock(2)]
+        self.make_daemon(cloud_nodes=cloud_nodes,
+                         arvados_nodes=[testutil.arvados_node_mock(1),
+                                        testutil.arvados_node_mock(2, last_ping_at='1970-01-01T01:02:03.04050607Z')],
+                         want_sizes=[size])
+        self.daemon.shutdowns.get()[cloud_nodes[1].id] = True
+        self.assertEqual(2, self.alive_monitor_count())
+        for mon_ref in self.monitor_list():
+            self.daemon.node_can_shutdown(mon_ref.proxy()).get(self.TIMEOUT)
+        self.assertEqual(0, self.node_shutdown.start.call_count)
 
     def test_booting_nodes_counted(self):
         cloud_node = testutil.cloud_node_mock(1)
@@ -459,3 +505,26 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
         self.timer.deliver()
         self.stop_proxy(self.daemon)
         self.assertEqual(1, self.node_setup.start.call_count)
+
+    def test_shutdown_actor_stopped_when_cloud_node_delisted(self):
+        self.make_daemon(cloud_nodes=[testutil.cloud_node_mock()])
+        self.assertEqual(1, self.alive_monitor_count())
+        monitor = self.monitor_list()[0].proxy()
+        self.daemon.node_can_shutdown(monitor).get(self.TIMEOUT)
+        self.daemon.update_cloud_nodes([]).get(self.TIMEOUT)
+        self.stop_proxy(self.daemon)
+        self.assertEqual(
+            1, self.node_shutdown.start().proxy().stop().get.call_count)
+
+    def test_shutdown_actor_cleanup_copes_with_dead_actors(self):
+        self.make_daemon(cloud_nodes=[testutil.cloud_node_mock()])
+        self.assertEqual(1, self.alive_monitor_count())
+        monitor = self.monitor_list()[0].proxy()
+        self.daemon.node_can_shutdown(monitor).get(self.TIMEOUT)
+        # We're mainly testing that update_cloud_nodes catches and handles
+        # the ActorDeadError.
+        stop_method = self.node_shutdown.start().proxy().stop().get
+        stop_method.side_effect = pykka.ActorDeadError
+        self.daemon.update_cloud_nodes([]).get(self.TIMEOUT)
+        self.stop_proxy(self.daemon)
+        self.assertEqual(1, stop_method.call_count)
diff --git a/tools/keep-exercise/.gitignore b/tools/keep-exercise/.gitignore
new file mode 100644 (file)
index 0000000..6a1d10c
--- /dev/null
@@ -0,0 +1 @@
+keep-exercise
diff --git a/tools/keep-exercise/keep-exercise.go b/tools/keep-exercise/keep-exercise.go
new file mode 100644 (file)
index 0000000..a94c01e
--- /dev/null
@@ -0,0 +1,157 @@
+// Testing tool for Keep services.
+//
+// keep-exercise helps measure throughput and test reliability under
+// various usage patterns.
+//
+// By default, it reads and writes blocks containing 2^26 NUL
+// bytes. This generates network traffic without consuming much disk
+// space.
+//
+// For a more realistic test, enable -vary-request. Warning: this will
+// fill your storage volumes with random data if you leave it running,
+// which can cost you money or leave you with too little room for
+// useful data.
+//
+package main
+
+import (
+       "crypto/rand"
+       "encoding/binary"
+       "flag"
+       "io"
+       "io/ioutil"
+       "log"
+       "time"
+
+       "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
+       "git.curoverse.com/arvados.git/sdk/go/keepclient"
+)
+
+// Command line config knobs
+var (
+       BlockSize     = flag.Int("block-size", keepclient.BLOCKSIZE, "bytes per read/write op")
+       ReadThreads   = flag.Int("rthreads", 1, "number of concurrent readers")
+       WriteThreads  = flag.Int("wthreads", 1, "number of concurrent writers")
+       VaryRequest   = flag.Bool("vary-request", false, "vary the data for each request: consumes disk space, exercises write behavior")
+       VaryThread    = flag.Bool("vary-thread", false, "use -wthreads different data blocks")
+       Replicas      = flag.Int("replicas", 1, "replication level for writing")
+       StatsInterval = flag.Duration("stats-interval", time.Second, "time interval between IO stats reports, or 0 to disable")
+)
+
+func main() {
+       flag.Parse()
+
+       arv, err := arvadosclient.MakeArvadosClient()
+       if err != nil {
+               log.Fatal(err)
+       }
+       kc, err := keepclient.MakeKeepClient(&arv)
+       if err != nil {
+               log.Fatal(err)
+       }
+       kc.Want_replicas = *Replicas
+       kc.Client.Timeout = 10 * time.Minute
+
+       nextBuf := make(chan []byte, *WriteThreads)
+       nextLocator := make(chan string, *ReadThreads+*WriteThreads)
+
+       go countBeans(nextLocator)
+       for i := 0; i < *WriteThreads; i++ {
+               go makeBufs(nextBuf, i)
+               go doWrites(kc, nextBuf, nextLocator)
+       }
+       for i := 0; i < *ReadThreads; i++ {
+               go doReads(kc, nextLocator)
+       }
+       <-make(chan struct{})
+}
+
+// Send 1234 to bytesInChan when we receive 1234 bytes from keepstore.
+var bytesInChan = make(chan uint64)
+var bytesOutChan = make(chan uint64)
+
+// Send struct{}{} to errorsChan when an error happens.
+var errorsChan = make(chan struct{})
+
+func countBeans(nextLocator chan string) {
+       t0 := time.Now()
+       var tickChan <-chan time.Time
+       if *StatsInterval > 0 {
+               tickChan = time.NewTicker(*StatsInterval).C
+       }
+       var bytesIn uint64
+       var bytesOut uint64
+       var errors uint64
+       for {
+               select {
+               case <-tickChan:
+                       elapsed := time.Since(t0)
+                       log.Printf("%v elapsed: read %v bytes (%.1f MiB/s), wrote %v bytes (%.1f MiB/s), errors %d",
+                               elapsed,
+                               bytesIn, (float64(bytesIn) / elapsed.Seconds() / 1048576),
+                               bytesOut, (float64(bytesOut) / elapsed.Seconds() / 1048576),
+                               errors,
+                       )
+               case i := <-bytesInChan:
+                       bytesIn += i
+               case o := <-bytesOutChan:
+                       bytesOut += o
+               case <-errorsChan:
+                       errors++
+               }
+       }
+}
+
+func makeBufs(nextBuf chan []byte, threadID int) {
+       buf := make([]byte, *BlockSize)
+       if *VaryThread {
+               binary.PutVarint(buf, int64(threadID))
+       }
+       for {
+               if *VaryRequest {
+                       if _, err := io.ReadFull(rand.Reader, buf); err != nil {
+                               log.Fatal(err)
+                       }
+               }
+               nextBuf <- buf
+       }
+}
+
+func doWrites(kc *keepclient.KeepClient, nextBuf chan []byte, nextLocator chan string) {
+       for buf := range nextBuf {
+               locator, _, err := kc.PutB(buf)
+               if err != nil {
+                       log.Print(err)
+                       errorsChan <- struct{}{}
+                       continue
+               }
+               bytesOutChan <- uint64(len(buf))
+               for cap(nextLocator) > len(nextLocator)+*WriteThreads {
+                       // Give the readers something to do, unless
+                       // they have lots queued up already.
+                       nextLocator <- locator
+               }
+       }
+}
+
+func doReads(kc *keepclient.KeepClient, nextLocator chan string) {
+       for locator := range nextLocator {
+               rdr, size, url, err := kc.Get(locator)
+               if err != nil {
+                       log.Print(err)
+                       errorsChan <- struct{}{}
+                       continue
+               }
+               n, err := io.Copy(ioutil.Discard, rdr)
+               rdr.Close()
+               if n != size || err != nil {
+                       log.Printf("Got %d bytes (expected %d) from %s: %v", n, size, url, err)
+                       errorsChan <- struct{}{}
+                       continue
+                       // Note we don't count the bytes received in
+                       // partial/corrupt responses: we are measuring
+                       // throughput, not resource consumption.
+               }
+               bytesInChan <- uint64(n)
+       }
+}
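
The loop guarding nextLocator in doWrites is the subtle part of keep-exercise: a writer feeds locators to the readers only while the buffered channel has slack beyond what the other writers might be about to fill, so a stalled reader can never block a write. The trick isolated, as a hedged sketch (it tolerates the same benign race on len(ch) as the original):

package sketch

// feedWithoutBlocking sends v for as long as ch has spare capacity
// beyond reserve; it returns rather than ever blocking the producer.
func feedWithoutBlocking(ch chan string, v string, reserve int) {
        for cap(ch) > len(ch)+reserve {
                ch <- v
        }
}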
diff --git a/tools/keep-rsync/.gitignore b/tools/keep-rsync/.gitignore
new file mode 100644 (file)
index 0000000..5ee7f3b
--- /dev/null
@@ -0,0 +1 @@
+keep-rsync
diff --git a/tools/keep-rsync/keep-rsync.go b/tools/keep-rsync/keep-rsync.go
new file mode 100644 (file)
index 0000000..7cd795e
--- /dev/null
@@ -0,0 +1,290 @@
+package main
+
+import (
+       "bufio"
+       "crypto/tls"
+       "errors"
+       "flag"
+       "fmt"
+       "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
+       "git.curoverse.com/arvados.git/sdk/go/keepclient"
+       "io/ioutil"
+       "log"
+       "net/http"
+       "os"
+       "regexp"
+       "strings"
+       "time"
+)
+
+func main() {
+       err := doMain()
+       if err != nil {
+               log.Fatalf("%v", err)
+       }
+}
+
+func doMain() error {
+       flags := flag.NewFlagSet("keep-rsync", flag.ExitOnError)
+
+       srcConfigFile := flags.String(
+               "src",
+               "",
+               "Source configuration filename. May be either a pathname to a config file, or (for example) 'foo' as shorthand for $HOME/.config/arvados/foo.conf")
+
+       dstConfigFile := flags.String(
+               "dst",
+               "",
+               "Destination configuration filename. May be either a pathname to a config file, or (for example) 'foo' as shorthand for $HOME/.config/arvados/foo.conf")
+
+       srcKeepServicesJSON := flags.String(
+               "src-keep-services-json",
+               "",
+               "An optional list of available source keepservices. "+
+                       "If not provided, this list is obtained from the API server configured via -src.")
+
+       dstKeepServicesJSON := flags.String(
+               "dst-keep-services-json",
+               "",
+               "An optional list of available destination keepservices. "+
+                       "If not provided, this list is obtained from the API server configured via -dst.")
+
+       replications := flags.Int(
+               "replications",
+               0,
+               "Number of replications to write to the destination. If not specified, "+
+                       "the default replication level configured on the destination server will be used.")
+
+       prefix := flags.String(
+               "prefix",
+               "",
+               "Index prefix")
+
+       // Parse args; omit the first arg which is the command name
+       flags.Parse(os.Args[1:])
+
+       srcConfig, srcBlobSigningKey, err := loadConfig(*srcConfigFile)
+       if err != nil {
+               return fmt.Errorf("Error loading src configuration from file: %s", err.Error())
+       }
+
+       dstConfig, _, err := loadConfig(*dstConfigFile)
+       if err != nil {
+               return fmt.Errorf("Error loading dst configuration from file: %s", err.Error())
+       }
+
+       // setup src and dst keepclients
+       kcSrc, err := setupKeepClient(srcConfig, *srcKeepServicesJSON, false, 0)
+       if err != nil {
+               return fmt.Errorf("Error configuring src keepclient: %s", err.Error())
+       }
+
+       kcDst, err := setupKeepClient(dstConfig, *dstKeepServicesJSON, true, *replications)
+       if err != nil {
+               return fmt.Errorf("Error configuring dst keepclient: %s", err.Error())
+       }
+
+       // Copy blocks not found in dst from src
+       err = performKeepRsync(kcSrc, kcDst, srcBlobSigningKey, *prefix)
+       if err != nil {
+               return fmt.Errorf("Error while syncing data: %s", err.Error())
+       }
+
+       return nil
+}
+
+type apiConfig struct {
+       APIToken        string
+       APIHost         string
+       APIHostInsecure bool
+       ExternalClient  bool
+}
+
+// Load src and dst config from given files
+func loadConfig(configFile string) (config apiConfig, blobSigningKey string, err error) {
+       if configFile == "" {
+               return config, blobSigningKey, errors.New("config file not specified")
+       }
+
+       config, blobSigningKey, err = readConfigFromFile(configFile)
+       if err != nil {
+               return config, blobSigningKey, fmt.Errorf("Error reading config file: %v", err)
+       }
+
+       return
+}
+
+var matchTrue = regexp.MustCompile("^(?i:1|yes|true)$")
+
+// Read config from file
+func readConfigFromFile(filename string) (config apiConfig, blobSigningKey string, err error) {
+       if !strings.Contains(filename, "/") {
+               filename = os.Getenv("HOME") + "/.config/arvados/" + filename + ".conf"
+       }
+
+       content, err := ioutil.ReadFile(filename)
+
+       if err != nil {
+               return config, "", err
+       }
+
+       lines := strings.Split(string(content), "\n")
+       for _, line := range lines {
+               if line == "" {
+                       continue
+               }
+
+               kv := strings.SplitN(line, "=", 2)
+               key := strings.TrimSpace(kv[0])
+               value := strings.TrimSpace(kv[1])
+
+               switch key {
+               case "ARVADOS_API_TOKEN":
+                       config.APIToken = value
+               case "ARVADOS_API_HOST":
+                       config.APIHost = value
+               case "ARVADOS_API_HOST_INSECURE":
+                       config.APIHostInsecure = matchTrue.MatchString(value)
+               case "ARVADOS_EXTERNAL_CLIENT":
+                       config.ExternalClient = matchTrue.MatchString(value)
+               case "ARVADOS_BLOB_SIGNING_KEY":
+                       blobSigningKey = value
+               }
+       }
+       return
+}
+
+// setup keepclient using the config provided
+func setupKeepClient(config apiConfig, keepServicesJSON string, isDst bool, replications int) (kc *keepclient.KeepClient, err error) {
+       arv := arvadosclient.ArvadosClient{
+               ApiToken:    config.APIToken,
+               ApiServer:   config.APIHost,
+               ApiInsecure: config.APIHostInsecure,
+               Client: &http.Client{Transport: &http.Transport{
+                       TLSClientConfig: &tls.Config{InsecureSkipVerify: config.APIHostInsecure}}},
+               External: config.ExternalClient,
+       }
+
+       // if keepServicesJSON is provided, use it to load services; else, use DiscoverKeepServers
+       if keepServicesJSON == "" {
+               kc, err = keepclient.MakeKeepClient(&arv)
+               if err != nil {
+                       return nil, err
+               }
+       } else {
+               kc = keepclient.New(&arv)
+               err = kc.LoadKeepServicesFromJSON(keepServicesJSON)
+               if err != nil {
+                       return kc, err
+               }
+       }
+
+       if isDst {
+               // Get default replications value from destination, if it is not already provided
+               if replications == 0 {
+                       value, err := arv.Discovery("defaultCollectionReplication")
+                       if err == nil {
+                               replications = int(value.(float64))
+                       } else {
+                               return nil, err
+                       }
+               }
+
+               kc.Want_replicas = replications
+       }
+
+       return kc, nil
+}
+
+// Get unique block locators from src and dst
+// Copy any blocks missing in dst
+func performKeepRsync(kcSrc, kcDst *keepclient.KeepClient, blobSigningKey, prefix string) error {
+       // Get unique locators from src
+       srcIndex, err := getUniqueLocators(kcSrc, prefix)
+       if err != nil {
+               return err
+       }
+
+       // Get unique locators from dst
+       dstIndex, err := getUniqueLocators(kcDst, prefix)
+       if err != nil {
+               return err
+       }
+
+       // Get list of locators found in src, but missing in dst
+       toBeCopied := getMissingLocators(srcIndex, dstIndex)
+
+       // Copy each missing block to dst
+       log.Printf("Before keep-rsync, there are %d blocks in src and %d blocks in dst. Starting to copy the %d blocks in src that are missing from dst.",
+               len(srcIndex), len(dstIndex), len(toBeCopied))
+
+       err = copyBlocksToDst(toBeCopied, kcSrc, kcDst, blobSigningKey)
+
+       return err
+}
+
+// Get list of unique locators from the specified cluster
+func getUniqueLocators(kc *keepclient.KeepClient, prefix string) (map[string]bool, error) {
+       uniqueLocators := map[string]bool{}
+
+       // Get index and dedup
+       for uuid := range kc.LocalRoots() {
+               reader, err := kc.GetIndex(uuid, prefix)
+               if err != nil {
+                       return uniqueLocators, err
+               }
+               scanner := bufio.NewScanner(reader)
+               for scanner.Scan() {
+                       uniqueLocators[strings.Split(scanner.Text(), " ")[0]] = true
+               }
+       }
+
+       return uniqueLocators, nil
+}
+
+// Get list of locators that are in src but not in dst
+func getMissingLocators(srcLocators, dstLocators map[string]bool) []string {
+       var missingLocators []string
+       for locator := range srcLocators {
+               if _, ok := dstLocators[locator]; !ok {
+                       missingLocators = append(missingLocators, locator)
+               }
+       }
+       return missingLocators
+}
+
+// Copy blocks from src to dst; only those that are missing in dst are copied
+func copyBlocksToDst(toBeCopied []string, kcSrc, kcDst *keepclient.KeepClient, blobSigningKey string) error {
+       total := len(toBeCopied)
+
+       startedAt := time.Now()
+       for done, locator := range toBeCopied {
+               if done == 0 {
+                       log.Printf("Copying data block %d of %d (%.2f%% done): %v", done+1, total,
+                               float64(done)/float64(total)*100, locator)
+               } else {
+                       timePerBlock := time.Since(startedAt) / time.Duration(done)
+                       log.Printf("Copying data block %d of %d (%.2f%% done, %v est. time remaining): %v", done+1, total,
+                               float64(done)/float64(total)*100, timePerBlock*time.Duration(total-done), locator)
+               }
+
+               getLocator := locator
+               expiresAt := time.Now().AddDate(0, 0, 1)
+               if blobSigningKey != "" {
+                       getLocator = keepclient.SignLocator(getLocator, kcSrc.Arvados.ApiToken, expiresAt, []byte(blobSigningKey))
+               }
+
+               reader, len, _, err := kcSrc.Get(getLocator)
+               if err != nil {
+                       return fmt.Errorf("Error getting block: %v %v", locator, err)
+               }
+
+               _, _, err = kcDst.PutHR(getLocator[:32], reader, len)
+               if err != nil {
+                       return fmt.Errorf("Error copying data block: %v %v", locator, err)
+               }
+       }
+
+       log.Printf("Successfully copied %d blocks to destination.", total)
+       return nil
+}
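
The read path in copyBlocksToDst is worth isolating: when the source cluster enforces permissions, the bare locator must be signed before Get will accept it. A hedged usage sketch built only from calls that appear above (SignLocator, Get, and the Arvados.ApiToken field); the locator value is hypothetical:

package sketch

import (
        "log"
        "time"

        "git.curoverse.com/arvados.git/sdk/go/keepclient"
)

// readSigned fetches one block the way copyBlocksToDst does.
func readSigned(kc *keepclient.KeepClient, locator, blobSigningKey string) {
        getLocator := locator // e.g. "acbd18db4cc2f85cedef654fccc4a4d8+3" (hypothetical)
        if blobSigningKey != "" {
                // Sign with a one-day expiry, matching the code above.
                getLocator = keepclient.SignLocator(getLocator, kc.Arvados.ApiToken,
                        time.Now().AddDate(0, 0, 1), []byte(blobSigningKey))
        }
        rdr, size, _, err := kc.Get(getLocator)
        if err != nil {
                log.Fatalf("Error getting block %v: %v", locator, err)
        }
        defer rdr.Close()
        _ = size // a caller would stream rdr into kcDst.PutHR(hash, rdr, size)
}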
diff --git a/tools/keep-rsync/keep-rsync_test.go b/tools/keep-rsync/keep-rsync_test.go
new file mode 100644 (file)
index 0000000..6fbb535
--- /dev/null
@@ -0,0 +1,476 @@
+package main
+
+import (
+       "crypto/md5"
+       "fmt"
+       "io/ioutil"
+       "os"
+       "strings"
+       "testing"
+       "time"
+
+       "git.curoverse.com/arvados.git/sdk/go/arvadostest"
+       "git.curoverse.com/arvados.git/sdk/go/keepclient"
+
+       . "gopkg.in/check.v1"
+)
+
+// Gocheck boilerplate
+func Test(t *testing.T) {
+       TestingT(t)
+}
+
+// Gocheck boilerplate
+var _ = Suite(&ServerRequiredSuite{})
+var _ = Suite(&ServerNotRequiredSuite{})
+var _ = Suite(&DoMainTestSuite{})
+
+// Tests that require the Keep server running
+type ServerRequiredSuite struct{}
+type ServerNotRequiredSuite struct{}
+type DoMainTestSuite struct{}
+
+func (s *ServerRequiredSuite) SetUpSuite(c *C) {
+       // Start API server
+       arvadostest.StartAPI()
+}
+
+func (s *ServerRequiredSuite) TearDownSuite(c *C) {
+       arvadostest.StopAPI()
+       arvadostest.ResetEnv()
+}
+
+var initialArgs []string
+
+func (s *DoMainTestSuite) SetUpSuite(c *C) {
+       initialArgs = os.Args
+}
+
+var kcSrc, kcDst *keepclient.KeepClient
+var srcKeepServicesJSON, dstKeepServicesJSON, blobSigningKey string
+
+func (s *ServerRequiredSuite) SetUpTest(c *C) {
+       // reset all variables between tests
+       blobSigningKey = ""
+       srcKeepServicesJSON = ""
+       dstKeepServicesJSON = ""
+       kcSrc = &keepclient.KeepClient{}
+       kcDst = &keepclient.KeepClient{}
+}
+
+func (s *ServerRequiredSuite) TearDownTest(c *C) {
+       arvadostest.StopKeep(3)
+}
+
+func (s *DoMainTestSuite) SetUpTest(c *C) {
+       args := []string{"keep-rsync"}
+       os.Args = args
+}
+
+func (s *DoMainTestSuite) TearDownTest(c *C) {
+       os.Args = initialArgs
+}
+
+var testKeepServicesJSON = "{ \"kind\":\"arvados#keepServiceList\", \"etag\":\"\", \"self_link\":\"\", \"offset\":null, \"limit\":null, \"items\":[ { \"href\":\"/keep_services/zzzzz-bi6l4-123456789012340\", \"kind\":\"arvados#keepService\", \"etag\":\"641234567890enhj7hzx432e5\", \"uuid\":\"zzzzz-bi6l4-123456789012340\", \"owner_uuid\":\"zzzzz-tpzed-123456789012345\", \"service_host\":\"keep0.zzzzz.arvadosapi.com\", \"service_port\":25107, \"service_ssl_flag\":false, \"service_type\":\"disk\", \"read_only\":false }, { \"href\":\"/keep_services/zzzzz-bi6l4-123456789012341\", \"kind\":\"arvados#keepService\", \"etag\":\"641234567890enhj7hzx432e5\", \"uuid\":\"zzzzz-bi6l4-123456789012341\", \"owner_uuid\":\"zzzzz-tpzed-123456789012345\", \"service_host\":\"keep0.zzzzz.arvadosapi.com\", \"service_port\":25108, \"service_ssl_flag\":false, \"service_type\":\"disk\", \"read_only\":false } ], \"items_available\":2 }"
+
+// Testing keep-rsync needs two sets of keep services: src and dst.
+// The test setup hence creates 3 servers instead of the default 2,
+// and uses the first 2 as src and the 3rd as dst keep servers.
+func setupRsync(c *C, enforcePermissions bool, replications int) {
+       // srcConfig
+       var srcConfig apiConfig
+       srcConfig.APIHost = os.Getenv("ARVADOS_API_HOST")
+       srcConfig.APIToken = os.Getenv("ARVADOS_API_TOKEN")
+       srcConfig.APIHostInsecure = matchTrue.MatchString(os.Getenv("ARVADOS_API_HOST_INSECURE"))
+
+       // dstConfig
+       var dstConfig apiConfig
+       dstConfig.APIHost = os.Getenv("ARVADOS_API_HOST")
+       dstConfig.APIToken = os.Getenv("ARVADOS_API_TOKEN")
+       dstConfig.APIHostInsecure = matchTrue.MatchString(os.Getenv("ARVADOS_API_HOST_INSECURE"))
+
+       if enforcePermissions {
+               blobSigningKey = "zfhgfenhffzltr9dixws36j1yhksjoll2grmku38mi7yxd66h5j4q9w4jzanezacp8s6q0ro3hxakfye02152hncy6zml2ed0uc"
+       }
+
+       // Start Keep servers
+       arvadostest.StartKeep(3, enforcePermissions)
+
+       // setup keepclients
+       var err error
+       kcSrc, err = setupKeepClient(srcConfig, srcKeepServicesJSON, false, 0)
+       c.Check(err, IsNil)
+
+       kcDst, err = setupKeepClient(dstConfig, dstKeepServicesJSON, true, replications)
+       c.Check(err, IsNil)
+
+       for uuid := range kcSrc.LocalRoots() {
+               if strings.HasSuffix(uuid, "02") {
+                       delete(kcSrc.LocalRoots(), uuid)
+               }
+       }
+       for uuid := range kcSrc.GatewayRoots() {
+               if strings.HasSuffix(uuid, "02") {
+                       delete(kcSrc.GatewayRoots(), uuid)
+               }
+       }
+       for uuid := range kcSrc.WritableLocalRoots() {
+               if strings.HasSuffix(uuid, "02") {
+                       delete(kcSrc.WritableLocalRoots(), uuid)
+               }
+       }
+
+       for uuid := range kcDst.LocalRoots() {
+               if strings.HasSuffix(uuid, "00") || strings.HasSuffix(uuid, "01") {
+                       delete(kcDst.LocalRoots(), uuid)
+               }
+       }
+       for uuid := range kcDst.GatewayRoots() {
+               if strings.HasSuffix(uuid, "00") || strings.HasSuffix(uuid, "01") {
+                       delete(kcDst.GatewayRoots(), uuid)
+               }
+       }
+       for uuid := range kcDst.WritableLocalRoots() {
+               if strings.HasSuffix(uuid, "00") || strings.HasSuffix(uuid, "01") {
+                       delete(kcDst.WritableLocalRoots(), uuid)
+               }
+       }
+
+       if replications == 0 {
+               // Must have gotten the default replications value of 2 from the dst discovery document
+               c.Assert(kcDst.Want_replicas, Equals, 2)
+       } else {
+               // Since replications value is provided, it is used
+               c.Assert(kcDst.Want_replicas, Equals, replications)
+       }
+}
+
+func (s *ServerRequiredSuite) TestRsyncPutInOne_GetFromOtherShouldFail(c *C) {
+       setupRsync(c, false, 1)
+
+       // Put a block in src and verify that it is not found in dst
+       testNoCrosstalk(c, "test-data-1", kcSrc, kcDst)
+
+       // Put a block in dst and verify that it is not found in src
+       testNoCrosstalk(c, "test-data-2", kcDst, kcSrc)
+}
+
+func (s *ServerRequiredSuite) TestRsyncWithBlobSigning_PutInOne_GetFromOtherShouldFail(c *C) {
+       setupRsync(c, true, 1)
+
+       // Put a block in src and verify that it is not found in dst
+       testNoCrosstalk(c, "test-data-1", kcSrc, kcDst)
+
+       // Put a block in dst and verify that it is not found in src
+       testNoCrosstalk(c, "test-data-2", kcDst, kcSrc)
+}
+
+// testNoCrosstalk puts a block using kc1 and then tries to Get it
+// using kc2; the Get should fail with a "Block not found" error.
+func testNoCrosstalk(c *C, testData string, kc1, kc2 *keepclient.KeepClient) {
+       // Put a block using kc1
+       locator, _, err := kc1.PutB([]byte(testData))
+       c.Assert(err, IsNil)
+
+       locator = strings.Split(locator, "+")[0]
+       _, _, _, err = kc2.Get(keepclient.SignLocator(locator, kc2.Arvados.ApiToken, time.Now().AddDate(0, 0, 1), []byte(blobSigningKey)))
+       c.Assert(err, NotNil)
+       c.Check(err.Error(), Equals, "Block not found")
+}
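+
+// Note on the Get above: when the Keep servers enforce permissions, a
+// Get succeeds only with a locator carrying a valid permission
+// signature (roughly "<hash>+A<signature>@<expiry>", as produced by
+// keepclient.SignLocator). Signing with the correct key here ensures
+// the failure is the block genuinely being absent from kc2's servers,
+// not a signature error.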
+
+// Test keep-rsync initialization, with srcKeepServicesJSON
+func (s *ServerRequiredSuite) TestRsyncInitializeWithKeepServicesJSON(c *C) {
+       srcKeepServicesJSON = testKeepServicesJSON
+
+       setupRsync(c, false, 1)
+
+       localRoots := kcSrc.LocalRoots()
+       c.Check(localRoots, NotNil)
+
+       _, ok := localRoots["zzzzz-bi6l4-123456789012340"]
+       c.Check(ok, Equals, true)
+
+       _, ok = localRoots["zzzzz-bi6l4-123456789012341"]
+       c.Check(ok, Equals, true)
+}
+
+// Test keep-rsync initialization with default replications count
+func (s *ServerRequiredSuite) TestInitializeRsyncDefaultReplicationsCount(c *C) {
+       setupRsync(c, false, 0)
+}
+
+// Test keep-rsync initialization with replications count argument
+func (s *ServerRequiredSuite) TestInitializeRsyncReplicationsCount(c *C) {
+       setupRsync(c, false, 3)
+}
+
+// Put some blocks in src and some more in dst,
+// then copy the missing blocks from src to dst.
+func (s *ServerRequiredSuite) TestKeepRsync(c *C) {
+       testKeepRsync(c, false, "")
+}
+
+// Put some blocks in src and some more in dst with blob signing enabled,
+// then copy the missing blocks from src to dst.
+func (s *ServerRequiredSuite) TestKeepRsync_WithBlobSigning(c *C) {
+       testKeepRsync(c, true, "")
+}
+
+// Put some blocks in src and some more in dst, then rsync with a
+// prefix that matches one of the src blocks; only missing blocks
+// with that prefix should be copied from src to dst.
+func (s *ServerRequiredSuite) TestKeepRsync_WithPrefix(c *C) {
+       data := []byte("test-data-4")
+       hash := fmt.Sprintf("%x", md5.Sum(data))
+
+       testKeepRsync(c, false, hash[0:3])
+       c.Check(len(dstIndex) > len(dstLocators), Equals, true)
+}
+
+// Put some blocks in src and some more in dst, then rsync with a
+// prefix that matches nothing in src; no blocks should be copied.
+func (s *ServerRequiredSuite) TestKeepRsync_WithNoSuchPrefixInSrc(c *C) {
+       testKeepRsync(c, false, "999")
+       c.Check(len(dstIndex), Equals, len(dstLocators))
+}
+
+// Put 5 blocks in src and 2 of those in dst, so src has 3 blocks
+// that dst lacks. Also put 2 extra blocks in dst only.
+// Run rsync and verify that all 7 distinct blocks end up in dst.
+func testKeepRsync(c *C, enforcePermissions bool, prefix string) {
+       setupRsync(c, enforcePermissions, 1)
+
+       // Put test blocks in src and dst
+       setupTestData(c, prefix)
+
+       err := performKeepRsync(kcSrc, kcDst, blobSigningKey, prefix)
+       c.Check(err, IsNil)
+
+       // GetIndex from dst and verify that the src blocks matching the
+       // prefix and the extra dst-only blocks are all found.
+       dstIndex, err = getUniqueLocators(kcDst, "")
+       c.Check(err, IsNil)
+
+       for _, locator := range srcLocatorsMatchingPrefix {
+               _, ok := dstIndex[locator]
+               c.Assert(ok, Equals, true)
+       }
+
+       for _, locator := range extraDstLocators {
+               _, ok := dstIndex[locator]
+               c.Assert(ok, Equals, true)
+       }
+
+       if prefix == "" {
+               // dst now has all 5 src blocks plus the 2 extra dst-only blocks
+               c.Assert(len(dstIndex), Equals, len(srcLocators)+len(extraDstLocators))
+       } else {
+               // dst now has the src block(s) matching the prefix, the 2 src blocks
+               // that were also put in dst during setup, and the 2 extra dst-only blocks
+               c.Assert(len(dstIndex), Equals, len(srcLocatorsMatchingPrefix)+len(extraDstLocators)+2)
+       }
+}
+
+// Test data created in src and dst by setupTestData.
+var srcLocators, srcLocatorsMatchingPrefix, dstLocators, extraDstLocators []string
+var dstIndex map[string]bool
+
+func setupTestData(c *C, indexPrefix string) {
+       srcLocators = []string{}
+       srcLocatorsMatchingPrefix = []string{}
+       dstLocators = []string{}
+       extraDstLocators = []string{}
+       dstIndex = make(map[string]bool)
+
+       // Put a few blocks in src using kcSrc
+       for i := 0; i < 5; i++ {
+               hash, _, err := kcSrc.PutB([]byte(fmt.Sprintf("test-data-%d", i)))
+               c.Check(err, IsNil)
+
+               srcLocators = append(srcLocators, strings.Split(hash, "+A")[0])
+               if strings.HasPrefix(hash, indexPrefix) {
+                       srcLocatorsMatchingPrefix = append(srcLocatorsMatchingPrefix, strings.Split(hash, "+A")[0])
+               }
+       }
+
+       // Put first two of those src blocks in dst using kcDst
+       for i := 0; i < 2; i++ {
+               hash, _, err := kcDst.PutB([]byte(fmt.Sprintf("test-data-%d", i)))
+               c.Check(err, IsNil)
+               dstLocators = append(dstLocators, strings.Split(hash, "+A")[0])
+       }
+
+       // Put two more blocks in dst; they are not in src at all
+       for i := 0; i < 2; i++ {
+               hash, _, err := kcDst.PutB([]byte(fmt.Sprintf("other-data-%d", i)))
+               c.Check(err, IsNil)
+               dstLocators = append(dstLocators, strings.Split(hash, "+A")[0])
+               extraDstLocators = append(extraDstLocators, strings.Split(hash, "+A")[0])
+       }
+}
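+
+// trimSignature is a sketch (not used above) of a helper that could
+// replace the repeated strings.Split(hash, "+A")[0] calls in
+// setupTestData: PutB returns a signed locator such as
+// "<hash>+<size>+A<signature>@<expiry>" when blob signing is enabled,
+// and splitting on "+A" keeps just "<hash>+<size>". The name is
+// hypothetical.
+func trimSignature(locator string) string {
+       return strings.Split(locator, "+A")[0]
+}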
+
+// Set up rsync using srcKeepServicesJSON with fake keepservers.
+// Expect error during performKeepRsync due to unreachable src keepservers.
+func (s *ServerRequiredSuite) TestErrorDuringRsync_FakeSrcKeepservers(c *C) {
+       srcKeepServicesJSON = testKeepServicesJSON
+
+       setupRsync(c, false, 1)
+
+       err := performKeepRsync(kcSrc, kcDst, "", "")
+       c.Check(strings.HasSuffix(err.Error(), "no such host"), Equals, true)
+}
+
+// Set up rsync using dstKeepServicesJSON with fake keepservers.
+// Expect error during performKeepRsync due to unreachable dst keepservers.
+func (s *ServerRequiredSuite) TestErrorDuringRsync_FakeDstKeepservers(c *C) {
+       dstKeepServicesJSON = testKeepServicesJSON
+
+       setupRsync(c, false, 1)
+
+       err := performKeepRsync(kcSrc, kcDst, "", "")
+       c.Check(strings.HasSuffix(err.Error(), "no such host"), Equals, true)
+}
+
+// Test rsync with signature error during Get from src.
+func (s *ServerRequiredSuite) TestErrorDuringRsync_ErrorGettingBlockFromSrc(c *C) {
+       setupRsync(c, true, 1)
+
+       // put some blocks in src and dst
+       setupTestData(c, "")
+
+       // Change blob signing key to a fake key, so that Get from src fails
+       blobSigningKey = "thisisfakeblobsigningkey"
+
+       err := performKeepRsync(kcSrc, kcDst, blobSigningKey, "")
+       c.Check(strings.HasSuffix(err.Error(), "Block not found"), Equals, true)
+}
+
+// Test rsync with error during Put to dst.
+func (s *ServerRequiredSuite) TestErrorDuringRsync_ErrorPuttingBlockInDst(c *C) {
+       setupRsync(c, false, 1)
+
+       // put some blocks in src and dst
+       setupTestData(c, "")
+
+       // Increase Want_replicas on dst beyond what its single Keep server
+       // can satisfy, so Put fails with insufficient replicas
+       kcDst.Want_replicas = 2
+
+       err := performKeepRsync(kcSrc, kcDst, blobSigningKey, "")
+       c.Check(strings.HasSuffix(err.Error(), "Could not write sufficient replicas"), Equals, true)
+}
+
+// Test loadConfig func
+func (s *ServerNotRequiredSuite) TestLoadConfig(c *C) {
+       // Set up a src config file
+       srcFile := setupConfigFile(c, "src-config")
+       defer os.Remove(srcFile.Name())
+       srcConfigFile := srcFile.Name()
+
+       // Set up a dst config file
+       dstFile := setupConfigFile(c, "dst-config")
+       defer os.Remove(dstFile.Name())
+       dstConfigFile := dstFile.Name()
+
+       // load configuration from those files
+       srcConfig, srcBlobSigningKey, err := loadConfig(srcConfigFile)
+       c.Check(err, IsNil)
+
+       c.Assert(srcConfig.APIHost, Equals, os.Getenv("ARVADOS_API_HOST"))
+       c.Assert(srcConfig.APIToken, Equals, os.Getenv("ARVADOS_API_TOKEN"))
+       c.Assert(srcConfig.APIHostInsecure, Equals, matchTrue.MatchString(os.Getenv("ARVADOS_API_HOST_INSECURE")))
+       c.Assert(srcConfig.ExternalClient, Equals, false)
+
+       dstConfig, _, err := loadConfig(dstConfigFile)
+       c.Check(err, IsNil)
+
+       c.Assert(dstConfig.APIHost, Equals, os.Getenv("ARVADOS_API_HOST"))
+       c.Assert(dstConfig.APIToken, Equals, os.Getenv("ARVADOS_API_TOKEN"))
+       c.Assert(dstConfig.APIHostInsecure, Equals, matchTrue.MatchString(os.Getenv("ARVADOS_API_HOST_INSECURE")))
+       c.Assert(dstConfig.ExternalClient, Equals, false)
+
+       c.Assert(srcBlobSigningKey, Equals, "abcdefg")
+}
+
+// Test loadConfig with no config file specified
+func (s *ServerNotRequiredSuite) TestLoadConfig_MissingSrcConfig(c *C) {
+       _, _, err := loadConfig("")
+       c.Assert(err.Error(), Equals, "config file not specified")
+}
+
+// Test loadConfig with a nonexistent config file
+func (s *ServerNotRequiredSuite) TestLoadConfig_ErrorLoadingSrcConfig(c *C) {
+       _, _, err := loadConfig("no-such-config-file")
+       c.Assert(strings.HasSuffix(err.Error(), "no such file or directory"), Equals, true)
+}
+
+func setupConfigFile(c *C, name string) *os.File {
+       // Create a temporary config file
+       file, err := ioutil.TempFile(os.TempDir(), name)
+       c.Check(err, IsNil)
+
+       fileContent := "ARVADOS_API_HOST=" + os.Getenv("ARVADOS_API_HOST") + "\n"
+       fileContent += "ARVADOS_API_TOKEN=" + os.Getenv("ARVADOS_API_TOKEN") + "\n"
+       fileContent += "ARVADOS_API_HOST_INSECURE=" + os.Getenv("ARVADOS_API_HOST_INSECURE") + "\n"
+       fileContent += "ARVADOS_EXTERNAL_CLIENT=false\n"
+       fileContent += "ARVADOS_BLOB_SIGNING_KEY=abcdefg"
+
+       _, err = file.Write([]byte(fileContent))
+       c.Check(err, IsNil)
+
+       return file
+}
+
+func (s *DoMainTestSuite) Test_doMain_NoSrcConfig(c *C) {
+       err := doMain()
+       c.Check(err, NotNil)
+       c.Assert(err.Error(), Equals, "Error loading src configuration from file: config file not specified")
+}
+
+func (s *DoMainTestSuite) Test_doMain_SrcButNoDstConfig(c *C) {
+       srcConfig := setupConfigFile(c, "src")
+       defer os.Remove(srcConfig.Name())
+       args := []string{"-replications", "3", "-src", srcConfig.Name()}
+       os.Args = append(os.Args, args...)
+       err := doMain()
+       c.Check(err, NotNil)
+       c.Assert(err.Error(), Equals, "Error loading dst configuration from file: config file not specified")
+}
+
+func (s *DoMainTestSuite) Test_doMain_BadSrcConfig(c *C) {
+       args := []string{"-src", "abcd"}
+       os.Args = append(os.Args, args...)
+       err := doMain()
+       c.Check(err, NotNil)
+       c.Assert(strings.HasPrefix(err.Error(), "Error loading src configuration from file: Error reading config file"), Equals, true)
+}
+
+func (s *DoMainTestSuite) Test_doMain_WithReplicationsButNoSrcConfig(c *C) {
+       args := []string{"-replications", "3"}
+       os.Args = append(os.Args, args...)
+       err := doMain()
+       c.Check(err, NotNil)
+       c.Assert(err.Error(), Equals, "Error loading src configuration from file: config file not specified")
+}
+
+func (s *DoMainTestSuite) Test_doMainWithSrcAndDstConfig(c *C) {
+       srcConfig := setupConfigFile(c, "src")
+       defer os.Remove(srcConfig.Name())
+       dstConfig := setupConfigFile(c, "dst")
+       defer os.Remove(dstConfig.Name())
+       args := []string{"-src", srcConfig.Name(), "-dst", dstConfig.Name()}
+       os.Args = append(os.Args, args...)
+
+       // Start keepservers. Since we do none of the root pruning done in
+       // setupRsync, kcSrc and kcDst will point at the same servers and no
+       // blocks will actually be copied, but that's OK for exercising doMain.
+       arvadostest.StartKeep(2, false)
+
+       err := doMain()
+       c.Check(err, IsNil)
+}
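+
+// For reference, the flags exercised by the doMain tests above
+// correspond to a command-line invocation along these lines (a sketch;
+// the paths are placeholders):
+//
+//   keep-rsync -src /path/to/src-settings.conf -dst /path/to/dst-settings.conf -replications 3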