Closes #7324.
rack (>= 1.0.0)
rack-test (>= 0.5.4)
xpath (~> 2.0)
- childprocess (0.5.5)
+ childprocess (0.5.6)
ffi (~> 1.0, >= 1.0.11)
cliver (0.3.2)
coffee-rails (4.1.0)
fast_stack (0.1.0)
rake
rake-compiler
- ffi (1.9.6)
+ ffi (1.9.10)
flamegraph (0.1.0)
fast_stack
google-api-client (0.6.4)
metaclass (~> 0.0.1)
morrisjs-rails (0.5.1)
railties (> 3.1, < 5)
- multi_json (1.11.1)
+ multi_json (1.11.2)
multipart-post (1.2.0)
net-scp (1.2.1)
net-ssh (>= 2.6.5)
ref (1.0.5)
ruby-debug-passenger (0.2.0)
ruby-prof (0.15.2)
- rubyzip (1.1.6)
+ rubyzip (1.1.7)
rvm-capistrano (1.5.5)
capistrano (~> 2.15.4)
sass (3.4.9)
sprockets (>= 2.8, < 4.0)
sprockets-rails (>= 2.0, < 4.0)
tilt (~> 1.1)
- selenium-webdriver (2.44.0)
+ selenium-webdriver (2.48.1)
childprocess (~> 0.5)
multi_json (~> 1.0)
rubyzip (~> 1.0)
execjs (>= 0.3.0)
json (>= 1.8.0)
uuidtools (2.1.5)
- websocket (1.2.1)
+ websocket (1.2.2)
websocket-driver (0.5.1)
websocket-extensions (>= 0.1.0)
websocket-extensions (0.1.1)
therubyracer
uglifier (>= 1.0.3)
wiselinks
+
+BUNDLED WITH
+ 1.10.6
module VersionHelper
- # api_version returns the git commit hash for the API server's
- # current version. It is extracted from api_version_text, which
- # returns the source_version provided by the discovery document and
- # may have the word "-modified" appended to it (if the API server is
- # running from a locally modified repository).
-
- def api_version
- api_version_text.sub(/[^[:xdigit:]].*/, '')
- end
-
- def api_version_text
+ # Get the source_version given in the API server's discovery
+ # document.
+ def api_source_version
arvados_api_client.discovery[:source_version]
end
- # wb_version and wb_version_text provide the same strings for the
- # code version that this Workbench is currently running.
-
- def wb_version
- Rails.configuration.source_version
- end
-
- def wb_version_text
- wb_version + (Rails.configuration.local_modified or '')
- end
-
+ # URL for browsing source code for the given version.
def version_link_target version
- "https://arvados.org/projects/arvados/repository/changes?rev=#{version}"
+ "https://arvados.org/projects/arvados/repository/changes?rev=#{version.sub(/-.*/, "")}"
end
end
arvados_base = Rails.configuration.arvados_v1_base
support_email = Rails.configuration.support_email_address
- api_version_link = link_to api_version_text, version_link_target(api_version)
- wb_version_link = link_to wb_version_text, version_link_target(wb_version)
-
additional_info = {}
additional_info['Current location'] = params[:current_location]
additional_info['User UUID'] = current_user.uuid if current_user
additional_info_str = additional_info.map {|k,v| "#{k}=#{v}"}.join("\n")
- additional_info['api_version'] = api_version_text
+ additional_info['api_source_version'] = api_source_version
additional_info['generated_at'] = generated_at
- additional_info['workbench_version'] = wb_version_text
+ additional_info['workbench_version'] = AppVersion.hash
additional_info['arvados_base'] = arvados_base
additional_info['support_email'] = support_email
additional_info['error_message'] = params[:error_message] if params[:error_message]
<div class="form-group">
<label for="wb_version" class="col-sm-4 control-label"> Workbench version </label>
<div class="col-sm-8">
- <p class="form-control-static" name="wb_version"><%= wb_version_link %></p>
+ <p class="form-control-static" name="wb_version">
+ <%= link_to AppVersion.hash, version_link_target(AppVersion.hash) %>
+ </p>
</div>
</div>
<div class="form-group">
<label for="server_version" class="col-sm-4 control-label"> API version </label>
<div class="col-sm-8">
- <p class="form-control-static" name="server_version"><%= api_version_link %></p>
+ <p class="form-control-static" name="server_version">
+ <%= link_to api_source_version, version_link_target(api_source_version) %>
+ </p>
</div>
</div>
</li><li>
<strong>Use existing pipelines</strong>: Use best-practices pipelines on your own data with the click of a button.
</li><li>
- <strong>Open-source</strong>: Arvados is completely open-source. Check out our <a href="http://arvados.org">developer site</a>.
+ <strong>Open source</strong>: Arvados is completely open source. Check out our <a href="http://dev.arvados.org">developer site</a>.
</li>
</ol>
<p style="margin-top: 1em;">
# template_uuid: is the uuid of the template to be executed
# input_paths: an array of inputs for the pipeline. Use either a collection's "uuid"
# or a file's "uuid/file_name" path in this array. If the pipeline does not require
-# any inputs, this can be omitted.
+# any inputs, this can be omitted.
# max_wait_seconds: max time in seconds to wait for the pipeline run to complete.
# Default value of 30 seconds is used when this value is not provided.
diagnostics:
assets.debug: true
profiling_enabled: true
site_name: Arvados Workbench (dev)
- local_modified: "<%= '-modified' if `git status -s` != '' %>"
# API server configuration
arvados_login_base: ~
# Below is a sample setting of user_profile_form_fields config parameter.
# This configuration parameter should be set to either false (to disable) or
- # to an array as shown below.
+ # to an array as shown below.
# Configure the list of input fields to be displayed in the profile page
# using the attribute "key" for each of the input fields.
# This sample shows configuration with one required and one optional form fields.
# the profile page.
user_profile_form_message: Welcome to Arvados. All <span style="color:red">required fields</span> must be completed before you can proceed.
- # source_version
- source_version: "<%= `git log -n 1 --format=%h`.strip %>"
- local_modified: false
+ # Override the automatic version string. With the default value of
+ # false, the version string is read from git-commit.version in
+ # Rails.root (included in vendor packages) or determined by invoking
+ # "git log".
+ source_version: false
# report notification to and from addresses
issue_reporter_email_from: arvados@example.com
# Custom directories with classes and modules you want to be autoloadable.
# config.autoload_paths += %W(#{config.root}/extras)
+ config.autoload_paths += %W(#{config.root}/lib)
# Only load the plugins named here, in the order given (default is alphabetical).
# :all can be used as a placeholder for all plugins not explicitly named.
--- /dev/null
+# If you change this file, you'll probably also want to make the same
+# changes in services/api/lib/app_version.rb.
+
+class AppVersion
+ def self.git(*args, &block)
+ IO.popen(["git", "--git-dir", ".git"] + args, "r",
+ chdir: Rails.root.join('../..'),
+ err: "/dev/null",
+ &block)
+ end
+
+ def self.forget
+ @hash = nil
+ end
+
+ # Return abbrev commit hash for current code version: "abc1234", or
+ # "abc1234-modified" if there are uncommitted changes. If present,
+ # return contents of {root}/git-commit.version instead.
+ def self.hash
+ if (cached = Rails.configuration.source_version || @hash)
+ return cached
+ end
+
+ # Read the version from our package's git-commit.version file, if available.
+ begin
+ @hash = IO.read(Rails.root.join("git-commit.version")).strip
+ rescue Errno::ENOENT
+ end
+
+ if @hash.nil? or @hash.empty?
+ begin
+ local_modified = false
+ git("status", "--porcelain") do |git_pipe|
+ git_pipe.each_line do |_|
+ STDERR.puts _
+ local_modified = true
+ # Continue reading the pipe so git doesn't get SIGPIPE.
+ end
+ end
+ if $?.success?
+ git("log", "-n1", "--format=%H") do |git_pipe|
+ git_pipe.each_line do |line|
+ @hash = line.chomp[0...8] + (local_modified ? '-modified' : '')
+ end
+ end
+ end
+ rescue SystemCallError
+ end
+ end
+
+ @hash || "unknown"
+ end
+end
<notextile>
<pre><code><span class="userinput">sudo gpg --keyserver hkp://keys.gnupg.net --recv-keys 409B6B1796C275462A1703113804BB82D39DC0E3
\curl -sSL https://get.rvm.io | sudo bash -s stable --ruby=2.1
-sudo adduser "$USER" rvm
</span></code></pre></notextile>
Either log out and log back in to activate RVM, or explicitly load it in all open shells like this:
<pre><code><span class="userinput">sudo apt-get install \
gawk g++ gcc make libc6-dev libreadline6-dev zlib1g-dev libssl-dev \
libyaml-dev libsqlite3-dev sqlite3 autoconf libgdbm-dev \
- libncurses5-dev automake libtool bison pkg-config libffi-dev
+ libncurses5-dev automake libtool bison pkg-config libffi-dev curl
</span></code></pre></notextile>
Build and install Ruby:
<notextile>
<pre><code><span class="userinput">mkdir -p ~/src
cd ~/src
-curl http://cache.ruby-lang.org/pub/ruby/2.1/ruby-2.1.6.tar.gz | tar xz
-cd ruby-2.1.6
+curl http://cache.ruby-lang.org/pub/ruby/2.1/ruby-2.1.7.tar.gz | tar xz
+cd ruby-2.1.7
./configure --disable-install-rdoc
make
sudo make install
<notextile>
<ol>
-<li><a href="https://www.phusionpassenger.com/documentation/Users%20guide%20Nginx.html">Install Nginx and Phusion Passenger</a>.</li>
+<li><a href="https://www.phusionpassenger.com/library/walkthroughs/deploy/ruby/ownserver/nginx/oss/install_passenger_main.html">Install Nginx and Phusion Passenger</a>.</li>
<li><p>Puma is already included with the API server's gems. We recommend you run it as a service under <a href="http://smarden.org/runit/">runit</a> or a similar tool. Here's a sample runit script for that:</p>
h3. Debian and Ubuntu
-Packages are available for Debian 7 ("wheezy"), Ubuntu 12.04 ("precise"), and Ubuntu 14.04 ("trusty").
+Packages are available for Debian 7 ("wheezy"), Debian 8 ("jessie"), Ubuntu 12.04 ("precise"), and Ubuntu 14.04 ("trusty").
First, register the Curoverse signing key in apt's database:
table(table table-bordered table-condensed).
|OS version|Command|
|Debian 7 ("wheezy")|<notextile><code><span class="userinput">echo "deb http://apt.arvados.org/ wheezy main" | sudo tee /etc/apt/sources.list.d/arvados.list</span></code></notextile>|
+|Debian 8 ("jessie")|<notextile><code><span class="userinput">echo "deb http://apt.arvados.org/ jessie main" | sudo tee /etc/apt/sources.list.d/arvados.list</span></code></notextile>|
|Ubuntu 12.04 ("precise")|<notextile><code><span class="userinput">echo "deb http://apt.arvados.org/ precise main" | sudo tee /etc/apt/sources.list.d/arvados.list</span></code></notextile>|
|Ubuntu 14.04 ("trusty")|<notextile><code><span class="userinput">echo "deb http://apt.arvados.org/ trusty main" | sudo tee /etc/apt/sources.list.d/arvados.list</span></code></notextile>|
---
layout: default
navsection: installguide
-title: Install Single Sign On (SSO) server
+title: Install the Single Sign On (SSO) server
...
-h2(#dependencies). Install dependencies
+h2(#dependencies). Install prerequisites
-h3(#install_git_curl). Install git and curl
-
-{% include 'install_git_curl' %}
+The Arvados package repository includes an SSO server package that can help automate much of the deployment.
h3(#install_ruby_and_bundler). Install Ruby and Bundler
{% include 'install_ruby_and_bundler' %}
-h3(#install_postgres). Install PostgreSQL
+h3(#install_web_server). Set up a Web server
+
+For best performance, we recommend you use Nginx as your Web server frontend with a Passenger backend to serve the SSO server. The Passenger team provides "Nginx + Passenger installation instructions":https://www.phusionpassenger.com/library/walkthroughs/deploy/ruby/ownserver/nginx/oss/install_passenger_main.html.
-{% include 'install_postgres' %}
+Follow the instructions until you see the section that says you are ready to deploy your Ruby application on the production server.
-h2(#install). Install SSO server
+h2(#install). Install the SSO server
-h3. Get SSO server code and run bundle
+On a Debian-based system, install the following package:
<notextile>
-<pre><code>~$ <span class="userinput">cd $HOME</span> # (or wherever you want to install)
-~$ <span class="userinput">git clone https://github.com/curoverse/sso-devise-omniauth-provider.git</span>
-~$ <span class="userinput">cd sso-devise-omniauth-provider</span>
-~/sso-devise-omniauth-provider$ <span class="userinput">bundle install --without=development</span>
-</code></pre></notextile>
+<pre><code>~$ <span class="userinput">sudo apt-get install arvados-sso-server</span>
+</code></pre>
+</notextile>
+
+On a Red Hat-based system, install the following package:
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo yum install arvados-sso-server</span>
+</code></pre>
+</notextile>
-h2. Configure the SSO server
+h2(#configure). Configure the SSO server
-First, copy the example configuration file:
+The package has installed three configuration files in @/etc/arvados/sso@:
<notextile>
-<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">cp -i config/application.yml.example config/application.yml</span>
-</code></pre></notextile>
+<pre><code>/etc/arvados/sso/application.yml
+/etc/arvados/sso/database.yml
+/etc/arvados/sso/production.rb
+</code></pre>
+</notextile>
+
+The SSO server runs from the @/var/www/arvados-sso/current/@ directory. The files @/var/www/arvados-sso/current/config/application.yml@, @/var/www/arvados-sso/current/config/database.yml@ and @/var/www/arvados-sso/current/config/environments/production.rb@ are symlinked to the configuration files in @/etc/arvados/sso/@.
The SSO server reads the @config/application.yml@ file, as well as the @config/application.default.yml@ file. Values in @config/application.yml@ take precedence over the defaults that are defined in @config/application.default.yml@. The @config/application.yml.example@ file is not read by the SSO server and is provided for installation convenience only.
-Consult @config/application.default.yml@ for a full list of configuration options. Local configuration goes in @config/application.yml@, do not edit @config/application.default.yml@.
+Consult @config/application.default.yml@ for a full list of configuration options. Local configuration goes in @/etc/arvados/sso/application.yml@, do not edit @config/application.default.yml@.
h3(#uuid_prefix). uuid_prefix
Generate a uuid prefix for the single sign on service. This prefix is used to identify user records as originating from this site. It must be exactly 5 lowercase ASCII letters and/or digits. You may use the following snippet to generate a uuid prefix:
<notextile>
-<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">ruby -e 'puts "#{rand(2**64).to_s(36)[0,5]}"'</span>
+<pre><code>~$ <span class="userinput">ruby -e 'puts "#{rand(2**64).to_s(36)[0,5]}"'</span>
abcde
</code></pre></notextile>
-Edit @config/application.yml@ and set @uuid_prefix@ in the "common" section.
+Edit @/etc/arvados/sso/application.yml@ and set @uuid_prefix@ in the "common" section.
h3(#secret_token). secret_token
Generate a new secret token for signing cookies:
<notextile>
-<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">ruby -e 'puts rand(2**400).to_s(36)'</span>
+<pre><code>~$ <span class="userinput">ruby -e 'puts rand(2**400).to_s(36)'</span>
zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
</code></pre></notextile>
-Edit @config/application.yml@ and set @secret_token@ in the "common" section.
+Edit @/etc/arvados/sso/application.yml@ and set @secret_token@ in the "common" section.
+
+There are other configuration options in @/etc/arvados/sso/application.yml@. See the "Authentication methods":install-sso.html#authentication_methods section below for more details.
h2(#database). Set up the database
-Generate a new database password. Nobody ever needs to memorize it or type it, so make a strong one:
+If PostgreSQL was newly installed as a dependency of the @arvados-sso-server@ package, you will need to start the service.
+
+On a Debian-based system:
<notextile>
-<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">ruby -e 'puts rand(2**128).to_s(36)'</span>
+<pre><code>~$ <span class="userinput">sudo service postgresql start</span>
+</code></pre>
+</notextile>
+
+On a Red Hat-based system, we also need to initialize the database system:
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo service postgresql initdb</span>
+~$ <span class="userinput">sudo service postgresql start</span>
+</code></pre>
+</notextile>
+
+{% include 'notebox_begin' %}
+
+If you are installing on CentOS6, you will need to modify PostgreSQL's configuration to allow password authentication for local users. The default configuration allows 'ident' only. The following commands will make the configuration change, and restart PostgreSQL for it to take effect.
+<br/>
+<notextile>
+<pre><code>~$ <span class="userinput">sudo sed -i -e "s/127.0.0.1\/32 ident/127.0.0.1\/32 md5/" /var/lib/pgsql/data/pg_hba.conf</span>
+~$ <span class="userinput">sudo sed -i -e "s/::1\/128 ident/::1\/128 md5/" /var/lib/pgsql/data/pg_hba.conf</span>
+~$ <span class="userinput">sudo service postgresql restart</span>
+</code></pre>
+</notextile>
+{% include 'notebox_end' %}
+
+
+Next, generate a new database password. Nobody ever needs to memorize it or type it, so make a strong one:
+
+<notextile>
+<pre><code>~$ <span class="userinput">ruby -e 'puts rand(2**128).to_s(36)'</span>
abcdefghijklmnopqrstuvwxyz012345689
</code></pre></notextile>
+Configure the SSO server to connect to your database by updating @/etc/arvados/sso/database.yml@. Replace the @xxxxxxxx@ database password placeholder with the new password you generated above. Be sure to update the @production@ section.
+
+<notextile>
+<pre><code>~$ <span class="userinput">editor /etc/arvados/sso/database.yml</span>
+</code></pre></notextile>
+
Create a new database user with permission to create its own databases.
<notextile>
-<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">sudo -u postgres createuser --createdb --encrypted -R -S --pwprompt arvados_sso</span>
+<pre><code>~$ <span class="userinput">sudo -u postgres createuser --createdb --encrypted -R -S --pwprompt arvados_sso</span>
Enter password for new role: <span class="userinput">paste-database-password-you-generated</span>
Enter it again: <span class="userinput">paste-database-password-you-generated</span>
</code></pre></notextile>
-Configure SSO server to connect to your database by creating and updating @config/database.yml@. Replace the @xxxxxxxx@ database password placeholders with the new password you generated above. If you are planning a production system, update the @production@ section, otherwise use @development@.
+Rails will take care of creating the database, based on the information from @/etc/arvados/sso/database.yml@.
+
+Alternatively, if the database user you intend to use for the SSO server should not be allowed to create new databases, the user and the database can be created like this:
<notextile>
-<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">cp -i config/database.yml.example config/database.yml</span>
-~/sso-devise-omniauth-provider$ <span class="userinput">edit config/database.yml</span>
+<pre><code>~$ <span class="userinput">sudo -u postgres createuser --encrypted -R -S --pwprompt arvados_sso</span>
+Enter password for new role: <span class="userinput">paste-database-password-you-generated</span>
+Enter it again: <span class="userinput">paste-database-password-you-generated</span>
+~$ <span class="userinput">sudo -u postgres createdb arvados_sso_production -E UTF8 -O arvados_sso -T template0</span>
</code></pre></notextile>
-Create and initialize the database. If you are planning a production system, choose the @production@ rails environment, otherwise use @development@.
+h2(#reconfigure_package). Reconfigure the package
+
+Now that the @/etc/arvados/sso/application.yml@ and @/etc/arvados/sso/database.yml@ files have been updated, we need to reconfigure our installed package. Doing so will create and/or initialize the database and precompile the assets.
+
+On a Debian-based system:
<notextile>
-<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">RAILS_ENV=production bundle exec rake db:setup</span>
-</code></pre></notextile>
+<pre><code>~$ <span class="userinput">sudo dpkg-reconfigure arvados-sso-server</span>
+</code></pre>
+</notextile>
-Alternatively, if the database user you intend to use for the SSO server is not allowed to create new databases, you can create the database first and then populate it with rake. Be sure to adjust the database name if you are using the @development@ environment. This sequence of commands is functionally equivalent to the rake db:setup command above:
+On a Red Hat-based system, we need to reinstall the package instead:
<notextile>
-<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">sudo -u postgres createdb arvados_sso_production -E UTF8 -O arvados_sso -T template0</span>
-~/sso-devise-omniauth-provider$ <span class="userinput">RAILS_ENV=production bundle exec rake db:schema:load</span>
-~/sso-devise-omniauth-provider$ <span class="userinput">RAILS_ENV=production bundle exec rake db:seed</span>
-</code></pre></notextile>
+<pre><code>~$ <span class="userinput">sudo yum reinstall arvados-sso-server</span>
+</code></pre>
+</notextile>
h2(#client). Create arvados-server client
Use @rails console@ to create a @Client@ record that will be used by the Arvados API server. The values of @app_id@ and @app_secret@ correspond to the values for @sso_app_id@ and @sso_app_secret@ in the "API server's SSO settings.":install-api-server.html#omniauth
<notextile>
-<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">ruby -e 'puts rand(2**400).to_s(36)'</span>
+<pre><code>~$ <span class="userinput">ruby -e 'puts rand(2**400).to_s(36)'</span>
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
-~/sso-devise-omniauth-provider$ <span class="userinput">RAILS_ENV=production bundle exec rails console</span>
+~$ <span class="userinput">RAILS_ENV=production bundle exec rails console</span>
:001 > <span class="userinput">c = Client.new</span>
:002 > <span class="userinput">c.name = "joshid"</span>
:003 > <span class="userinput">c.app_id = "arvados-server"</span>
</code></pre>
</notextile>
-h2(#assets). Precompile assets
+h2(#configure_web_server). Configure your web server
-If you are running in the production environment, you must precompile the assets:
+Edit the http section of your Nginx configuration to run the Passenger server and act as a frontend for it. You might add a block like the following, adding SSL and logging parameters to taste:
<notextile>
-<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">RAILS_ENV=production bundle exec rake assets:precompile</span>
+<pre><code>server {
+ listen 127.0.0.1:8900;
+ server_name localhost-sso;
+
+ root /var/www/arvados-sso/current/public;
+ index index.html;
+
+ passenger_enabled on;
+ # If you're not using RVM, comment out the line below.
+ passenger_ruby /usr/local/rvm/wrappers/default/ruby;
+}
+
+upstream sso {
+ server 127.0.0.1:8900 fail_timeout=10s;
+}
+
+proxy_http_version 1.1;
+
+server {
+ listen <span class="userinput">[your public IP address]</span>:443 ssl;
+ server_name auth.<span class="userinput">your.domain</span>;
+
+ ssl on;
+ ssl_certificate <span class="userinput">/YOUR/PATH/TO/cert.pem</span>;
+ ssl_certificate_key <span class="userinput">/YOUR/PATH/TO/cert.key</span>;
+
+ index index.html;
+
+ location / {
+ proxy_pass http://sso;
+ proxy_redirect off;
+ proxy_connect_timeout 90s;
+ proxy_read_timeout 300s;
+
+ proxy_set_header X-Forwarded-Proto https;
+ proxy_set_header Host $http_host;
+ proxy_set_header X-Real-IP $remote_addr;
+ proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+ }
+}
</code></pre>
</notextile>
+Finally, restart Nginx and your Arvados SSO server should be up and running. You can verify that by visiting the URL you configured your Nginx web server to listen on in the server section above (port 443). Read on if you want to configure your Arvados SSO server to use a different authentication backend.
+
h2(#authentication_methods). Authentication methods
Authentication methods are configured in @application.yml@. Currently three authentication methods are supported: local accounts, LDAP, and Google+. If neither Google+ nor LDAP is enabled, the SSO server defaults to local user accounts. Only one authentication mechanism should be in use at a time.
If @allow_account_registration@ is false, you may manually create local accounts on the SSO server from the rails console:
<notextile>
-<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">RAILS_ENV=production bundle exec rails console</span>
+<pre><code>~$ <span class="userinput">RAILS_ENV=production bundle exec rails console</span>
:001 > <span class="userinput">user = User.new(:email => "test@example.com")</span>
:002 > <span class="userinput">user.password = "passw0rd"</span>
:003 > <span class="userinput">user.save!</span>
google_oauth2_client_id: <span class="userinput">"---YOUR---CLIENT---ID---HERE--"-</span>
google_oauth2_client_secret: <span class="userinput">"---YOUR---CLIENT---SECRET---HERE--"-</span></code></pre></notextile>
-h2(#start). Set up a Web server
-
-For best performance, we recommend you use Nginx as your Web server front-end, with a Passenger backend to serve the SSO server. To do that:
-
-<notextile>
-<ol>
-<li><a href="https://www.phusionpassenger.com/documentation/Users%20guide%20Nginx.html">Install Nginx and Phusion Passenger</a>.</li>
-
-<li><p>Edit the http section of your Nginx configuration to run the Passenger server, and act as a front-end for it. You might add a block like the following, adding SSL and logging parameters to taste:</p>
-
-<pre><code>server {
- listen 127.0.0.1:8900;
- server_name localhost-sso;
-
- root <span class="userinput">/YOUR/PATH/TO/sso-devise-omniauth-provider/public</span>;
- index index.html index.htm index.php;
-
- passenger_enabled on;
- # If you're using RVM, uncomment the line below.
- #passenger_ruby /usr/local/rvm/wrappers/default/ruby;
-}
-
-upstream sso {
- server 127.0.0.1:8900 fail_timeout=10s;
-}
-
-proxy_http_version 1.1;
-
-server {
- listen <span class="userinput">[your public IP address]</span>:443 ssl;
- server_name auth.<span class="userinput">your.domain</span>;
-
- ssl on;
- ssl_certificate <span class="userinput">/YOUR/PATH/TO/cert.pem</span>;
- ssl_certificate_key <span class="userinput">/YOUR/PATH/TO/cert.key</span>;
-
- index index.html index.htm index.php;
-
- location / {
- proxy_pass http://sso;
- proxy_redirect off;
- proxy_connect_timeout 90s;
- proxy_read_timeout 300s;
-
- proxy_set_header X-Forwarded-Proto https;
- proxy_set_header Host $http_host;
- proxy_set_header X-Real-IP $remote_addr;
- proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
- }
-}
-</code></pre>
-</li>
-
-<li>Restart Nginx.</li>
-
-</ol>
-</notextile>
-{% include 'notebox_begin' %}
-
-If you see the following warning "you may safely ignore it":https://stackoverflow.com/questions/10374871/no-secret-option-provided-to-racksessioncookie-warning:
-
-<pre>
-SECURITY WARNING: No secret option provided to Rack::Session::Cookie.
-This poses a security threat. It is strongly recommended that you
-provide a secret to prevent exploits that may be possible from crafted
-cookies. This will not be supported in future versions of Rack, and
-future versions will even invalidate your existing user cookies.
-
-Called from: /var/lib/gems/2.1.0/gems/actionpack-3.2.8/lib/action_dispatch/middleware/session/abstract_store.rb:28:in `initialize'.
-</pre>
-
-{% include 'notebox_end' %}
h2. Install prerequisites
-The Arvados package repository includes Workbench server package that can help automate much of the deployment.
+The Arvados package repository includes a Workbench server package that can help automate much of the deployment.
h3(#install_ruby_and_bundler). Install Ruby and Bundler
<notextile>
<ol>
-<li><a href="https://www.phusionpassenger.com/documentation/Users%20guide%20Nginx.html">Install Nginx and Phusion Passenger</a>.</li>
+<li><a href="https://www.phusionpassenger.com/library/walkthroughs/deploy/ruby/ownserver/nginx/oss/install_passenger_main.html">Install Nginx and Phusion Passenger</a>.</li>
<li>If you're deploying on CentOS and using the python27 Software Collection, configure Nginx to use it:
</pre>
</notextile>
+h3(#arv-get). arv get
+
+@arv get@ can be used to get a textual representation of Arvados objects from the command line. The output can be limited to a subset of the object's fields. This command can be used with only the knowledge of an object's UUID.
+
+<notextile>
+<pre>
+$ <code class="userinput">arv get --help</code>
+Usage: arv [--format json|yaml] get [uuid] [fields...]
+
+Fetch the specified Arvados object, select the specified fields,
+and print a text representation.
+</pre>
+</notextile>
+
h3(#arv-edit). arv edit
@arv edit@ can be used to edit Arvados objects from the command line. Arv edit opens up the editor of your choice (set the EDITOR environment variable) with the json or yaml description of the object. Saving the file will update the Arvados object on the API server, if it passes validation.
Usage: arv keep [method] [--parameters]
Use 'arv keep [method] --help' to get more information about specific methods.
-Available methods: ls, get, put, less, check, docker
+Available methods: ls, get, put, docker
</pre>
</notextile>
h2. arv-copy
-@arv-copy@ allows users to copy collections, pipeline templates, and pipeline instances from one cluster to another. By default, @arv-copy@ will recursively go through a template or instance and copy all dependencies associated with the object.
+@arv-copy@ allows users to copy collections and pipeline templates from one cluster to another. By default, @arv-copy@ will recursively go through a template and copy all dependencies associated with the object.
For example, let's copy from our <a href="https://cloud.curoverse.com/">beta cloud instance *qr1hi*</a> to *dst_cluster*. The names *qr1hi* and *dst_cluster* are interchangeable with any cluster name. You can find the cluster name from the prefix of the uuid of the object you want to copy. For example, in *qr1hi*-4zz18-tci4vn4fa95w0zx, the cluster name is qr1hi.
</code></pre>
</notextile>
-h3. How to copy a pipeline template or pipeline instance
+h3. How to copy a pipeline template
{% include 'arv_copy_expectations' %}
-We will use the uuid @qr1hi-d1hrv-nao0ohw8y7dpf84@ as an example pipeline instance.
+We will use the uuid @qr1hi-p5p6p-9pkaxt6qjnkxhhu@ as an example pipeline template.
<notextile>
-<pre><code>~$ <span class="userinput">arv-copy --src qr1hi --dst dst_cluster --dst-git-repo $USER/tutorial qr1hi-d1hrv-nao0ohw8y7dpf84</span>
+<pre><code>~$ <span class="userinput">arv-copy --src qr1hi --dst dst_cluster --dst-git-repo $USER/tutorial qr1hi-p5p6p-9pkaxt6qjnkxhhu</span>
To git@git.dst_cluster.arvadosapi.com:$USER/tutorial.git
* [new branch] git_git_qr1hi_arvadosapi_com_arvados_git_ac21f0d45a76294aaca0c0c0fdf06eb72d03368d -> git_git_qr1hi_arvadosapi_com_arvados_git_ac21f0d45a76294aaca0c0c0fdf06eb72d03368d
-arvados.arv-copy[19694] INFO: Success: created copy with uuid dst_cluster-d1hrv-rym2h5ub9m8ofwj
+arvados.arv-copy[19694] INFO: Success: created copy with uuid dst_cluster-p5p6p-rym2h5ub9m8ofwj
</code></pre>
</notextile>
New branches in the destination git repo will be created for each branch used in the pipeline template. For example, if your source branch was named ac21f0d45a76294aaca0c0c0fdf06eb72d03368d, your new branch will be named @git_git_qr1hi_arvadosapi_com_reponame_git_ac21f0d45a76294aaca0c0c0fdf06eb72d03368d@.
-By default, if you copy a pipeline instance recursively, you will find that the template as well as all the dependencies are in your home project.
+By default, if you copy a pipeline template recursively, you will find that the template as well as all the dependencies are in your home project.
If you would like to copy the object without dependencies, you can use the @--no-recursive@ tag.
For example, we can copy the same object using this tag.
<notextile>
-<pre><code>~$ <span class="userinput">arv-copy --src qr1hi --dst dst_cluster --dst-git-repo $USER/tutorial --no-recursive qr1hi-d1hrv-nao0ohw8y7dpf84</span>
+<pre><code>~$ <span class="userinput">arv-copy --src qr1hi --dst dst_cluster --dst-git-repo $USER/tutorial --no-recursive qr1hi-p5p6p-9pkaxt6qjnkxhhu</span>
</code></pre>
</notextile>
MAINTAINER Ward Vandewege <ward@curoverse.com>
RUN apt-get update -q
-RUN apt-get install -qy supervisor python-pip python-pyvcf python-gflags python-google-api-python-client python-virtualenv libattr1-dev libfuse-dev python-dev python-llfuse fuse crunchstat python-arvados-fuse cron dnsmasq
+## 20150915 nico -- fuse.postinst has sporadic failures, splitting this up to see if it helps
+RUN apt-get install -qy fuse
+RUN apt-get install -qy supervisor python-pip python-pyvcf python-gflags python-google-api-python-client python-virtualenv libattr1-dev libfuse-dev python-dev python-llfuse crunchstat python-arvados-fuse cron dnsmasq
ADD fuse.conf /etc/fuse.conf
RUN chmod 644 /etc/fuse.conf
RUN apt-key adv --keyserver pool.sks-keyservers.net --recv 1078ECD7
RUN apt-get update -q
-RUN apt-get install -qy git python-minimal python-virtualenv python-arvados-python-client
+RUN apt-get install -qy git python-pip python-virtualenv python-arvados-python-client python-dev libcurl4-gnutls-dev
RUN gpg --keyserver pool.sks-keyservers.net --recv-keys D39DC0E3
# Ward Vandewege <ward@curoverse.com>
require 'fileutils'
+require 'shellwords'
if RUBY_VERSION < '1.9.3' then
abort <<-EOS
end
-subcommands = %w(copy create edit keep pipeline run tag ws)
+subcommands = %w(copy create edit get keep pipeline run tag ws)
+
+def exec_bin bin, opts
+ bin_path = `which #{bin.shellescape}`.strip
+ if bin_path.empty?
+ raise "#{bin}: command not found"
+ end
+ exec bin_path, *opts
+end
def check_subcommands client, arvados, subcommand, global_opts, remaining_opts
case subcommand
arv_create client, arvados, global_opts, remaining_opts
when 'edit'
arv_edit client, arvados, global_opts, remaining_opts
+ when 'get'
+ arv_get client, arvados, global_opts, remaining_opts
when 'copy', 'tag', 'ws', 'run'
- exec `which arv-#{subcommand}`.strip, *remaining_opts
+ exec_bin "arv-#{subcommand}", remaining_opts
when 'keep'
@sub = remaining_opts.shift
if ['get', 'put', 'ls', 'normalize'].index @sub then
# Native Arvados
- exec `which arv-#{@sub}`.strip, *remaining_opts
- elsif ['less', 'check'].index @sub then
- # wh* shims
- exec `which wh#{@sub}`.strip, *remaining_opts
+ exec_bin "arv-#{@sub}", remaining_opts
elsif @sub == 'docker'
- exec `which arv-keepdocker`.strip, *remaining_opts
+ exec_bin "arv-keepdocker", remaining_opts
else
puts "Usage: arv keep [method] [--parameters]\n"
puts "Use 'arv keep [method] --help' to get more information about specific methods.\n\n"
- puts "Available methods: ls, get, put, less, check, docker"
+ puts "Available methods: ls, get, put, docker"
end
abort
when 'pipeline'
sub = remaining_opts.shift
if sub == 'run'
- exec `which arv-run-pipeline-instance`.strip, *remaining_opts
+ exec_bin "arv-run-pipeline-instance", remaining_opts
else
puts "Usage: arv pipeline [method] [--parameters]\n"
puts "Use 'arv pipeline [method] --help' to get more information about specific methods.\n\n"
def edit_and_commit_object initial_obj, tmp_stem, global_opts, &block
- content = case global_opts[:format]
- when 'json'
- Oj.dump(initial_obj, :indent => 1)
- when 'yaml'
- initial_obj.to_yaml
- else
- abort "Unrecognized format #{global_opts[:format]}"
- end
+ content = get_obj_content initial_obj, global_opts
tmp_file = Tempfile.new([tmp_stem, ".#{global_opts[:format]}"])
tmp_file.write(content)
Oj.load(newcontent)
when 'yaml'
YAML.load(newcontent)
+ else
+ abort "Unrecognized format #{global_opts[:format]}"
end
yield newobj
results
end
-def arv_edit client, arvados, global_opts, remaining_opts
- uuid = remaining_opts.shift
- if uuid.nil? or uuid == "-h" or uuid == "--help"
- puts head_banner
- puts "Usage: arv edit [uuid] [fields...]\n\n"
- puts "Fetch the specified Arvados object, select the specified fields, \n"
- puts "open an interactive text editor on a text representation (json or\n"
- puts "yaml, use --format) and then update the object. Will use 'nano'\n"
- puts "by default, customize with the EDITOR or VISUAL environment variable.\n"
- exit 255
- end
-
- # determine controller
-
+def lookup_uuid_rsc arvados, uuid
m = /([a-z0-9]{5})-([a-z0-9]{5})-([a-z0-9]{15})/.match uuid
if !m
if /^[a-f0-9]{32}/.match uuid
abort "Could not determine resource type #{m[2]}"
end
+ return rsc
+end
+
+def fetch_rsc_obj client, arvados, rsc, uuid, remaining_opts
+
begin
result = client.execute(:api_method => eval('arvados.' + rsc + '.get'),
:parameters => {"uuid" => uuid},
:headers => {
authorization: 'OAuth2 '+ENV['ARVADOS_API_TOKEN']
})
- oldobj = check_response result
+ obj = check_response result
rescue => e
abort "Server error: #{e}"
end
if remaining_opts.length > 0
- oldobj.select! { |k, v| remaining_opts.include? k }
+ obj.select! { |k, v| remaining_opts.include? k }
+ end
+
+ return obj
+end
+
+def get_obj_content obj, global_opts
+ content = case global_opts[:format]
+ when 'json'
+ Oj.dump(obj, :indent => 1)
+ when 'yaml'
+ obj.to_yaml
+ else
+ abort "Unrecognized format #{global_opts[:format]}"
+ end
+ return content
+end
+
+def arv_edit client, arvados, global_opts, remaining_opts
+ uuid = remaining_opts.shift
+ if uuid.nil? or uuid == "-h" or uuid == "--help"
+ puts head_banner
+ puts "Usage: arv edit [uuid] [fields...]\n\n"
+ puts "Fetch the specified Arvados object, select the specified fields, \n"
+ puts "open an interactive text editor on a text representation (json or\n"
+ puts "yaml, use --format) and then update the object. Will use 'nano'\n"
+ puts "by default, customize with the EDITOR or VISUAL environment variable.\n"
+ exit 255
end
+ rsc = lookup_uuid_rsc arvados, uuid
+ oldobj = fetch_rsc_obj client, arvados, rsc, uuid, remaining_opts
+
edit_and_commit_object oldobj, uuid, global_opts do |newobj|
newobj.select! {|k| newobj[k] != oldobj[k]}
if !newobj.empty?
exit 0
end
+def arv_get client, arvados, global_opts, remaining_opts
+ uuid = remaining_opts.shift
+ if uuid.nil? or uuid == "-h" or uuid == "--help"
+ puts head_banner
+ puts "Usage: arv [--format json|yaml] get [uuid] [fields...]\n\n"
+ puts "Fetch the specified Arvados object, select the specified fields,\n"
+ puts "and print a text representation.\n"
+ exit 255
+ end
+
+ rsc = lookup_uuid_rsc arvados, uuid
+ obj = fetch_rsc_obj client, arvados, rsc, uuid, remaining_opts
+ content = get_obj_content obj, global_opts
+
+ puts content
+ exit 0
+end
+
def arv_create client, arvados, global_opts, remaining_opts
types = resource_types(arvados.discovery_document)
create_opts = Trollop::options do
my $cleanpid = fork();
if ($cleanpid == 0)
{
- # Find FUSE mounts that look like Keep mounts (the mount path has the
- # word "keep") and unmount them. Then clean up work directories.
- # TODO: When #5036 is done and widely deployed, we can get rid of the
- # regular expression and just unmount everything with type fuse.keep.
+ # Find FUSE mounts under $CRUNCH_TMP and unmount them.
+ # Then clean up work directories.
+ # TODO: When #5036 is done and widely deployed, we can limit mount's
+ # -t option to simply fuse.keep.
srun (["srun", "--nodelist=$nodelist", "-D", $ENV{'TMPDIR'}],
- ['bash', '-ec', '-o', 'pipefail', 'mount -t fuse,fuse.keep | awk \'($3 ~ /\ykeep\y/){print $3}\' | xargs -r -n 1 fusermount -u -z; sleep 1; rm -rf $JOB_WORK $CRUNCH_INSTALL $CRUNCH_TMP/task $CRUNCH_TMP/src* $CRUNCH_TMP/*.cid']);
+ ['bash', '-ec', '-o', 'pipefail', 'mount -t fuse,fuse.keep | awk "(index(\$3, \"$CRUNCH_TMP\") == 1){print \$3}" | xargs -r -n 1 fusermount -u -z; sleep 1; rm -rf $JOB_WORK $CRUNCH_INSTALL $CRUNCH_TMP/task $CRUNCH_TMP/src* $CRUNCH_TMP/*.cid']);
exit (1);
}
while (1)
my $gotsome
= readfrompipes ()
+ reapchildren ();
- if (!$gotsome)
+ if (!$gotsome || ($latest_refresh + 2 < scalar time))
{
check_refresh_wanted();
check_squeue();
my $python_dir = "$install_dir/python";
if ((-d $python_dir) and can_run("python2.7")) {
open(my $egg_info_pipe, "-|",
- "python2.7 \Q$python_dir/setup.py\E --quiet egg_info 2>&1 >/dev/null");
+ "python2.7 \Q$python_dir/setup.py\E egg_info 2>&1 >/dev/null");
my @egg_info_errors = <$egg_info_pipe>;
close($egg_info_pipe);
+
if ($?) {
- if (@egg_info_errors and ($egg_info_errors[-1] =~ /\bgit\b/)) {
+ if (@egg_info_errors and (($egg_info_errors[-1] =~ /\bgit\b/) or ($egg_info_errors[-1] =~ /\[Errno 2\]/))) {
# egg_info apparently failed because it couldn't ask git for a build tag.
# Specify no build tag.
open(my $pysdk_cfg, ">>", "$python_dir/setup.cfg");
} else {
my $egg_info_exit = $? >> 8;
foreach my $errline (@egg_info_errors) {
- print STDERR_ORIG $errline;
+ warn $errline;
}
warn "python setup.py egg_info failed: exit $egg_info_exit";
exit ($egg_info_exit || 1);
require 'minitest/autorun'
-require 'digest/md5'
+require 'json'
+require 'yaml'
+# Black box tests for 'arv get' command.
class TestArvGet < Minitest::Test
- def setup
- begin
- Dir.mkdir './tmp'
- rescue Errno::EEXIST
- end
- @@foo_manifest_locator ||= `echo -n foo | ./bin/arv-put --filename foo --no-progress -`.strip
- @@baz_locator ||= `echo -n baz | ./bin/arv-put --as-raw --no-progress -`.strip
- @@multilevel_manifest_locator ||= `echo ./foo/bar #{@@baz_locator} 0:3:baz | ./bin/arv-put --as-raw --no-progress -`.strip
- end
+ # UUID for an Arvados object that does not exist
+ NON_EXISTENT_OBJECT_UUID = "zzzzz-zzzzz-zzzzzzzzzzzzzzz"
+ # Name of field of Arvados object that can store any (textual) value
+ STORED_VALUE_FIELD_NAME = "name"
+ # Name of UUID field of Arvados object
+ UUID_FIELD_NAME = "uuid"
+ # Name of an invalid field of Arvados object
+ INVALID_FIELD_NAME = "invalid"
- def test_no_args
+ # Tests that a valid Arvados object can be retrieved in a supported format
+ # using: `arv get [uuid]`. Given all other `arv foo` commands return JSON
+ # when no format is specified, JSON should be expected in this case.
+ def test_get_valid_object_no_format_specified
+ stored_value = __method__.to_s
+ uuid = create_arv_object_with_value(stored_value)
out, err = capture_subprocess_io do
- assert_arv_get false
+ assert(arv_get_default(uuid))
end
- assert_equal '', out
- assert_match /^usage:/, err
+ assert_empty(err, "Error text not expected: '#{err}'")
+ arv_object = parse_json_arv_object(out)
+ assert(has_field_with_value(arv_object, STORED_VALUE_FIELD_NAME, stored_value))
end
- def test_help
+ # Tests that a valid Arvados object can be retrieved in JSON format using:
+ # `arv get [uuid] --format json`.
+ def test_get_valid_object_json_format_specified
+ stored_value = __method__.to_s
+ uuid = create_arv_object_with_value(stored_value)
out, err = capture_subprocess_io do
- assert_arv_get '-h'
+ assert(arv_get_json(uuid))
end
- $stderr.write err
- assert_equal '', err
- assert_match /^usage:/, out
+ assert_empty(err, "Error text not expected: '#{err}'")
+ arv_object = parse_json_arv_object(out)
+ assert(has_field_with_value(arv_object, STORED_VALUE_FIELD_NAME, stored_value))
end
- def test_file_to_dev_stdout
- test_file_to_stdout('/dev/stdout')
- end
-
- def test_file_to_stdout(specify_stdout_as='-')
+ # Tests that a valid Arvados object can be retrieved in YAML format using:
+ # `arv get [uuid] --format yaml`.
+ def test_get_valid_object_yaml_format_specified
+ stored_value = __method__.to_s
+ uuid = create_arv_object_with_value(stored_value)
out, err = capture_subprocess_io do
- assert_arv_get @@foo_manifest_locator + '/foo', specify_stdout_as
+ assert(arv_get_yaml(uuid))
end
- assert_equal '', err
- assert_equal 'foo', out
+ assert_empty(err, "Error text not expected: '#{err}'")
+ arv_object = parse_yaml_arv_object(out)
+ assert(has_field_with_value(arv_object, STORED_VALUE_FIELD_NAME, stored_value))
end
- def test_file_to_file
- remove_tmp_foo
+ # Tests that a subset of all fields of a valid Arvados object can be retrieved
+ # using: `arv get [uuid] [fields...]`.
+ def test_get_valid_object_with_valid_fields
+ stored_value = __method__.to_s
+ uuid = create_arv_object_with_value(stored_value)
out, err = capture_subprocess_io do
- assert_arv_get @@foo_manifest_locator + '/foo', 'tmp/foo'
+ assert(arv_get_json(uuid, STORED_VALUE_FIELD_NAME, UUID_FIELD_NAME))
end
- assert_equal '', err
- assert_equal '', out
- assert_equal 'foo', IO.read('tmp/foo')
+ assert_empty(err, "Error text not expected: '#{err}'")
+ arv_object = parse_json_arv_object(out)
+ assert(has_field_with_value(arv_object, STORED_VALUE_FIELD_NAME, stored_value))
+ assert(has_field_with_value(arv_object, UUID_FIELD_NAME, uuid))
end
- def test_file_to_file_no_overwrite_file
- File.open './tmp/foo', 'wb' do |f|
- f.write 'baz'
- end
+ # Tests that the valid field is retrieved when both a valid and invalid field
+ # are requested from a valid Arvados object, using:
+ # `arv get [uuid] [fields...]`.
+ def test_get_valid_object_with_both_valid_and_invalid_fields
+ stored_value = __method__.to_s
+ uuid = create_arv_object_with_value(stored_value)
out, err = capture_subprocess_io do
- assert_arv_get false, @@foo_manifest_locator + '/foo', 'tmp/foo'
+ assert(arv_get_json(uuid, STORED_VALUE_FIELD_NAME, INVALID_FIELD_NAME))
end
- assert_match /Local file tmp\/foo already exists/, err
- assert_equal '', out
- assert_equal 'baz', IO.read('tmp/foo')
+ assert_empty(err, "Error text not expected: '#{err}'")
+ arv_object = parse_json_arv_object(out)
+ assert(has_field_with_value(arv_object, STORED_VALUE_FIELD_NAME, stored_value))
+ refute(has_field_with_value(arv_object, INVALID_FIELD_NAME, stored_value))
end
- def test_file_to_file_no_overwrite_file_in_dir
- File.open './tmp/foo', 'wb' do |f|
- f.write 'baz'
- end
+  # Tests that no fields are retrieved when no valid fields are requested from
+ # a valid Arvados object, using: `arv get [uuid] [fields...]`.
+ def test_get_valid_object_with_no_valid_fields
+ stored_value = __method__.to_s
+ uuid = create_arv_object_with_value(stored_value)
out, err = capture_subprocess_io do
- assert_arv_get false, @@foo_manifest_locator + '/', 'tmp/'
+ assert(arv_get_json(uuid, INVALID_FIELD_NAME))
end
- assert_match /Local file tmp\/foo already exists/, err
- assert_equal '', out
- assert_equal 'baz', IO.read('tmp/foo')
+ assert_empty(err, "Error text not expected: '#{err}'")
+ arv_object = parse_json_arv_object(out)
+ assert_equal(0, arv_object.length)
end
- def test_file_to_file_force_overwrite
- File.open './tmp/foo', 'wb' do |f|
- f.write 'baz'
- end
- assert_equal 'baz', IO.read('tmp/foo')
+  # Tests that an invalid (non-existent) Arvados object is not retrieved
+  # using: `arv get [non-existent-uuid]`.
+ def test_get_invalid_object
out, err = capture_subprocess_io do
- assert_arv_get '-f', @@foo_manifest_locator + '/', 'tmp/'
+ refute(arv_get_json(NON_EXISTENT_OBJECT_UUID))
end
- assert_match '', err
- assert_equal '', out
- assert_equal 'foo', IO.read('tmp/foo')
+ refute_empty(err, "Expected error feedback on request for invalid object")
+ assert_empty(out)
end
- def test_file_to_file_skip_existing
- File.open './tmp/foo', 'wb' do |f|
- f.write 'baz'
- end
- assert_equal 'baz', IO.read('tmp/foo')
+ # Tests that help text exists using: `arv get --help`.
+ def test_help_exists
out, err = capture_subprocess_io do
- assert_arv_get '--skip-existing', @@foo_manifest_locator + '/', 'tmp/'
+# assert(arv_get_default("--help"), "Expected exit code 0: #{$?}")
+ #XXX: Exit code given is 255. It probably should be 0, which seems to be
+ # standard elsewhere. However, 255 is in line with other `arv`
+ # commands (e.g. see `arv edit`) so ignoring the problem here.
+ arv_get_default("--help")
end
- assert_match '', err
- assert_equal '', out
- assert_equal 'baz', IO.read('tmp/foo')
+ assert_empty(err, "Error text not expected: '#{err}'")
+ refute_empty(out, "Help text should be given")
end
- def test_file_to_dir
- remove_tmp_foo
- out, err = capture_subprocess_io do
- assert_arv_get @@foo_manifest_locator + '/foo', 'tmp/'
- end
- assert_equal '', err
- assert_equal '', out
- assert_equal 'foo', IO.read('tmp/foo')
- end
-
- def test_dir_to_file
- out, err = capture_subprocess_io do
- assert_arv_get false, @@foo_manifest_locator + '/', 'tmp/foo'
- end
- assert_equal '', out
- assert_match /^usage:/, err
- end
-
- def test_dir_to_empty_string
- out, err = capture_subprocess_io do
- assert_arv_get false, @@foo_manifest_locator + '/', ''
- end
- assert_equal '', out
- assert_match /^usage:/, err
- end
-
- def test_nonexistent_block
- out, err = capture_subprocess_io do
- assert_arv_get false, 'e796ab2294f3e48ec709ffa8d6daf58c'
- end
- assert_equal '', out
- assert_match /Error:/, err
- end
-
- def test_nonexistent_manifest
- out, err = capture_subprocess_io do
- assert_arv_get false, 'acbd18db4cc2f85cedef654fccc4a4d8/', 'tmp/'
- end
- assert_equal '', out
- assert_match /Error:/, err
- end
-
- def test_manifest_root_to_dir
- remove_tmp_foo
- out, err = capture_subprocess_io do
- assert_arv_get '-r', @@foo_manifest_locator + '/', 'tmp/'
- end
- assert_equal '', err
- assert_equal '', out
- assert_equal 'foo', IO.read('tmp/foo')
- end
-
- def test_manifest_root_to_dir_noslash
- remove_tmp_foo
- out, err = capture_subprocess_io do
- assert_arv_get '-r', @@foo_manifest_locator + '/', 'tmp'
- end
- assert_equal '', err
- assert_equal '', out
- assert_equal 'foo', IO.read('tmp/foo')
- end
-
- def test_display_md5sum
- remove_tmp_foo
- out, err = capture_subprocess_io do
- assert_arv_get '-r', '--md5sum', @@foo_manifest_locator + '/', 'tmp/'
- end
- assert_equal "#{Digest::MD5.hexdigest('foo')} ./foo\n", err
- assert_equal '', out
- assert_equal 'foo', IO.read('tmp/foo')
+ protected
+ # Runs 'arv get <varargs>' with given arguments. Returns whether the exit
+  # status was 0 (i.e. success). Use $? to obtain more details on failure.
+ def arv_get_default(*args)
+ return system("arv", "get", *args)
end
- def test_md5sum_nowrite
- remove_tmp_foo
- out, err = capture_subprocess_io do
- assert_arv_get '-n', '--md5sum', @@foo_manifest_locator + '/', 'tmp/'
- end
- assert_equal "#{Digest::MD5.hexdigest('foo')} ./foo\n", err
- assert_equal '', out
- assert_equal false, File.exists?('tmp/foo')
+ # Runs 'arv --format json get <varargs>' with given arguments. Returns whether
+  # the exit status was 0 (i.e. success). Use $? to obtain more details on
+ # failure.
+ def arv_get_json(*args)
+ return system("arv", "--format", "json", "get", *args)
end
- def test_sha1_nowrite
- remove_tmp_foo
- out, err = capture_subprocess_io do
- assert_arv_get '-n', '-r', '--hash', 'sha1', @@foo_manifest_locator+'/', 'tmp/'
- end
- assert_equal "#{Digest::SHA1.hexdigest('foo')} ./foo\n", err
- assert_equal '', out
- assert_equal false, File.exists?('tmp/foo')
+ # Runs 'arv --format yaml get <varargs>' with given arguments. Returns whether
+  # the exit status was 0 (i.e. success). Use $? to obtain more details on
+ # failure.
+ def arv_get_yaml(*args)
+ return system("arv", "--format", "yaml", "get", *args)
end
- def test_block_to_file
- remove_tmp_foo
+ # Creates an Arvados object that stores a given value. Returns the uuid of the
+ # created object.
+ def create_arv_object_with_value(value)
out, err = capture_subprocess_io do
- assert_arv_get @@foo_manifest_locator, 'tmp/foo'
+ system("arv", "tag", "add", value, "--object", "testing")
+ assert $?.success?, "Command failure running `arv tag`: #{$?}"
end
assert_equal '', err
- assert_equal '', out
-
- digest = Digest::MD5.hexdigest('foo')
- !(IO.read('tmp/foo')).gsub!( /^(. #{digest}+3)(.*)( 0:3:foo)$/).nil?
+ assert_operator 0, :<, out.strip.length
+ out.strip
end
- def test_create_directory_tree
- `rm -rf ./tmp/arv-get-test/`
- Dir.mkdir './tmp/arv-get-test'
- out, err = capture_subprocess_io do
- assert_arv_get @@multilevel_manifest_locator + '/', 'tmp/arv-get-test/'
+ # Parses the given JSON representation of an Arvados object, returning
+ # an equivalent Ruby representation (a hash map).
+ def parse_json_arv_object(arvObjectAsJson)
+ begin
+ parsed = JSON.parse(arvObjectAsJson)
+ assert(parsed.instance_of?(Hash))
+ return parsed
+ rescue JSON::ParserError => e
+ raise "Invalid JSON representation of Arvados object.\n" \
+ "Parse error: '#{e}'\n" \
+ "JSON: '#{arvObjectAsJson}'\n"
end
- assert_equal '', err
- assert_equal '', out
- assert_equal 'baz', IO.read('tmp/arv-get-test/foo/bar/baz')
end
- def test_create_partial_directory_tree
- `rm -rf ./tmp/arv-get-test/`
- Dir.mkdir './tmp/arv-get-test'
- out, err = capture_subprocess_io do
- assert_arv_get(@@multilevel_manifest_locator + '/foo/',
- 'tmp/arv-get-test/')
+  # Parses the given YAML representation of an Arvados object, returning
+ # an equivalent Ruby representation (a hash map).
+ def parse_yaml_arv_object(arvObjectAsYaml)
+ begin
+ parsed = YAML.load(arvObjectAsYaml)
+ assert(parsed.instance_of?(Hash))
+ return parsed
+ rescue
+ raise "Invalid YAML representation of Arvados object.\n" \
+ "YAML: '#{arvObjectAsYaml}'\n"
end
- assert_equal '', err
- assert_equal '', out
- assert_equal 'baz', IO.read('tmp/arv-get-test/bar/baz')
end
- protected
- def assert_arv_get(*args)
- expect = case args.first
- when true, false
- args.shift
- else
- true
- end
- assert_equal(expect,
- system(['./bin/arv-get', 'arv-get'], *args),
- "`arv-get #{args.join ' '}` " +
- "should exit #{if expect then 0 else 'non-zero' end}")
- end
-
- def remove_tmp_foo
- begin
- File.unlink('tmp/foo')
- rescue Errno::ENOENT
+ # Checks whether the given Arvados object has the given expected value for the
+ # specified field.
+ def has_field_with_value(arvObjectAsHash, fieldName, expectedValue)
+ if !arvObjectAsHash.has_key?(fieldName)
+ return false
end
+ return (arvObjectAsHash[fieldName] == expectedValue)
end
end
--- /dev/null
+require 'minitest/autorun'
+require 'digest/md5'
+
+class TestArvKeepGet < Minitest::Test
+ def setup
+ begin
+ Dir.mkdir './tmp'
+ rescue Errno::EEXIST
+ end
+ @@foo_manifest_locator ||= `echo -n foo | ./bin/arv-put --filename foo --no-progress -`.strip
+ @@baz_locator ||= `echo -n baz | ./bin/arv-put --as-raw --no-progress -`.strip
+ @@multilevel_manifest_locator ||= `echo ./foo/bar #{@@baz_locator} 0:3:baz | ./bin/arv-put --as-raw --no-progress -`.strip
+ end
+
+ def test_no_args
+ out, err = capture_subprocess_io do
+ assert_arv_get false
+ end
+ assert_equal '', out
+ assert_match /^usage:/, err
+ end
+
+ def test_help
+ out, err = capture_subprocess_io do
+ assert_arv_get '-h'
+ end
+ $stderr.write err
+ assert_equal '', err
+ assert_match /^usage:/, out
+ end
+
+ def test_file_to_dev_stdout
+ test_file_to_stdout('/dev/stdout')
+ end
+
+ def test_file_to_stdout(specify_stdout_as='-')
+ out, err = capture_subprocess_io do
+ assert_arv_get @@foo_manifest_locator + '/foo', specify_stdout_as
+ end
+ assert_equal '', err
+ assert_equal 'foo', out
+ end
+
+ def test_file_to_file
+ remove_tmp_foo
+ out, err = capture_subprocess_io do
+ assert_arv_get @@foo_manifest_locator + '/foo', 'tmp/foo'
+ end
+ assert_equal '', err
+ assert_equal '', out
+ assert_equal 'foo', IO.read('tmp/foo')
+ end
+
+ def test_file_to_file_no_overwrite_file
+ File.open './tmp/foo', 'wb' do |f|
+ f.write 'baz'
+ end
+ out, err = capture_subprocess_io do
+ assert_arv_get false, @@foo_manifest_locator + '/foo', 'tmp/foo'
+ end
+ assert_match /Local file tmp\/foo already exists/, err
+ assert_equal '', out
+ assert_equal 'baz', IO.read('tmp/foo')
+ end
+
+ def test_file_to_file_no_overwrite_file_in_dir
+ File.open './tmp/foo', 'wb' do |f|
+ f.write 'baz'
+ end
+ out, err = capture_subprocess_io do
+ assert_arv_get false, @@foo_manifest_locator + '/', 'tmp/'
+ end
+ assert_match /Local file tmp\/foo already exists/, err
+ assert_equal '', out
+ assert_equal 'baz', IO.read('tmp/foo')
+ end
+
+ def test_file_to_file_force_overwrite
+ File.open './tmp/foo', 'wb' do |f|
+ f.write 'baz'
+ end
+ assert_equal 'baz', IO.read('tmp/foo')
+ out, err = capture_subprocess_io do
+ assert_arv_get '-f', @@foo_manifest_locator + '/', 'tmp/'
+ end
+ assert_match '', err
+ assert_equal '', out
+ assert_equal 'foo', IO.read('tmp/foo')
+ end
+
+ def test_file_to_file_skip_existing
+ File.open './tmp/foo', 'wb' do |f|
+ f.write 'baz'
+ end
+ assert_equal 'baz', IO.read('tmp/foo')
+ out, err = capture_subprocess_io do
+ assert_arv_get '--skip-existing', @@foo_manifest_locator + '/', 'tmp/'
+ end
+ assert_match '', err
+ assert_equal '', out
+ assert_equal 'baz', IO.read('tmp/foo')
+ end
+
+ def test_file_to_dir
+ remove_tmp_foo
+ out, err = capture_subprocess_io do
+ assert_arv_get @@foo_manifest_locator + '/foo', 'tmp/'
+ end
+ assert_equal '', err
+ assert_equal '', out
+ assert_equal 'foo', IO.read('tmp/foo')
+ end
+
+ def test_dir_to_file
+ out, err = capture_subprocess_io do
+ assert_arv_get false, @@foo_manifest_locator + '/', 'tmp/foo'
+ end
+ assert_equal '', out
+ assert_match /^usage:/, err
+ end
+
+ def test_dir_to_empty_string
+ out, err = capture_subprocess_io do
+ assert_arv_get false, @@foo_manifest_locator + '/', ''
+ end
+ assert_equal '', out
+ assert_match /^usage:/, err
+ end
+
+ def test_nonexistent_block
+ out, err = capture_subprocess_io do
+ assert_arv_get false, 'e796ab2294f3e48ec709ffa8d6daf58c'
+ end
+ assert_equal '', out
+ assert_match /Error:/, err
+ end
+
+ def test_nonexistent_manifest
+ out, err = capture_subprocess_io do
+ assert_arv_get false, 'acbd18db4cc2f85cedef654fccc4a4d8/', 'tmp/'
+ end
+ assert_equal '', out
+ assert_match /Error:/, err
+ end
+
+ def test_manifest_root_to_dir
+ remove_tmp_foo
+ out, err = capture_subprocess_io do
+ assert_arv_get '-r', @@foo_manifest_locator + '/', 'tmp/'
+ end
+ assert_equal '', err
+ assert_equal '', out
+ assert_equal 'foo', IO.read('tmp/foo')
+ end
+
+ def test_manifest_root_to_dir_noslash
+ remove_tmp_foo
+ out, err = capture_subprocess_io do
+ assert_arv_get '-r', @@foo_manifest_locator + '/', 'tmp'
+ end
+ assert_equal '', err
+ assert_equal '', out
+ assert_equal 'foo', IO.read('tmp/foo')
+ end
+
+ def test_display_md5sum
+ remove_tmp_foo
+ out, err = capture_subprocess_io do
+ assert_arv_get '-r', '--md5sum', @@foo_manifest_locator + '/', 'tmp/'
+ end
+ assert_equal "#{Digest::MD5.hexdigest('foo')} ./foo\n", err
+ assert_equal '', out
+ assert_equal 'foo', IO.read('tmp/foo')
+ end
+
+ def test_md5sum_nowrite
+ remove_tmp_foo
+ out, err = capture_subprocess_io do
+ assert_arv_get '-n', '--md5sum', @@foo_manifest_locator + '/', 'tmp/'
+ end
+ assert_equal "#{Digest::MD5.hexdigest('foo')} ./foo\n", err
+ assert_equal '', out
+ assert_equal false, File.exists?('tmp/foo')
+ end
+
+ def test_sha1_nowrite
+ remove_tmp_foo
+ out, err = capture_subprocess_io do
+ assert_arv_get '-n', '-r', '--hash', 'sha1', @@foo_manifest_locator+'/', 'tmp/'
+ end
+ assert_equal "#{Digest::SHA1.hexdigest('foo')} ./foo\n", err
+ assert_equal '', out
+ assert_equal false, File.exists?('tmp/foo')
+ end
+
+ def test_block_to_file
+ remove_tmp_foo
+ out, err = capture_subprocess_io do
+ assert_arv_get @@foo_manifest_locator, 'tmp/foo'
+ end
+ assert_equal '', err
+ assert_equal '', out
+
+ digest = Digest::MD5.hexdigest('foo')
+ !(IO.read('tmp/foo')).gsub!( /^(. #{digest}+3)(.*)( 0:3:foo)$/).nil?
+ end
+
+ def test_create_directory_tree
+ `rm -rf ./tmp/arv-get-test/`
+ Dir.mkdir './tmp/arv-get-test'
+ out, err = capture_subprocess_io do
+ assert_arv_get @@multilevel_manifest_locator + '/', 'tmp/arv-get-test/'
+ end
+ assert_equal '', err
+ assert_equal '', out
+ assert_equal 'baz', IO.read('tmp/arv-get-test/foo/bar/baz')
+ end
+
+ def test_create_partial_directory_tree
+ `rm -rf ./tmp/arv-get-test/`
+ Dir.mkdir './tmp/arv-get-test'
+ out, err = capture_subprocess_io do
+ assert_arv_get(@@multilevel_manifest_locator + '/foo/',
+ 'tmp/arv-get-test/')
+ end
+ assert_equal '', err
+ assert_equal '', out
+ assert_equal 'baz', IO.read('tmp/arv-get-test/bar/baz')
+ end
+
+ protected
+ def assert_arv_get(*args)
+ expect = case args.first
+ when true, false
+ args.shift
+ else
+ true
+ end
+ assert_equal(expect,
+ system(['./bin/arv-get', 'arv-get'], *args),
+ "`arv-get #{args.join ' '}` " +
+ "should exit #{if expect then 0 else 'non-zero' end}")
+ end
+
+ def remove_tmp_foo
+ begin
+ File.unlink('tmp/foo')
+ rescue Errno::ENOENT
+ end
+ end
+end
require 'minitest/autorun'
require 'digest/md5'
-class TestArvPut < Minitest::Test
+class TestArvKeepPut < Minitest::Test
def setup
begin Dir.mkdir './tmp' rescue Errno::EEXIST end
begin Dir.mkdir './tmp/empty_dir' rescue Errno::EEXIST end
'bin/cwl-runner'
],
install_requires=[
- 'cwltool',
+ 'cwltool==1.0.20150722144138',
'arvados-python-client'
],
zip_safe=True,
func (s *ServerRequiredSuite) SetUpSuite(c *C) {
arvadostest.StartAPI()
- arvadostest.StartKeep()
+ arvadostest.StartKeep(2, false)
}
func (s *ServerRequiredSuite) SetUpTest(c *C) {
"log"
"os"
"os/exec"
+ "strconv"
"strings"
)
exec.Command("python", "run_test_server.py", "stop").Run()
}
-func StartKeep() {
+// StartKeep starts the given number of keep servers,
+// optionally with -enforce-permissions enabled.
+// Use numKeepServers = 2 and enforcePermissions = false under all normal circumstances.
+func StartKeep(numKeepServers int, enforcePermissions bool) {
cwd, _ := os.Getwd()
defer os.Chdir(cwd)
chdirToPythonTests()
- cmd := exec.Command("python", "run_test_server.py", "start_keep")
+ cmdArgs := []string{"run_test_server.py", "start_keep", "--num-keep-servers", strconv.Itoa(numKeepServers)}
+ if enforcePermissions {
+ cmdArgs = append(cmdArgs, "--keep-enforce-permissions")
+ }
+
+ cmd := exec.Command("python", cmdArgs...)
+
stderr, err := cmd.StderrPipe()
if err != nil {
log.Fatalf("Setting up stderr pipe: %s", err)
}
}
-func StopKeep() {
+// StopKeep stops keep servers that were started with StartKeep.
+// numKeepServers should be the same value that was passed to StartKeep,
+// which is 2 under all normal circumstances.
+func StopKeep(numKeepServers int) {
cwd, _ := os.Getwd()
defer os.Chdir(cwd)
chdirToPythonTests()
- exec.Command("python", "run_test_server.py", "stop_keep").Run()
+ exec.Command("python", "run_test_server.py", "stop_keep", "--num-keep-servers", strconv.Itoa(numKeepServers))
}
package keepclient
import (
+ "bytes"
"crypto/md5"
"crypto/tls"
"errors"
"io/ioutil"
"log"
"net/http"
- "os"
"regexp"
"strconv"
"strings"
var MissingArvadosApiToken = errors.New("Missing required environment variable ARVADOS_API_TOKEN")
var InvalidLocatorError = errors.New("Invalid locator")
+// ErrNoSuchKeepServer is returned when GetIndex is invoked with a UUID with no matching keep server
+var ErrNoSuchKeepServer = errors.New("No keep server matching the given UUID is found")
+
+// ErrIncompleteIndex is returned when the Index response does not end with a new empty line
+var ErrIncompleteIndex = errors.New("Got incomplete index")
+
const X_Keep_Desired_Replicas = "X-Keep-Desired-Replicas"
const X_Keep_Replicas_Stored = "X-Keep-Replicas-Stored"
// Information about Arvados and Keep servers.
type KeepClient struct {
- Arvados *arvadosclient.ArvadosClient
- Want_replicas int
- Using_proxy bool
- localRoots *map[string]string
+ Arvados *arvadosclient.ArvadosClient
+ Want_replicas int
+ Using_proxy bool
+ localRoots *map[string]string
writableLocalRoots *map[string]string
- gatewayRoots *map[string]string
- lock sync.RWMutex
- Client *http.Client
+ gatewayRoots *map[string]string
+ lock sync.RWMutex
+ Client *http.Client
+ Retries int
+
+ // set to 1 if all writable services are of disk type, otherwise 0
+ replicasPerService int
}
-// Create a new KeepClient. This will contact the API server to discover Keep
-// servers.
+// MakeKeepClient creates a new KeepClient by contacting the API server to discover Keep servers.
func MakeKeepClient(arv *arvadosclient.ArvadosClient) (*KeepClient, error) {
- var matchTrue = regexp.MustCompile("^(?i:1|yes|true)$")
- insecure := matchTrue.MatchString(os.Getenv("ARVADOS_API_HOST_INSECURE"))
+ kc := New(arv)
+ return kc, kc.DiscoverKeepServers()
+}
+
+// New func creates a new KeepClient struct.
+// This func does not discover keep servers. It is the caller's responsibility.
+func New(arv *arvadosclient.ArvadosClient) *KeepClient {
+ defaultReplicationLevel := 2
+ value, err := arv.Discovery("defaultCollectionReplication")
+ if err == nil {
+ v, ok := value.(float64)
+ if ok && v > 0 {
+ defaultReplicationLevel = int(v)
+ }
+ }
+
kc := &KeepClient{
Arvados: arv,
- Want_replicas: 2,
+ Want_replicas: defaultReplicationLevel,
Using_proxy: false,
Client: &http.Client{Transport: &http.Transport{
- TLSClientConfig: &tls.Config{InsecureSkipVerify: insecure}}},
+ TLSClientConfig: &tls.Config{InsecureSkipVerify: arv.ApiInsecure}}},
+ Retries: 2,
}
- return kc, kc.DiscoverKeepServers()
+ return kc
}
// Put a block given the block hash, a reader, and the number of bytes
}
}
-// Get() retrieves a block, given a locator. Returns a reader, the
-// expected data length, the URL the block is being fetched from, and
-// an error.
-//
-// If the block checksum does not match, the final Read() on the
-// reader returned by this method will return a BadChecksum error
-// instead of EOF.
-func (kc *KeepClient) Get(locator string) (io.ReadCloser, int64, string, error) {
+func (kc *KeepClient) getOrHead(method string, locator string) (io.ReadCloser, int64, string, error) {
var errs []string
- for _, host := range kc.getSortedRoots(locator) {
- url := host + "/" + locator
- req, err := http.NewRequest("GET", url, nil)
- if err != nil {
- continue
- }
- req.Header.Add("Authorization", fmt.Sprintf("OAuth2 %s", kc.Arvados.ApiToken))
- resp, err := kc.Client.Do(req)
- if err != nil || resp.StatusCode != http.StatusOK {
- if resp != nil {
+
+ tries_remaining := 1 + kc.Retries
+ serversToTry := kc.getSortedRoots(locator)
+ var retryList []string
+
+ for tries_remaining > 0 {
+ tries_remaining -= 1
+ retryList = nil
+
+ for _, host := range serversToTry {
+ url := host + "/" + locator
+
+ req, err := http.NewRequest(method, url, nil)
+ if err != nil {
+ errs = append(errs, fmt.Sprintf("%s: %v", url, err))
+ continue
+ }
+ req.Header.Add("Authorization", fmt.Sprintf("OAuth2 %s", kc.Arvados.ApiToken))
+ resp, err := kc.Client.Do(req)
+ if err != nil {
+ // Probably a network error, may be transient,
+ // can try again.
+ errs = append(errs, fmt.Sprintf("%s: %v", url, err))
+ retryList = append(retryList, host)
+ } else if resp.StatusCode != http.StatusOK {
var respbody []byte
- if resp.Body != nil {
- respbody, _ = ioutil.ReadAll(&io.LimitedReader{resp.Body, 4096})
+ respbody, _ = ioutil.ReadAll(&io.LimitedReader{resp.Body, 4096})
+ resp.Body.Close()
+ errs = append(errs, fmt.Sprintf("%s: HTTP %d %q",
+ url, resp.StatusCode, bytes.TrimSpace(respbody)))
+
+ if resp.StatusCode == 408 ||
+ resp.StatusCode == 429 ||
+ resp.StatusCode >= 500 {
+ // Timeout, too many requests, or other
+ // server side failure, transient
+ // error, can try again.
+ retryList = append(retryList, host)
}
- errs = append(errs, fmt.Sprintf("%s: %d %s",
- url, resp.StatusCode, strings.TrimSpace(string(respbody))))
} else {
- errs = append(errs, fmt.Sprintf("%s: %v", url, err))
+ // Success.
+ if method == "GET" {
+ return HashCheckingReader{
+ Reader: resp.Body,
+ Hash: md5.New(),
+ Check: locator[0:32],
+ }, resp.ContentLength, url, nil
+ } else {
+ resp.Body.Close()
+ return nil, resp.ContentLength, url, nil
+ }
}
- continue
+
}
- return HashCheckingReader{
- Reader: resp.Body,
- Hash: md5.New(),
- Check: locator[0:32],
- }, resp.ContentLength, url, nil
+ serversToTry = retryList
}
- log.Printf("DEBUG: GET %s failed: %v", locator, errs)
+ log.Printf("DEBUG: %s %s failed: %v", method, locator, errs)
+
return nil, 0, "", BlockNotFound
}
+// Get() retrieves a block, given a locator. Returns a reader, the
+// expected data length, the URL the block is being fetched from, and
+// an error.
+//
+// If the block checksum does not match, the final Read() on the
+// reader returned by this method will return a BadChecksum error
+// instead of EOF.
+func (kc *KeepClient) Get(locator string) (io.ReadCloser, int64, string, error) {
+ return kc.getOrHead("GET", locator)
+}
+
// Ask() verifies that a block with the given hash is available and
// readable, according to at least one Keep service. Unlike Get, it
// does not retrieve the data or verify that the data content matches
// Returns the data size (content length) reported by the Keep service
// and the URI reporting the data size.
func (kc *KeepClient) Ask(locator string) (int64, string, error) {
- for _, host := range kc.getSortedRoots(locator) {
- url := host + "/" + locator
- req, err := http.NewRequest("HEAD", url, nil)
- if err != nil {
- continue
- }
- req.Header.Add("Authorization", fmt.Sprintf("OAuth2 %s", kc.Arvados.ApiToken))
- if resp, err := kc.Client.Do(req); err == nil && resp.StatusCode == http.StatusOK {
- return resp.ContentLength, url, nil
- }
+ _, size, url, err := kc.getOrHead("HEAD", locator)
+ return size, url, err
+}
+
+// GetIndex retrieves a list of blocks stored on the given server whose hashes
+// begin with the given prefix. The returned reader will return an error (other
+// than EOF) if the complete index cannot be retrieved.
+//
+// This is meant to be used only by system components and admin tools.
+// It will return an error unless the client is using a "data manager token"
+// recognized by the Keep services.
+func (kc *KeepClient) GetIndex(keepServiceUUID, prefix string) (io.Reader, error) {
+ url := kc.LocalRoots()[keepServiceUUID]
+ if url == "" {
+ return nil, ErrNoSuchKeepServer
+ }
+
+ url += "/index"
+ if prefix != "" {
+ url += "/" + prefix
+ }
+
+ req, err := http.NewRequest("GET", url, nil)
+ if err != nil {
+ return nil, err
+ }
+
+ req.Header.Add("Authorization", fmt.Sprintf("OAuth2 %s", kc.Arvados.ApiToken))
+ resp, err := kc.Client.Do(req)
+ if err != nil {
+ return nil, err
}
- return 0, "", BlockNotFound
+
+ defer resp.Body.Close()
+
+ if resp.StatusCode != http.StatusOK {
+ return nil, fmt.Errorf("Got http status code: %d", resp.StatusCode)
+ }
+
+ var respBody []byte
+ respBody, err = ioutil.ReadAll(resp.Body)
+ if err != nil {
+ return nil, err
+ }
+
+ // Got index; verify that it is complete
+ // The response should be "\n" if no locators matched the prefix
+ // Else, it should be a list of locators followed by a blank line
+ if !bytes.Equal(respBody, []byte("\n")) && !bytes.HasSuffix(respBody, []byte("\n\n")) {
+ return nil, ErrIncompleteIndex
+ }
+
+ // Got complete index; strip the trailing newline and send
+ return bytes.NewReader(respBody[0 : len(respBody)-1]), nil
}
// LocalRoots() returns the map of local (i.e., disk and proxy) Keep
return
}
arvadostest.StartAPI()
- arvadostest.StartKeep()
+ arvadostest.StartKeep(2, false)
}
func (s *ServerRequiredSuite) TearDownSuite(c *C) {
if *no_server {
return
}
- arvadostest.StopKeep()
+ arvadostest.StopKeep(2)
arvadostest.StopAPI()
}
}
}
+func (s *ServerRequiredSuite) TestDefaultReplications(c *C) {
+ arv, err := arvadosclient.MakeArvadosClient()
+ c.Assert(err, Equals, nil)
+
+ kc, err := MakeKeepClient(&arv)
+ c.Assert(kc.Want_replicas, Equals, 2)
+
+ arv.DiscoveryDoc["defaultCollectionReplication"] = 3.0
+ kc, err = MakeKeepClient(&arv)
+ c.Assert(kc.Want_replicas, Equals, 3)
+
+ arv.DiscoveryDoc["defaultCollectionReplication"] = 1.0
+ kc, err = MakeKeepClient(&arv)
+ c.Assert(kc.Want_replicas, Equals, 1)
+}
+
type StubPutHandler struct {
c *C
expectPath string
fh.handled <- fmt.Sprintf("http://%s", req.Host)
}
+type FailThenSucceedHandler struct {
+ handled chan string
+ count int
+ successhandler StubGetHandler
+}
+
+func (fh *FailThenSucceedHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
+ if fh.count == 0 {
+ resp.WriteHeader(500)
+ fh.count += 1
+ fh.handled <- fmt.Sprintf("http://%s", req.Host)
+ } else {
+ fh.successhandler.ServeHTTP(resp, req)
+ }
+}
+
+type Error404Handler struct {
+ handled chan string
+}
+
+func (fh Error404Handler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
+ resp.WriteHeader(404)
+ fh.handled <- fmt.Sprintf("http://%s", req.Host)
+}
+
func (s *StandaloneSuite) TestFailedUploadToStubKeepServer(c *C) {
log.Printf("TestFailedUploadToStubKeepServer")
arv, err := arvadosclient.MakeArvadosClient()
kc, _ := MakeKeepClient(&arv)
arv.ApiToken = "abc123"
- kc.SetServiceRoots(map[string]string{"x": ks.url}, map[string]string{ks.url: ""}, nil)
+ kc.SetServiceRoots(map[string]string{"x": ks.url}, nil, nil)
r, n, url2, err := kc.Get(hash)
defer r.Close()
log.Printf("TestGet done")
}
+func (s *StandaloneSuite) TestGet404(c *C) {
+ hash := fmt.Sprintf("%x", md5.Sum([]byte("foo")))
+
+ st := Error404Handler{make(chan string, 1)}
+
+ ks := RunFakeKeepServer(st)
+ defer ks.listener.Close()
+
+ arv, err := arvadosclient.MakeArvadosClient()
+ kc, _ := MakeKeepClient(&arv)
+ arv.ApiToken = "abc123"
+ kc.SetServiceRoots(map[string]string{"x": ks.url}, nil, nil)
+
+ r, n, url2, err := kc.Get(hash)
+ c.Check(err, Equals, BlockNotFound)
+ c.Check(n, Equals, int64(0))
+ c.Check(url2, Equals, "")
+ c.Check(r, Equals, nil)
+}
+
func (s *StandaloneSuite) TestGetFail(c *C) {
hash := fmt.Sprintf("%x", md5.Sum([]byte("foo")))
arv, err := arvadosclient.MakeArvadosClient()
kc, _ := MakeKeepClient(&arv)
arv.ApiToken = "abc123"
- kc.SetServiceRoots(map[string]string{"x": ks.url}, map[string]string{ks.url: ""}, nil)
+ kc.SetServiceRoots(map[string]string{"x": ks.url}, nil, nil)
+
+ r, n, url2, err := kc.Get(hash)
+ c.Check(err, Equals, BlockNotFound)
+ c.Check(n, Equals, int64(0))
+ c.Check(url2, Equals, "")
+ c.Check(r, Equals, nil)
+}
+
+func (s *StandaloneSuite) TestGetFailRetry(c *C) {
+ hash := fmt.Sprintf("%x", md5.Sum([]byte("foo")))
+
+ st := &FailThenSucceedHandler{make(chan string, 1), 0,
+ StubGetHandler{
+ c,
+ hash,
+ "abc123",
+ http.StatusOK,
+ []byte("foo")}}
+
+ ks := RunFakeKeepServer(st)
+ defer ks.listener.Close()
+
+ arv, err := arvadosclient.MakeArvadosClient()
+ kc, _ := MakeKeepClient(&arv)
+ arv.ApiToken = "abc123"
+ kc.SetServiceRoots(map[string]string{"x": ks.url}, nil, nil)
+
+ r, n, url2, err := kc.Get(hash)
+ defer r.Close()
+ c.Check(err, Equals, nil)
+ c.Check(n, Equals, int64(3))
+ c.Check(url2, Equals, fmt.Sprintf("%s/%s", ks.url, hash))
+
+ content, err2 := ioutil.ReadAll(r)
+ c.Check(err2, Equals, nil)
+ c.Check(content, DeepEquals, []byte("foo"))
+}
+
+func (s *StandaloneSuite) TestGetNetError(c *C) {
+ hash := fmt.Sprintf("%x", md5.Sum([]byte("foo")))
+
+ arv, err := arvadosclient.MakeArvadosClient()
+ kc, _ := MakeKeepClient(&arv)
+ arv.ApiToken = "abc123"
+ kc.SetServiceRoots(map[string]string{"x": "http://localhost:62222"}, nil, nil)
r, n, url2, err := kc.Get(hash)
c.Check(err, Equals, BlockNotFound)
arv.ApiToken = "abc123"
kc.SetServiceRoots(
map[string]string{"x": ks0.url},
- map[string]string{"x": ks0.url},
+ nil,
map[string]string{uuid: ks.url})
r, n, uri, err := kc.Get(hash + "+K@" + uuid)
"zzzzz-bi6l4-xxxxxxxxxxxxxxx": ks0.url,
"zzzzz-bi6l4-wwwwwwwwwwwwwww": ks0.url,
uuid: ks.url},
- map[string]string{
- "zzzzz-bi6l4-yyyyyyyyyyyyyyy": ks0.url,
- "zzzzz-bi6l4-xxxxxxxxxxxxxxx": ks0.url,
- "zzzzz-bi6l4-wwwwwwwwwwwwwww": ks0.url,
- uuid: ks.url},
+ nil,
map[string]string{
"zzzzz-bi6l4-yyyyyyyyyyyyyyy": ks0.url,
"zzzzz-bi6l4-xxxxxxxxxxxxxxx": ks0.url,
arv.ApiToken = "abc123"
kc.SetServiceRoots(
map[string]string{"zzzzz-bi6l4-keepdisk0000000": ksLocal.url},
- map[string]string{"zzzzz-bi6l4-keepdisk0000000": ksLocal.url},
+ nil,
map[string]string{uuid: ksGateway.url})
r, n, uri, err := kc.Get(hash + "+K@" + uuid)
arv, err := arvadosclient.MakeArvadosClient()
kc, _ := MakeKeepClient(&arv)
arv.ApiToken = "abc123"
- kc.SetServiceRoots(map[string]string{"x": ks.url}, map[string]string{ks.url: ""}, nil)
+ kc.SetServiceRoots(map[string]string{"x": ks.url}, nil, nil)
r, n, _, err := kc.Get(barhash)
_, err = ioutil.ReadAll(r)
content := []byte("waz")
hash := fmt.Sprintf("%x", md5.Sum(content))
- fh := FailHandler{
+ fh := Error404Handler{
make(chan string, 4)}
st := StubGetHandler{
}
{
hash2, replicas, err := kc.PutB(content)
- c.Check(hash2, Equals, fmt.Sprintf("%s+%d", hash, len(content)))
+ c.Check(hash2, Matches, fmt.Sprintf(`%s\+%d\b.*`, hash, len(content)))
c.Check(replicas, Equals, 2)
c.Check(err, Equals, nil)
}
c.Check(err, Equals, InsufficientReplicasError)
c.Check(replicas, Equals, 0)
}
+
+type StubGetIndexHandler struct {
+ c *C
+ expectPath string
+ expectAPIToken string
+ httpStatus int
+ body []byte
+}
+
+func (h StubGetIndexHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
+ h.c.Check(req.URL.Path, Equals, h.expectPath)
+ h.c.Check(req.Header.Get("Authorization"), Equals, fmt.Sprintf("OAuth2 %s", h.expectAPIToken))
+ resp.WriteHeader(h.httpStatus)
+ resp.Header().Set("Content-Length", fmt.Sprintf("%d", len(h.body)))
+ resp.Write(h.body)
+}
+
+func (s *StandaloneSuite) TestGetIndexWithNoPrefix(c *C) {
+ hash := fmt.Sprintf("%x", md5.Sum([]byte("foo")))
+
+ st := StubGetIndexHandler{
+ c,
+ "/index",
+ "abc123",
+ http.StatusOK,
+ []byte(hash + "+3 1443559274\n\n")}
+
+ ks := RunFakeKeepServer(st)
+ defer ks.listener.Close()
+
+ arv, err := arvadosclient.MakeArvadosClient()
+ kc, _ := MakeKeepClient(&arv)
+ arv.ApiToken = "abc123"
+ kc.SetServiceRoots(map[string]string{"x": ks.url}, nil, nil)
+
+ r, err := kc.GetIndex("x", "")
+ c.Check(err, Equals, nil)
+
+ content, err2 := ioutil.ReadAll(r)
+ c.Check(err2, Equals, nil)
+ c.Check(content, DeepEquals, st.body[0:len(st.body)-1])
+}
+
+func (s *StandaloneSuite) TestGetIndexWithPrefix(c *C) {
+ hash := fmt.Sprintf("%x", md5.Sum([]byte("foo")))
+
+ st := StubGetIndexHandler{
+ c,
+ "/index/" + hash[0:3],
+ "abc123",
+ http.StatusOK,
+ []byte(hash + "+3 1443559274\n\n")}
+
+ ks := RunFakeKeepServer(st)
+ defer ks.listener.Close()
+
+ arv, err := arvadosclient.MakeArvadosClient()
+ kc, _ := MakeKeepClient(&arv)
+ arv.ApiToken = "abc123"
+ kc.SetServiceRoots(map[string]string{"x": ks.url}, nil, nil)
+
+ r, err := kc.GetIndex("x", hash[0:3])
+ c.Check(err, Equals, nil)
+
+ content, err2 := ioutil.ReadAll(r)
+ c.Check(err2, Equals, nil)
+ c.Check(content, DeepEquals, st.body[0:len(st.body)-1])
+}
+
+func (s *StandaloneSuite) TestGetIndexIncomplete(c *C) {
+ hash := fmt.Sprintf("%x", md5.Sum([]byte("foo")))
+
+ st := StubGetIndexHandler{
+ c,
+ "/index/" + hash[0:3],
+ "abc123",
+ http.StatusOK,
+ []byte(hash)}
+
+ ks := RunFakeKeepServer(st)
+ defer ks.listener.Close()
+
+ arv, err := arvadosclient.MakeArvadosClient()
+ kc, _ := MakeKeepClient(&arv)
+ arv.ApiToken = "abc123"
+ kc.SetServiceRoots(map[string]string{"x": ks.url}, nil, nil)
+
+ _, err = kc.GetIndex("x", hash[0:3])
+ c.Check(err, Equals, ErrIncompleteIndex)
+}
+
+func (s *StandaloneSuite) TestGetIndexWithNoSuchServer(c *C) {
+ hash := fmt.Sprintf("%x", md5.Sum([]byte("foo")))
+
+ st := StubGetIndexHandler{
+ c,
+ "/index/" + hash[0:3],
+ "abc123",
+ http.StatusOK,
+ []byte(hash)}
+
+ ks := RunFakeKeepServer(st)
+ defer ks.listener.Close()
+
+ arv, err := arvadosclient.MakeArvadosClient()
+ kc, _ := MakeKeepClient(&arv)
+ arv.ApiToken = "abc123"
+ kc.SetServiceRoots(map[string]string{"x": ks.url}, nil, nil)
+
+ _, err = kc.GetIndex("y", hash[0:3])
+ c.Check(err, Equals, ErrNoSuchKeepServer)
+}
+
+func (s *StandaloneSuite) TestGetIndexWithNoSuchPrefix(c *C) {
+ st := StubGetIndexHandler{
+ c,
+ "/index/abcd",
+ "abc123",
+ http.StatusOK,
+ []byte("\n")}
+
+ ks := RunFakeKeepServer(st)
+ defer ks.listener.Close()
+
+ arv, err := arvadosclient.MakeArvadosClient()
+ kc, _ := MakeKeepClient(&arv)
+ arv.ApiToken = "abc123"
+ kc.SetServiceRoots(map[string]string{"x": ks.url}, nil, nil)
+
+ r, err := kc.GetIndex("x", "abcd")
+ c.Check(err, Equals, nil)
+
+ content, err2 := ioutil.ReadAll(r)
+ c.Check(err2, Equals, nil)
+ c.Check(content, DeepEquals, st.body[0:len(st.body)-1])
+}
--- /dev/null
+// Generate and verify permission signatures for Keep locators.
+//
+// See https://dev.arvados.org/projects/arvados/wiki/Keep_locator_format
+
+package keepclient
+
+import (
+ "crypto/hmac"
+ "crypto/sha1"
+ "errors"
+ "fmt"
+ "regexp"
+ "strconv"
+ "strings"
+ "time"
+)
+
+var (
+ // ErrSignatureExpired - a signature was rejected because the
+ // expiry time has passed.
+ ErrSignatureExpired = errors.New("Signature expired")
+ // ErrSignatureInvalid - a signature was rejected because it
+ // was badly formatted or did not match the given secret key.
+ ErrSignatureInvalid = errors.New("Invalid signature")
+ // ErrSignatureMissing - the given locator does not have a
+ // signature hint.
+ ErrSignatureMissing = errors.New("Missing signature")
+)
+
+// makePermSignature generates a SHA-1 HMAC digest for the given blob,
+// token, expiry, and site secret.
+func makePermSignature(blobHash, apiToken, expiry string, permissionSecret []byte) string {
+ hmac := hmac.New(sha1.New, permissionSecret)
+ hmac.Write([]byte(blobHash))
+ hmac.Write([]byte("@"))
+ hmac.Write([]byte(apiToken))
+ hmac.Write([]byte("@"))
+ hmac.Write([]byte(expiry))
+ digest := hmac.Sum(nil)
+ return fmt.Sprintf("%x", digest)
+}
+
+// SignLocator returns blobLocator with a permission signature
+// added. If either permissionSecret or apiToken is empty, blobLocator
+// is returned untouched.
+//
+// This function is intended to be used by system components and admin
+// utilities: userland programs do not know the permissionSecret.
+func SignLocator(blobLocator, apiToken string, expiry time.Time, permissionSecret []byte) string {
+ if len(permissionSecret) == 0 || apiToken == "" {
+ return blobLocator
+ }
+ // Strip off all hints: only the hash is used to sign.
+ blobHash := strings.Split(blobLocator, "+")[0]
+ timestampHex := fmt.Sprintf("%08x", expiry.Unix())
+ return blobLocator +
+ "+A" + makePermSignature(blobHash, apiToken, timestampHex, permissionSecret) +
+ "@" + timestampHex
+}
+
+var signedLocatorRe = regexp.MustCompile(`^([[:xdigit:]]{32}).*\+A([[:xdigit:]]{40})@([[:xdigit:]]{8})`)
+
+// VerifySignature returns nil if the signature on the signedLocator
+// can be verified using the given apiToken. Otherwise it returns
+// ErrSignatureExpired (if the signature's expiry time has passed,
+// which is something the client could have figured out
+// independently), ErrSignatureMissing (if there is no signature hint
+// at all), or ErrSignatureInvalid (if the signature is present but
+// badly formatted or incorrect).
+//
+// This function is intended to be used by system components and admin
+// utilities: userland programs do not know the permissionSecret.
+func VerifySignature(signedLocator, apiToken string, permissionSecret []byte) error {
+ matches := signedLocatorRe.FindStringSubmatch(signedLocator)
+ if matches == nil {
+ return ErrSignatureMissing
+ }
+ blobHash := matches[1]
+ signatureHex := matches[2]
+ expiryHex := matches[3]
+ if expiryTime, err := parseHexTimestamp(expiryHex); err != nil {
+ return ErrSignatureInvalid
+ } else if expiryTime.Before(time.Now()) {
+ return ErrSignatureExpired
+ }
+ if signatureHex != makePermSignature(blobHash, apiToken, expiryHex, permissionSecret) {
+ return ErrSignatureInvalid
+ }
+ return nil
+}
+
+func parseHexTimestamp(timestampHex string) (ts time.Time, err error) {
+ if tsInt, e := strconv.ParseInt(timestampHex, 16, 0); e == nil {
+ ts = time.Unix(tsInt, 0)
+ } else {
+ err = e
+ }
+ return ts, err
+}
--- /dev/null
+package keepclient
+
+import (
+ "testing"
+ "time"
+)
+
+const (
+ knownHash = "acbd18db4cc2f85cedef654fccc4a4d8"
+ knownLocator = knownHash + "+3"
+ knownToken = "hocfupkn2pjhrpgp2vxv8rsku7tvtx49arbc9s4bvu7p7wxqvk"
+ knownKey = "13u9fkuccnboeewr0ne3mvapk28epf68a3bhj9q8sb4l6e4e5mkk" +
+ "p6nhj2mmpscgu1zze5h5enydxfe3j215024u16ij4hjaiqs5u4pzsl3nczmaoxnc" +
+ "ljkm4875xqn4xv058koz3vkptmzhyheiy6wzevzjmdvxhvcqsvr5abhl15c2d4o4" +
+ "jhl0s91lojy1mtrzqqvprqcverls0xvy9vai9t1l1lvvazpuadafm71jl4mrwq2y" +
+ "gokee3eamvjy8qq1fvy238838enjmy5wzy2md7yvsitp5vztft6j4q866efym7e6" +
+ "vu5wm9fpnwjyxfldw3vbo01mgjs75rgo7qioh8z8ij7jpyp8508okhgbbex3ceei" +
+ "786u5rw2a9gx743dj3fgq2irk"
+ knownSignature = "257f3f5f5f0a4e4626a18fc74bd42ec34dcb228a"
+ knownTimestamp = "7fffffff"
+ knownSigHint = "+A" + knownSignature + "@" + knownTimestamp
+ knownSignedLocator = knownLocator + knownSigHint
+)
+
+func TestSignLocator(t *testing.T) {
+ if ts, err := parseHexTimestamp(knownTimestamp); err != nil {
+ t.Errorf("bad knownTimestamp %s", knownTimestamp)
+ } else {
+ if knownSignedLocator != SignLocator(knownLocator, knownToken, ts, []byte(knownKey)) {
+ t.Fail()
+ }
+ }
+}
+
+func TestVerifySignature(t *testing.T) {
+ if VerifySignature(knownSignedLocator, knownToken, []byte(knownKey)) != nil {
+ t.Fail()
+ }
+}
+
+func TestVerifySignatureExtraHints(t *testing.T) {
+ if VerifySignature(knownLocator+"+K@xyzzy"+knownSigHint, knownToken, []byte(knownKey)) != nil {
+ t.Fatal("Verify cannot handle hint before permission signature")
+ }
+
+ if VerifySignature(knownLocator+knownSigHint+"+Zfoo", knownToken, []byte(knownKey)) != nil {
+ t.Fatal("Verify cannot handle hint after permission signature")
+ }
+
+ if VerifySignature(knownLocator+"+K@xyzzy"+knownSigHint+"+Zfoo", knownToken, []byte(knownKey)) != nil {
+ t.Fatal("Verify cannot handle hints around permission signature")
+ }
+}
+
+// The size hint on the locator string should not affect signature validation.
+func TestVerifySignatureWrongSize(t *testing.T) {
+ if VerifySignature(knownHash+"+999999"+knownSigHint, knownToken, []byte(knownKey)) != nil {
+ t.Fatal("Verify cannot handle incorrect size hint")
+ }
+
+ if VerifySignature(knownHash+knownSigHint, knownToken, []byte(knownKey)) != nil {
+ t.Fatal("Verify cannot handle missing size hint")
+ }
+}
+
+func TestVerifySignatureBadSig(t *testing.T) {
+ badLocator := knownLocator + "+Aaaaaaaaaaaaaaaa@" + knownTimestamp
+ if VerifySignature(badLocator, knownToken, []byte(knownKey)) != ErrSignatureMissing {
+ t.Fail()
+ }
+}
+
+func TestVerifySignatureBadTimestamp(t *testing.T) {
+ badLocator := knownLocator + "+A" + knownSignature + "@OOOOOOOl"
+ if VerifySignature(badLocator, knownToken, []byte(knownKey)) != ErrSignatureMissing {
+ t.Fail()
+ }
+}
+
+func TestVerifySignatureBadSecret(t *testing.T) {
+ if VerifySignature(knownSignedLocator, knownToken, []byte("00000000000000000000")) != ErrSignatureInvalid {
+ t.Fail()
+ }
+}
+
+func TestVerifySignatureBadToken(t *testing.T) {
+ if VerifySignature(knownSignedLocator, "00000000", []byte(knownKey)) != ErrSignatureInvalid {
+ t.Fail()
+ }
+}
+
+func TestVerifySignatureExpired(t *testing.T) {
+ yesterday := time.Now().AddDate(0, 0, -1)
+ expiredLocator := SignLocator(knownHash, knownToken, yesterday, []byte(knownKey))
+ if VerifySignature(expiredLocator, knownToken, []byte(knownKey)) != ErrSignatureExpired {
+ t.Fail()
+ }
+}
import (
"crypto/md5"
+ "encoding/json"
"errors"
"fmt"
"git.curoverse.com/arvados.git/sdk/go/streamer"
"time"
)
-type keepDisk struct {
+type keepService struct {
Uuid string `json:"uuid"`
Hostname string `json:"service_host"`
Port int `json:"service_port"`
ReadOnly bool `json:"read_only"`
}
+// Md5String returns md5 hash for the bytes in the given string
func Md5String(s string) string {
return fmt.Sprintf("%x", md5.Sum([]byte(s)))
}
TLSHandshakeTimeout: 10 * time.Second,
}
}
-
}
// Set timeouts apply when connecting to keepstore services directly (assumed
// to be on the local network).
-func (this *KeepClient) setClientSettingsStore() {
+func (this *KeepClient) setClientSettingsDisk() {
if this.Client.Timeout == 0 {
// Maximum time to wait for a complete response
this.Client.Timeout = 20 * time.Second
}
}
+type svcList struct {
+ Items []keepService `json:"items"`
+}
+
+// DiscoverKeepServers gets list of available keep services from api server
func (this *KeepClient) DiscoverKeepServers() error {
- type svcList struct {
- Items []keepDisk `json:"items"`
+ var list svcList
+
+ // Get keep services from api server
+ err := this.Arvados.Call("GET", "keep_services", "", "accessible", nil, &list)
+ if err != nil {
+ return err
}
- var m svcList
- err := this.Arvados.Call("GET", "keep_services", "", "accessible", nil, &m)
+ return this.loadKeepServers(list)
+}
- if err != nil {
- if err := this.Arvados.List("keep_disks", nil, &m); err != nil {
- return err
- }
+// LoadKeepServicesFromJSON loads the list of available keep services from the given JSON
+func (this *KeepClient) LoadKeepServicesFromJSON(services string) error {
+ var list svcList
+
+ // Load keep services from given json
+ dec := json.NewDecoder(strings.NewReader(services))
+ if err := dec.Decode(&list); err != nil {
+ return err
}
+ return this.loadKeepServers(list)
+}
+
+// loadKeepServers populates the client's local, writable, and gateway service root maps from the given service list.
+func (this *KeepClient) loadKeepServers(list svcList) error {
listed := make(map[string]bool)
localRoots := make(map[string]string)
gatewayRoots := make(map[string]string)
writableLocalRoots := make(map[string]string)
- for _, service := range m.Items {
+ // replicasPerService is 1 for disks; unknown or unlimited otherwise
+ this.replicasPerService = 1
+ this.Using_proxy = false
+
+ for _, service := range list.Items {
scheme := "http"
if service.SSL {
scheme = "https"
}
listed[url] = true
- switch service.SvcType {
- case "disk":
- localRoots[service.Uuid] = url
- case "proxy":
- localRoots[service.Uuid] = url
+ localRoots[service.Uuid] = url
+ if service.SvcType == "proxy" {
this.Using_proxy = true
}
if service.ReadOnly == false {
writableLocalRoots[service.Uuid] = url
+ if service.SvcType != "disk" {
+ this.replicasPerService = 0
+ }
}
// Gateway services are only used when specified by
if this.Using_proxy {
this.setClientSettingsProxy()
} else {
- this.setClientSettingsStore()
+ this.setClientSettingsDisk()
}
this.SetServiceRoots(localRoots, writableLocalRoots, gatewayRoots)
req.Header.Add("Authorization", fmt.Sprintf("OAuth2 %s", this.Arvados.ApiToken))
req.Header.Add("Content-Type", "application/octet-stream")
-
- if this.Using_proxy {
- req.Header.Add(X_Keep_Desired_Replicas, fmt.Sprint(this.Want_replicas))
- }
+ req.Header.Add(X_Keep_Desired_Replicas, fmt.Sprint(this.Want_replicas))
var resp *http.Response
if resp, err = this.Client.Do(req); err != nil {
// Used to communicate status from the upload goroutines
upload_status := make(chan uploadStatus)
- defer close(upload_status)
+ defer func() {
+ // Wait for any abandoned uploads (e.g., we started
+ // two uploads and the first replied with replicas=2)
+ // to finish before closing the status channel.
+ go func() {
+ for active > 0 {
+ <-upload_status
+ }
+ close(upload_status)
+ }()
+ }()
// Desired number of replicas
remaining_replicas := this.Want_replicas
+ replicasPerThread := this.replicasPerService
+ if replicasPerThread < 1 {
+ // unlimited or unknown
+ replicasPerThread = remaining_replicas
+ }
+
for remaining_replicas > 0 {
- for active < remaining_replicas {
+ for active*replicasPerThread < remaining_replicas {
// Start some upload requests
if next_server < len(sv) {
log.Printf("[%v] Begin upload %s to %s", requestId, hash, sv[next_server])
n, err := sr.Read(out)
c.Check(n, Equals, 100)
+ c.Check(err, IsNil)
n, err = sr.Read(out)
c.Check(n, Equals, 0)
}
}
} else {
- if reader_status == io.EOF {
- // no more reads expected, so this is ok
- } else {
+ if reader_status == nil {
// slices channel closed without signaling EOF
reader_status = io.ErrUnexpectedEOF
}
'arvados-python-client>=0.1.20150801000000',
],
test_suite='tests',
- tests_require=['mock>=1.0', 'python-pam'],
+ tests_require=['pbr<1.7.0', 'mock>=1.0', 'python-pam'],
zip_safe=False,
cmdclass={'egg_info': tagger},
)
@synchronized
def set_state(self, nextstate, val=None):
if (self._state, nextstate) not in self.STATE_TRANSITIONS:
- raise StateChangeError("Invalid state change from %s to %s" % (self.state, nextstate), self.state, nextstate)
+ raise StateChangeError("Invalid state change from %s to %s" % (self._state, nextstate), self._state, nextstate)
self._state = nextstate
if self._state == _BufferBlock.PENDING:
for i in xrange(0, self.num_put_threads):
thread = threading.Thread(target=self._commit_bufferblock_worker)
self._put_threads.append(thread)
- thread.daemon = False
+ thread.daemon = True
thread.start()
def _block_prefetch_worker(self):
# Mark the block as PENDING so to disallow any more appends.
block.set_state(_BufferBlock.PENDING)
except StateChangeError as e:
- if e.state == _BufferBlock.PENDING and sync:
- block.wait_for_commit.wait()
- if block.state() == _BufferBlock.ERROR:
- raise block.error
- return
+ if e.state == _BufferBlock.PENDING:
+ if sync:
+ block.wait_for_commit.wait()
+ else:
+ return
+ if block.state() == _BufferBlock.COMMITTED:
+ return
+ elif block.state() == _BufferBlock.ERROR:
+ raise block.error
+ else:
+ raise
if sync:
try:
replication=replication)
except (TypeError, ValueError,
arvados.errors.StaleWriterStateError) as error:
- return cls(cache, reporter, bytes_expected, num_retries=num_retries)
+ return cls(cache, reporter, bytes_expected,
+ num_retries=num_retries,
+ replication=replication)
else:
return writer
Should be used in a "with" block.
"""
def __init__(self, todo):
+ self._started = 0
self._todo = todo
self._done = 0
self._response = None
+ self._start_lock = threading.Condition()
self._todo_lock = threading.Semaphore(todo)
self._done_lock = threading.Lock()
+ self._local = threading.local()
def __enter__(self):
+ self._start_lock.acquire()
+ if getattr(self._local, 'sequence', None) is not None:
+ # If the calling thread has used set_sequence(N), then
+ # we wait here until N other threads have started.
+ while self._started < self._local.sequence:
+ self._start_lock.wait()
+ self._started += 1
+ self._start_lock.notifyAll()
self._todo_lock.acquire()
+ self._start_lock.release()
return self
def __exit__(self, type, value, traceback):
self._todo_lock.release()
+ def set_sequence(self, sequence):
+ self._local.sequence = sequence
+
def shall_i_proceed(self):
"""
Return true if the current thread should do stuff. Return
return self._success
def run(self):
- with self.args['thread_limiter'] as limiter:
+ limiter = self.args['thread_limiter']
+ sequence = self.args['thread_sequence']
+ if sequence is not None:
+ limiter.set_sequence(sequence)
+ with limiter:
if not limiter.shall_i_proceed():
# My turn arrived, but the job has been done without
# me.
self._writable_services = self._keep_services
self.using_proxy = True
self._static_services_list = True
+ self.max_replicas_per_service = 1
else:
# It's important to avoid instantiating an API client
# unless we actually need one, for testing's sake.
self._writable_services = None
self.using_proxy = None
self._static_services_list = False
+ self.max_replicas_per_service = 1
def current_timeout(self, attempt_number):
"""Return the appropriate timeout to use for this client.
self.using_proxy = any(ks.get('service_type') == 'proxy'
for ks in self._keep_services)
+ # For disk type services, max_replicas_per_service is 1
+ # It is unknown or unlimited for non-disk typed services.
+ for ks in accessible:
+ if ('disk' != ks.get('service_type')) and (not ks.get('read_only')):
+ self.max_replicas_per_service = None
def _service_weight(self, data_hash, service_uuid):
"""Compute the weight of a Keep service endpoint for a data
self.build_services_list(force_rebuild)
sorted_roots = []
-
# Use the services indicated by the given +K@... remote
# service hints, if any are present and can be resolved to a
# URI.
locator = KeepLocator(loc_s)
headers = {}
- if self.using_proxy:
- # Tell the proxy how many copies we want it to store
- headers['X-Keep-Desired-Replication'] = str(copies)
+ # Tell the proxy how many copies we want it to store
+ headers['X-Keep-Desired-Replication'] = str(copies)
roots_map = {}
- thread_limiter = KeepClient.ThreadLimiter(copies)
+ thread_limiter = KeepClient.ThreadLimiter(1 if self.max_replicas_per_service is None else copies)
loop = retry.RetryLoop(num_retries, self._check_loop_result,
backoff_start=2)
+ thread_sequence = 0
for tries_left in loop:
try:
- local_roots = self.map_new_services(
+ sorted_roots = self.map_new_services(
roots_map, locator,
force_rebuild=(tries_left < num_retries), need_writable=True, **headers)
except Exception as error:
continue
threads = []
- for service_root, ks in roots_map.iteritems():
+ for service_root, ks in [(root, roots_map[root])
+ for root in sorted_roots]:
if ks.finished():
continue
t = KeepClient.KeepWriterThread(
data_hash=data_hash,
service_root=service_root,
thread_limiter=thread_limiter,
- timeout=self.current_timeout(num_retries-tries_left))
+ timeout=self.current_timeout(num_retries-tries_left),
+ thread_sequence=thread_sequence)
t.start()
threads.append(t)
+ thread_sequence += 1
for t in threads:
t.join()
loop.save_result((thread_limiter.done() >= copies, len(threads)))
data_hash, loop.last_result()))
else:
service_errors = ((key, roots_map[key].last_result()['error'])
- for key in local_roots
+ for key in sorted_roots
if roots_map[key].last_result()['error'])
raise arvados.errors.KeepWriteError(
"failed to write {} (wanted {} copies but wrote {})".format(
'ws4py'
],
test_suite='tests',
- tests_require=['mock>=1.0', 'PyYAML'],
+ tests_require=['pbr<1.7.0', 'mock>=1.0', 'PyYAML'],
zip_safe=False,
cmdclass={'egg_info': tagger},
)
for arg, val in keep_args.iteritems():
keep_cmd.append("{}={}".format(arg, val))
+ logf = open(os.path.join(TEST_TMPDIR, 'keep{}.log'.format(n)), 'a+')
kp0 = subprocess.Popen(
- keep_cmd, stdin=open('/dev/null'), stdout=sys.stderr)
+ keep_cmd, stdin=open('/dev/null'), stdout=logf, stderr=logf, close_fds=True)
with open(_pidfile('keep{}'.format(n)), 'w') as f:
f.write(str(kp0.pid))
return port
-def run_keep(blob_signing_key=None, enforce_permissions=False):
- stop_keep()
+def run_keep(blob_signing_key=None, enforce_permissions=False, num_servers=2):
+ stop_keep(num_servers)
keep_args = {}
- if blob_signing_key:
- with open(os.path.join(TEST_TMPDIR, "keep.blob_signing_key"), "w") as f:
- keep_args['--permission-key-file'] = f.name
- f.write(blob_signing_key)
+ if not blob_signing_key:
+ blob_signing_key = 'zfhgfenhffzltr9dixws36j1yhksjoll2grmku38mi7yxd66h5j4q9w4jzanezacp8s6q0ro3hxakfye02152hncy6zml2ed0uc'
+ with open(os.path.join(TEST_TMPDIR, "keep.blob_signing_key"), "w") as f:
+ keep_args['-blob-signing-key-file'] = f.name
+ f.write(blob_signing_key)
if enforce_permissions:
- keep_args['--enforce-permissions'] = 'true'
+ keep_args['-enforce-permissions'] = 'true'
+ with open(os.path.join(TEST_TMPDIR, "keep.data-manager-token-file"), "w") as f:
+ keep_args['-data-manager-token-file'] = f.name
+ f.write(os.environ['ARVADOS_API_TOKEN'])
+ keep_args['-never-delete'] = 'false'
api = arvados.api(
version='v1',
host=os.environ['ARVADOS_API_HOST'],
token=os.environ['ARVADOS_API_TOKEN'],
insecure=True)
+
for d in api.keep_services().list().execute()['items']:
api.keep_services().delete(uuid=d['uuid']).execute()
for d in api.keep_disks().list().execute()['items']:
api.keep_disks().delete(uuid=d['uuid']).execute()
- for d in range(0, 2):
+ for d in range(0, num_servers):
port = _start_keep(d, keep_args)
svc = api.keep_services().create(body={'keep_service': {
'uuid': 'zzzzz-bi6l4-keepdisk{:07d}'.format(d),
if os.path.exists(os.path.join(TEST_TMPDIR, "keep.blob_signing_key")):
os.remove(os.path.join(TEST_TMPDIR, "keep.blob_signing_key"))
-def stop_keep():
- _stop_keep(0)
- _stop_keep(1)
+def stop_keep(num_servers=2):
+ for n in range(0, num_servers):
+ _stop_keep(n)
def run_keep_proxy():
if 'ARVADOS_TEST_PROXY_SERVICES' in os.environ:
parser = argparse.ArgumentParser()
parser.add_argument('action', type=str, help="one of {}".format(actions))
parser.add_argument('--auth', type=str, metavar='FIXTURE_NAME', help='Print authorization info for given api_client_authorizations fixture')
+ parser.add_argument('--num-keep-servers', metavar='int', type=int, default=2, help="Number of keep servers desired")
+ parser.add_argument('--keep-enforce-permissions', action="store_true", help="Enforce keep permissions")
+
args = parser.parse_args()
if args.action not in actions:
elif args.action == 'stop':
stop(force=('ARVADOS_TEST_API_HOST' not in os.environ))
elif args.action == 'start_keep':
- run_keep()
+ run_keep(enforce_permissions=args.keep_enforce_permissions, num_servers=args.num_keep_servers)
elif args.action == 'stop_keep':
stop_keep()
elif args.action == 'start_keep_proxy':
# -*- coding: utf-8 -*-
import apiclient
+import mock
import os
import pwd
import re
self.main_stderr = StringIO()
return arv_put.main(args, self.main_stdout, self.main_stderr)
- def call_main_on_test_file(self):
+ def call_main_on_test_file(self, args=[]):
with self.make_test_file() as testfile:
path = testfile.name
- self.call_main_with_args(['--stream', '--no-progress', path])
+ self.call_main_with_args(['--stream', '--no-progress'] + args + [path])
self.assertTrue(
os.path.exists(os.path.join(os.environ['KEEP_LOCAL_STORE'],
'098f6bcd4621d373cade4e832627b4f6')),
arv_put.ResumeCache.CACHE_DIR = orig_cachedir
os.chmod(cachedir, 0o700)
+ def test_put_block_replication(self):
+ with mock.patch('arvados.collection.KeepClient.local_store_put') as put_mock, \
+ mock.patch('arvados.commands.put.ResumeCache.load') as cache_mock:
+ cache_mock.side_effect = ValueError
+ put_mock.return_value = 'acbd18db4cc2f85cedef654fccc4a4d8+3'
+ self.call_main_on_test_file(['--replication', '1'])
+ self.call_main_on_test_file(['--replication', '4'])
+ self.call_main_on_test_file(['--replication', '5'])
+ self.assertEqual(
+ [x[-1].get('copies') for x in put_mock.call_args_list],
+ [1, 4, 5])
+
def test_normalize(self):
testfile1 = self.make_test_file()
testfile2 = self.make_test_file()
import os
import unittest
import hashlib
+import time
import arvados
from arvados._ranges import Range
self.assertEqual(bufferblock.state(), arvados.arvfile._BufferBlock.COMMITTED)
self.assertIsNone(bufferblock.buffer_view)
+ def test_bufferblock_commit_pending(self):
+ # Test for bug #7225
+ mockkeep = mock.MagicMock()
+ mockkeep.put.side_effect = lambda x: time.sleep(1)
+ with arvados.arvfile._BlockManager(mockkeep) as blockmanager:
+ bufferblock = blockmanager.alloc_bufferblock()
+ bufferblock.append("foo")
+
+ blockmanager.commit_bufferblock(bufferblock, False)
+ self.assertEqual(bufferblock.state(), arvados.arvfile._BufferBlock.PENDING)
+
+ blockmanager.commit_bufferblock(bufferblock, True)
+ self.assertEqual(bufferblock.state(), arvados.arvfile._BufferBlock.COMMITTED)
+
def test_bufferblock_commit_with_error(self):
mockkeep = mock.MagicMock()
c2.save()
c1.update()
- self.assertRegexpMatches(c1.manifest_text(), r"\. e65075d550f9b5bf9992fa1d71a131be\+3 7ac66c0f148de9519b8bd264312c4d64\+7\+A[a-f0-9]{40}@[a-f0-9]{8} 0:3:count\.txt 3:7:count\.txt~\d\d\d\d\d\d\d\d-\d\d\d\d\d\d~conflict~$")
+ self.assertRegexpMatches(c1.manifest_text(), r"\. e65075d550f9b5bf9992fa1d71a131be\+3\S* 7ac66c0f148de9519b8bd264312c4d64\+7\S* 0:3:count\.txt 3:7:count\.txt~\d\d\d\d\d\d\d\d-\d\d\d\d\d\d~conflict~$")
if __name__ == '__main__':
mock.responses[0].getopt(pycurl.TIMEOUT_MS),
int(arvados.KeepClient.DEFAULT_PROXY_TIMEOUT[1]*1000))
- def test_probe_order_reference_set(self):
+ def check_no_services_error(self, verb, exc_class):
+ api_client = mock.MagicMock(name='api_client')
+ api_client.keep_services().accessible().execute.side_effect = (
+ arvados.errors.ApiError)
+ keep_client = arvados.KeepClient(api_client=api_client)
+ with self.assertRaises(exc_class) as err_check:
+ getattr(keep_client, verb)('d41d8cd98f00b204e9800998ecf8427e+0')
+ self.assertEqual(0, len(err_check.exception.request_errors()))
+
+ def test_get_error_with_no_services(self):
+ self.check_no_services_error('get', arvados.errors.KeepReadError)
+
+ def test_put_error_with_no_services(self):
+ self.check_no_services_error('put', arvados.errors.KeepWriteError)
+
+ def check_errors_from_last_retry(self, verb, exc_class):
+ api_client = self.mock_keep_services(count=2)
+ req_mock = tutil.mock_keep_responses(
+ "retry error reporting test", 500, 500, 403, 403)
+ with req_mock, tutil.skip_sleep, \
+ self.assertRaises(exc_class) as err_check:
+ keep_client = arvados.KeepClient(api_client=api_client)
+ getattr(keep_client, verb)('d41d8cd98f00b204e9800998ecf8427e+0',
+ num_retries=3)
+ self.assertEqual([403, 403], [
+ getattr(error, 'status_code', None)
+ for error in err_check.exception.request_errors().itervalues()])
+
+ def test_get_error_reflects_last_retry(self):
+ self.check_errors_from_last_retry('get', arvados.errors.KeepReadError)
+
+ def test_put_error_reflects_last_retry(self):
+ self.check_errors_from_last_retry('put', arvados.errors.KeepWriteError)
+
+ def test_put_error_does_not_include_successful_puts(self):
+ data = 'partial failure test'
+ data_loc = '{}+{}'.format(hashlib.md5(data).hexdigest(), len(data))
+ api_client = self.mock_keep_services(count=3)
+ with tutil.mock_keep_responses(data_loc, 200, 500, 500) as req_mock, \
+ self.assertRaises(arvados.errors.KeepWriteError) as exc_check:
+ keep_client = arvados.KeepClient(api_client=api_client)
+ keep_client.put(data)
+ self.assertEqual(2, len(exc_check.exception.request_errors()))
+
+ def test_proxy_put_with_no_writable_services(self):
+ data = 'test with no writable services'
+ data_loc = '{}+{}'.format(hashlib.md5(data).hexdigest(), len(data))
+ api_client = self.mock_keep_services(service_type='proxy', read_only=True, count=1)
+ with tutil.mock_keep_responses(data_loc, 200, 500, 500) as req_mock, \
+ self.assertRaises(arvados.errors.KeepWriteError) as exc_check:
+ keep_client = arvados.KeepClient(api_client=api_client)
+ keep_client.put(data)
+ self.assertEqual(True, ("no Keep services available" in str(exc_check.exception)))
+ self.assertEqual(0, len(exc_check.exception.request_errors()))
+
+
+@tutil.skip_sleep
+class KeepClientRendezvousTestCase(unittest.TestCase, tutil.ApiClientMock):
+
+ def setUp(self):
# expected_order[i] is the probe order for
# hash=md5(sprintf("%064x",i)) where there are 16 services
# with uuid sprintf("anything-%015x",j) with j in 0..15. E.g.,
# the first probe for the block consisting of 64 "0"
# characters is the service whose uuid is
# "zzzzz-bi6l4-000000000000003", so expected_order[0][0]=='3'.
- expected_order = [
+ self.services = 16
+ self.expected_order = [
list('3eab2d5fc9681074'),
list('097dba52e648f1c3'),
list('c5b4e023f8a7d691'),
list('9d81c02e76a3bf54'),
]
- hashes = [
- hashlib.md5("{:064x}".format(x)).hexdigest()
- for x in range(len(expected_order))]
- api_client = self.mock_keep_services(count=16)
- keep_client = arvados.KeepClient(api_client=api_client)
- for i, hash in enumerate(hashes):
- roots = keep_client.weighted_service_roots(arvados.KeepLocator(hash))
+ self.blocks = [
+ "{:064x}".format(x)
+ for x in range(len(self.expected_order))]
+ self.hashes = [
+ hashlib.md5(self.blocks[x]).hexdigest()
+ for x in range(len(self.expected_order))]
+ self.api_client = self.mock_keep_services(count=self.services)
+ self.keep_client = arvados.KeepClient(api_client=self.api_client)
+
+ def test_weighted_service_roots_against_reference_set(self):
+ # Confirm weighted_service_roots() returns the correct order
+ for i, hash in enumerate(self.hashes):
+ roots = self.keep_client.weighted_service_roots(arvados.KeepLocator(hash))
got_order = [
re.search(r'//\[?keep0x([0-9a-f]+)', root).group(1)
for root in roots]
- self.assertEqual(expected_order[i], got_order)
+ self.assertEqual(self.expected_order[i], got_order)
+
+ def test_get_probe_order_against_reference_set(self):
+ self._test_probe_order_against_reference_set(
+ lambda i: self.keep_client.get(self.hashes[i], num_retries=1))
+
+ def test_put_probe_order_against_reference_set(self):
+ # copies=1 prevents the test from being sensitive to races
+ # between writer threads.
+ self._test_probe_order_against_reference_set(
+ lambda i: self.keep_client.put(self.blocks[i], num_retries=1, copies=1))
+
+ def _test_probe_order_against_reference_set(self, op):
+ for i in range(len(self.blocks)):
+ with tutil.mock_keep_responses('', *[500 for _ in range(self.services*2)]) as mock, \
+ self.assertRaises(arvados.errors.KeepRequestError):
+ op(i)
+ got_order = [
+ re.search(r'//\[?keep0x([0-9a-f]+)', resp.getopt(pycurl.URL)).group(1)
+ for resp in mock.responses]
+ self.assertEqual(self.expected_order[i]*2, got_order)
+
+ def test_put_probe_order_multiple_copies(self):
+ for copies in range(2, 4):
+ for i in range(len(self.blocks)):
+ with tutil.mock_keep_responses('', *[500 for _ in range(self.services*3)]) as mock, \
+ self.assertRaises(arvados.errors.KeepWriteError):
+ self.keep_client.put(self.blocks[i], num_retries=2, copies=copies)
+ got_order = [
+ re.search(r'//\[?keep0x([0-9a-f]+)', resp.getopt(pycurl.URL)).group(1)
+ for resp in mock.responses]
+ for pos, expected in enumerate(self.expected_order[i]*3):
+ # With C threads racing to make requests, the
+ # position of a given server in the sequence of
+ # HTTP requests (got_order) should be within C-1
+ # positions of that server's position in the
+ # reference probe sequence (expected_order).
+ close_enough = False
+ for diff in range(1-copies, copies):
+ if 0 <= pos+diff < len(got_order):
+ if expected == got_order[pos+diff]:
+ close_enough = True
+ self.assertEqual(
+ True, close_enough,
+ "With copies={}, got {}, expected {}".format(
+ copies, repr(got_order), repr(self.expected_order[i]*3)))
def test_probe_waste_adding_one_server(self):
hashes = [
hashlib.md5("{:064x}".format(x)).hexdigest() for x in range(100)]
initial_services = 12
- api_client = self.mock_keep_services(count=initial_services)
- keep_client = arvados.KeepClient(api_client=api_client)
+ self.api_client = self.mock_keep_services(count=initial_services)
+ self.keep_client = arvados.KeepClient(api_client=self.api_client)
probes_before = [
- keep_client.weighted_service_roots(arvados.KeepLocator(hash)) for hash in hashes]
+ self.keep_client.weighted_service_roots(arvados.KeepLocator(hash)) for hash in hashes]
for added_services in range(1, 12):
api_client = self.mock_keep_services(count=initial_services+added_services)
keep_client = arvados.KeepClient(api_client=api_client)
data = hashlib.md5(data).hexdigest() + '+1234'
# Arbitrary port number:
aport = random.randint(1024,65535)
- api_client = self.mock_keep_services(service_port=aport, count=16)
+ api_client = self.mock_keep_services(service_port=aport, count=self.services)
keep_client = arvados.KeepClient(api_client=api_client)
with mock.patch('pycurl.Curl') as curl_mock, \
self.assertRaises(exc_class) as err_check:
def test_put_error_shows_probe_order(self):
self.check_64_zeros_error_order('put', arvados.errors.KeepWriteError)
- def check_no_services_error(self, verb, exc_class):
- api_client = mock.MagicMock(name='api_client')
- api_client.keep_services().accessible().execute.side_effect = (
- arvados.errors.ApiError)
- keep_client = arvados.KeepClient(api_client=api_client)
- with self.assertRaises(exc_class) as err_check:
- getattr(keep_client, verb)('d41d8cd98f00b204e9800998ecf8427e+0')
- self.assertEqual(0, len(err_check.exception.request_errors()))
-
- def test_get_error_with_no_services(self):
- self.check_no_services_error('get', arvados.errors.KeepReadError)
-
- def test_put_error_with_no_services(self):
- self.check_no_services_error('put', arvados.errors.KeepWriteError)
-
- def check_errors_from_last_retry(self, verb, exc_class):
- api_client = self.mock_keep_services(count=2)
- req_mock = tutil.mock_keep_responses(
- "retry error reporting test", 500, 500, 403, 403)
- with req_mock, tutil.skip_sleep, \
- self.assertRaises(exc_class) as err_check:
- keep_client = arvados.KeepClient(api_client=api_client)
- getattr(keep_client, verb)('d41d8cd98f00b204e9800998ecf8427e+0',
- num_retries=3)
- self.assertEqual([403, 403], [
- getattr(error, 'status_code', None)
- for error in err_check.exception.request_errors().itervalues()])
-
- def test_get_error_reflects_last_retry(self):
- self.check_errors_from_last_retry('get', arvados.errors.KeepReadError)
-
- def test_put_error_reflects_last_retry(self):
- self.check_errors_from_last_retry('put', arvados.errors.KeepWriteError)
-
- def test_put_error_does_not_include_successful_puts(self):
- data = 'partial failure test'
- data_loc = '{}+{}'.format(hashlib.md5(data).hexdigest(), len(data))
- api_client = self.mock_keep_services(count=3)
- with tutil.mock_keep_responses(data_loc, 200, 500, 500) as req_mock, \
- self.assertRaises(arvados.errors.KeepWriteError) as exc_check:
- keep_client = arvados.KeepClient(api_client=api_client)
- keep_client.put(data)
- self.assertEqual(2, len(exc_check.exception.request_errors()))
-
- def test_proxy_put_with_no_writable_services(self):
- data = 'test with no writable services'
- data_loc = '{}+{}'.format(hashlib.md5(data).hexdigest(), len(data))
- api_client = self.mock_keep_services(service_type='proxy', read_only=True, count=1)
- with tutil.mock_keep_responses(data_loc, 200, 500, 500) as req_mock, \
- self.assertRaises(arvados.errors.KeepWriteError) as exc_check:
- keep_client = arvados.KeepClient(api_client=api_client)
- keep_client.put(data)
- self.assertEqual(True, ("no Keep services available" in str(exc_check.exception)))
- self.assertEqual(0, len(exc_check.exception.request_errors()))
class KeepClientTimeout(unittest.TestCase, tutil.ApiClientMock):
DATA = 'x' * 2**10
gem 'themes_for_rails'
gem 'arvados', '>= 0.1.20150615153458'
-gem 'arvados-cli', '>= 0.1.20150128223752'
+gem 'arvados-cli', '>= 0.1.20150605170031'
# pg_power lets us use partial indexes in schema.rb in Rails 3
gem 'pg_power'
google-api-client (~> 0.6.3, >= 0.6.3)
json (~> 1.7, >= 1.7.7)
jwt (>= 0.1.5, < 1.0.0)
- arvados-cli (0.1.20150205181653)
+ arvados-cli (0.1.20150930141818)
activesupport (~> 3.2, >= 3.2.13)
andand (~> 1.3, >= 1.3.3)
- arvados (~> 0.1, >= 0.1.20150615153458)
+ arvados (~> 0.1, >= 0.1.20150128223554)
curb (~> 0.8)
google-api-client (~> 0.6.3, >= 0.6.3)
json (~> 1.7, >= 1.7.7)
coffee-script-source
execjs
coffee-script-source (1.7.0)
- curb (0.8.6)
+ curb (0.8.8)
daemon_controller (1.2.0)
database_cleaner (1.2.0)
erubis (2.7.0)
acts_as_api
andand
arvados (>= 0.1.20150615153458)
- arvados-cli (>= 0.1.20150128223752)
+ arvados-cli (>= 0.1.20150605170031)
coffee-rails (~> 3.2.0)
database_cleaner
factory_girl_rails
name: "arvados",
version: "v1",
revision: "20131114",
- source_version: (Rails.application.config.source_version ? Rails.application.config.source_version : "No version information available") + (Rails.application.config.local_modified ? Rails.application.config.local_modified.to_s : ''),
+ source_version: AppVersion.hash,
generatedAt: db_current_time.iso8601,
title: "Arvados API",
description: "The API to interact with Arvados.",
api_accessible :superuser, :extend => :user do |t|
end
+ protected
+
+ def permission_to_create
+ current_user.andand.is_admin
+ end
+
+ def permission_to_update
+ current_user.andand.is_admin
+ end
end
default_openid_prefix: https://www.google.com/accounts/o8/id
- # source_version
- source_version: "<%= `git log -n 1 --format=%h`.strip %>"
- local_modified: false
+ # Override the automatic version string. With the default value of
+ # false, the version string is read from git-commit.version in
+ # Rails.root (included in vendor packages) or determined by invoking
+ # "git log".
+ source_version: false
development:
active_record.auto_explain_threshold_in_seconds: 0.5
assets.compress: false
assets.debug: true
- local_modified: "<%= '-modified' if `git status -s` != '' %>"
production:
force_ssl: true
--- /dev/null
+require 'app_version'
--- /dev/null
+# If you change this file, you'll probably also want to make the same
+# changes in apps/workbench/lib/app_version.rb.
+
+class AppVersion
+ def self.git(*args, &block)
+ IO.popen(["git", "--git-dir", ".git"] + args, "r",
+ chdir: Rails.root.join('../..'),
+ err: "/dev/null",
+ &block)
+ end
+
+ def self.forget
+ @hash = nil
+ end
+
+ # Return abbrev commit hash for current code version: "abc1234", or
+ # "abc1234-modified" if there are uncommitted changes. If present,
+ # return contents of {root}/git-commit.version instead.
+ def self.hash
+ if (cached = Rails.configuration.source_version || @hash)
+ return cached
+ end
+
+ # Read the version from our package's git-commit.version file, if available.
+ begin
+ @hash = IO.read(Rails.root.join("git-commit.version")).strip
+ rescue Errno::ENOENT
+ end
+
+ if @hash.nil? or @hash.empty?
+ begin
+ local_modified = false
+ git("status", "--porcelain") do |git_pipe|
+ git_pipe.each_line do |_|
+ local_modified = true
+ # Continue reading the pipe so git doesn't get SIGPIPE.
+ end
+ end
+ if $?.success?
+ git("log", "-n1", "--format=%H") do |git_pipe|
+ git_pipe.each_line do |line|
+ @hash = line.chomp[0...8] + (local_modified ? '-modified' : '')
+ end
+ end
+ end
+ rescue SystemCallError
+ end
+ end
+
+ @hash || "unknown"
+ end
+end
namespace :config do
desc 'Ensure site configuration has all required settings'
task check: :environment do
+ $stderr.puts "%-32s %s" % ["AppVersion (discovered)", AppVersion.hash]
$application_config.sort.each do |k, v|
if ENV.has_key?('QUIET') then
# Make sure we still check for the variable to exist
eval("Rails.configuration.#{k}")
else
- if /(password|secret)/.match(k) then
+ if /(password|secret|signing_key)/.match(k) then
# Make sure we still check for the variable to exist, but don't print the value
eval("Rails.configuration.#{k}")
$stderr.puts "%-32s %s" % [k, '*********']
#!/usr/bin/env ruby
+# We want files written by crunch-dispatch to be writable by other processes
+# with the same GID, see bug #7228
+File.umask(0002)
+
require 'shellwords'
include Process
def run
act_as_system_user
+ User.first.group_permissions
$stderr.puts "dispatch: ready"
while !$signal[:term] or @running.size > 0
read_pipes
require 'trollop'
require './lib/salvage_collection'
+include SalvageCollection
opts = Trollop::options do
banner ''
class Arvados::V1::SchemaControllerTest < ActionController::TestCase
+ setup do forget end
+ teardown do forget end
+ def forget
+ Rails.cache.delete 'arvados_v1_rest_discovery'
+ AppVersion.forget
+ end
+
test "should get fresh discovery document" do
MAX_SCHEMA_AGE = 60
get :index
assert_includes discovery_doc, 'defaultTrashLifetime'
assert_equal discovery_doc['defaultTrashLifetime'], Rails.application.config.default_trash_lifetime
end
+
+ test "discovery document has source_version" do
+ get :index
+ assert_response :success
+ discovery_doc = JSON.parse(@response.body)
+ assert_match /^[0-9a-f]+(-modified)?$/, discovery_doc['source_version']
+ end
+
+ test "discovery document overrides source_version with config" do
+ Rails.configuration.source_version = 'aaa888fff'
+ get :index
+ assert_response :success
+ discovery_doc = JSON.parse(@response.body)
+ assert_equal 'aaa888fff', discovery_doc['source_version']
+ end
end
--- /dev/null
+require 'test_helper'
+
+class AppVersionTest < ActiveSupport::TestCase
+
+ setup do AppVersion.forget end
+
+ teardown do AppVersion.forget end
+
+ test 'invoke git processes only on first call' do
+ AppVersion.expects(:git).
+ with("status", "--porcelain").once.
+ yields " M services/api/README\n"
+ AppVersion.expects(:git).
+ with("log", "-n1", "--format=%H").once.
+ yields "da39a3ee5e6b4b0d3255bfef95601890afd80709\n"
+
+ (0..4).each do
+ v = AppVersion.hash
+ assert_equal 'da39a3ee-modified', v
+ end
+ end
+
+ test 'override with configuration' do
+ Rails.configuration.source_version = 'foobar'
+ assert_equal 'foobar', AppVersion.hash
+ Rails.configuration.source_version = false
+ assert_not_equal 'foobar', AppVersion.hash
+ end
+
+ test 'override with file' do
+ path = Rails.root.join 'git-commit.version'
+ assert(!File.exists?(path),
+ "Packaged version file found in source tree: #{path}")
+ begin
+ File.open(path, 'w') do |f|
+ f.write "0.1.abc123\n"
+ end
+ assert_equal "0.1.abc123", AppVersion.hash
+ ensure
+ File.unlink path
+ end
+ end
+end
require 'test_helper'
class KeepServiceTest < ActiveSupport::TestCase
- # test "the truth" do
- # assert true
- # end
+ test "non-admins cannot create services" do
+ set_user_from_auth :active
+ ks = KeepService.new
+ assert_not_allowed do
+ ks.save
+ end
+ end
+
+ test "non-admins cannot update services" do
+ set_user_from_auth :active
+ ks = keep_services(:proxy)
+ ks.service_port = 64434
+ assert_not_allowed do
+ ks.save
+ end
+ end
+
+ test "admins can create services" do
+ set_user_from_auth :admin
+ ks = KeepService.new
+ assert(ks.save, "saving new service failed")
+ end
+
+ test "admins can update services" do
+ set_user_from_auth :admin
+ ks = keep_services(:proxy)
+ ks.service_port = 64434
+ assert(ks.save, "saving updated service failed")
+ end
end
}
func (s *GitoliteSuite) TestPush(c *check.C) {
- err := s.RunGit(c, activeToken, "push", "active/foo.git")
+ err := s.RunGit(c, activeToken, "push", "active/foo.git", "master:gitolite-push")
c.Check(err, check.Equals, nil)
// Check that the commit hash appears in the gitolite log, as
}
func (s *GitoliteSuite) TestPushUnwritable(c *check.C) {
- err := s.RunGit(c, spectatorToken, "push", "active/foo.git")
+ err := s.RunGit(c, spectatorToken, "push", "active/foo.git", "master:gitolite-push-fail")
c.Check(err, check.ErrorMatches, `.*HTTP code = 403.*`)
}
maxManifestSize uint64
)
+// Collection representation
type Collection struct {
- Uuid string
- OwnerUuid string
+ UUID string
+ OwnerUUID string
ReplicationLevel int
BlockDigestToSize map[blockdigest.BlockDigest]int
TotalSize int
}
+// ReadCollections holds information about collections from API server
type ReadCollections struct {
ReadAllCollections bool
- UuidToCollection map[string]Collection
+ UUIDToCollection map[string]Collection
OwnerToCollectionSize map[string]int
BlockToDesiredReplication map[blockdigest.DigestWithSize]int
- CollectionUuidToIndex map[string]int
- CollectionIndexToUuid []string
+ CollectionUUIDToIndex map[string]int
+ CollectionIndexToUUID []string
BlockToCollectionIndices map[blockdigest.DigestWithSize][]int
}
+// GetCollectionsParams holds the parameters for a GetCollections call
type GetCollectionsParams struct {
Client arvadosclient.ArvadosClient
Logger *logger.Logger
BatchSize int
}
+// SdkCollectionInfo holds collection info received from the API server
type SdkCollectionInfo struct {
- Uuid string `json:"uuid"`
- OwnerUuid string `json:"owner_uuid"`
+ UUID string `json:"uuid"`
+ OwnerUUID string `json:"owner_uuid"`
Redundancy int `json:"redundancy"`
ModifiedAt time.Time `json:"modified_at"`
ManifestText string `json:"manifest_text"`
}
+// SdkCollectionList is a paginated list of collections returned by the API server
type SdkCollectionList struct {
ItemsAvailable int `json:"items_available"`
Items []SdkCollectionInfo `json:"items"`
"File to write the heap profiles to. Leave blank to skip profiling.")
}
-// Write the heap profile to a file for later review.
+// WriteHeapProfile writes the heap profile to a file for later review.
// Since a file is expected to only contain a single heap profile this
// function overwrites the previously written profile, so it is safe
// to call multiple times in a single run.
func WriteHeapProfile() {
if heapProfileFilename != "" {
- heap_profile, err := os.Create(heapProfileFilename)
+ heapProfile, err := os.Create(heapProfileFilename)
if err != nil {
log.Fatal(err)
}
- defer heap_profile.Close()
+ defer heapProfile.Close()
- err = pprof.WriteHeapProfile(heap_profile)
+ err = pprof.WriteHeapProfile(heapProfile)
if err != nil {
log.Fatal(err)
}
}
}
+// GetCollectionsAndSummarize gets collections from the API server and summarizes them
func GetCollectionsAndSummarize(params GetCollectionsParams) (results ReadCollections) {
results = GetCollections(params)
results.Summarize(params.Logger)
log.Printf("Uuid to Size used: %v", results.OwnerToCollectionSize)
log.Printf("Read and processed %d collections",
- len(results.UuidToCollection))
+ len(results.UUIDToCollection))
// TODO(misha): Add a "readonly" flag. If we're in readonly mode,
// lots of behaviors can become warnings (and obviously we can't
return
}
+// GetCollections retrieves the collections from the API server
func GetCollections(params GetCollectionsParams) (results ReadCollections) {
if ¶ms.Client == nil {
log.Fatalf("params.Client passed to GetCollections() should " +
// that we don't have to grow the map in most cases.
maxExpectedCollections := int(
float64(initialNumberOfCollectionsAvailable) * 1.01)
- results.UuidToCollection = make(map[string]Collection, maxExpectedCollections)
+ results.UUIDToCollection = make(map[string]Collection, maxExpectedCollections)
if params.Logger != nil {
params.Logger.Update(func(p map[string]interface{}, e map[string]interface{}) {
ProcessCollections(params.Logger,
collections.Items,
defaultReplicationLevel,
- results.UuidToCollection).Format(time.RFC3339)
+ results.UUIDToCollection).Format(time.RFC3339)
// update counts
previousTotalCollections = totalCollections
- totalCollections = len(results.UuidToCollection)
+ totalCollections = len(results.UUIDToCollection)
log.Printf("%d collections read, %d new in last batch, "+
"%s latest modified date, %.0f %d %d avg,max,total manifest size",
return string([]byte(s))
}
+// ProcessCollections processes the collections read from the API server
func ProcessCollections(arvLogger *logger.Logger,
receivedCollections []SdkCollectionInfo,
defaultReplicationLevel int,
- uuidToCollection map[string]Collection) (latestModificationDate time.Time) {
+ UUIDToCollection map[string]Collection) (latestModificationDate time.Time) {
for _, sdkCollection := range receivedCollections {
- collection := Collection{Uuid: StrCopy(sdkCollection.Uuid),
- OwnerUuid: StrCopy(sdkCollection.OwnerUuid),
+ collection := Collection{UUID: StrCopy(sdkCollection.UUID),
+ OwnerUUID: StrCopy(sdkCollection.OwnerUUID),
ReplicationLevel: sdkCollection.Redundancy,
BlockDigestToSize: make(map[blockdigest.BlockDigest]int)}
manifest := manifest.Manifest{sdkCollection.ManifestText}
manifestSize := uint64(len(sdkCollection.ManifestText))
- if _, alreadySeen := uuidToCollection[collection.Uuid]; !alreadySeen {
+ if _, alreadySeen := UUIDToCollection[collection.UUID]; !alreadySeen {
totalManifestSize += manifestSize
}
if manifestSize > maxManifestSize {
blockChannel := manifest.BlockIterWithDuplicates()
for block := range blockChannel {
- if stored_size, stored := collection.BlockDigestToSize[block.Digest]; stored && stored_size != block.Size {
+ if storedSize, stored := collection.BlockDigestToSize[block.Digest]; stored && storedSize != block.Size {
message := fmt.Sprintf(
"Collection %s contains multiple sizes (%d and %d) for block %s",
- collection.Uuid,
- stored_size,
+ collection.UUID,
+ storedSize,
block.Size,
block.Digest)
loggerutil.FatalWithMessage(arvLogger, message)
for _, size := range collection.BlockDigestToSize {
collection.TotalSize += size
}
- uuidToCollection[collection.Uuid] = collection
+ UUIDToCollection[collection.UUID] = collection
// Clear out all the manifest strings that we don't need anymore.
// These hopefully form the bulk of our memory usage.
return
}
+// Summarize the collections read
func (readCollections *ReadCollections) Summarize(arvLogger *logger.Logger) {
readCollections.OwnerToCollectionSize = make(map[string]int)
readCollections.BlockToDesiredReplication = make(map[blockdigest.DigestWithSize]int)
- numCollections := len(readCollections.UuidToCollection)
- readCollections.CollectionUuidToIndex = make(map[string]int, numCollections)
- readCollections.CollectionIndexToUuid = make([]string, 0, numCollections)
+ numCollections := len(readCollections.UUIDToCollection)
+ readCollections.CollectionUUIDToIndex = make(map[string]int, numCollections)
+ readCollections.CollectionIndexToUUID = make([]string, 0, numCollections)
readCollections.BlockToCollectionIndices = make(map[blockdigest.DigestWithSize][]int)
- for _, coll := range readCollections.UuidToCollection {
- collectionIndex := len(readCollections.CollectionIndexToUuid)
- readCollections.CollectionIndexToUuid =
- append(readCollections.CollectionIndexToUuid, coll.Uuid)
- readCollections.CollectionUuidToIndex[coll.Uuid] = collectionIndex
+ for _, coll := range readCollections.UUIDToCollection {
+ collectionIndex := len(readCollections.CollectionIndexToUUID)
+ readCollections.CollectionIndexToUUID =
+ append(readCollections.CollectionIndexToUUID, coll.UUID)
+ readCollections.CollectionUUIDToIndex[coll.UUID] = collectionIndex
- readCollections.OwnerToCollectionSize[coll.OwnerUuid] =
- readCollections.OwnerToCollectionSize[coll.OwnerUuid] + coll.TotalSize
+ readCollections.OwnerToCollectionSize[coll.OwnerUUID] =
+ readCollections.OwnerToCollectionSize[coll.OwnerUUID] + coll.TotalSize
for block, size := range coll.BlockDigestToSize {
locator := blockdigest.DigestWithSize{Digest: block, Size: uint32(size)}
var _ = Suite(&MySuite{})
// This captures the result we expect from
-// ReadCollections.Summarize(). Because CollectionUuidToIndex is
+// ReadCollections.Summarize(). Because CollectionUUIDToIndex is
// indeterminate, we replace BlockToCollectionIndices with
// BlockToCollectionUuids.
type ExpectedSummary struct {
uuidSet := make(map[string]struct{})
summarizedBlockToCollectionUuids[digest] = uuidSet
for _, index := range indices {
- uuidSet[summarized.CollectionIndexToUuid[index]] = struct{}{}
+ uuidSet[summarized.CollectionIndexToUUID[index]] = struct{}{}
}
}
rc.Summarize(nil)
- c := rc.UuidToCollection["col0"]
+ c := rc.UUIDToCollection["col0"]
blockDigest1 := blockdigest.MakeTestDigestWithSize(1)
blockDigest2 := blockdigest.MakeTestDigestWithSize(2)
expected := ExpectedSummary{
- OwnerToCollectionSize: map[string]int{c.OwnerUuid: c.TotalSize},
+ OwnerToCollectionSize: map[string]int{c.OwnerUUID: c.TotalSize},
BlockToDesiredReplication: map[blockdigest.DigestWithSize]int{blockDigest1: 5, blockDigest2: 5},
- BlockToCollectionUuids: map[blockdigest.DigestWithSize][]string{blockDigest1: []string{c.Uuid}, blockDigest2: []string{c.Uuid}},
+ BlockToCollectionUuids: map[blockdigest.DigestWithSize][]string{blockDigest1: []string{c.UUID}, blockDigest2: []string{c.UUID}},
}
CompareSummarizedReadCollections(checker, rc, expected)
rc.Summarize(nil)
- c0 := rc.UuidToCollection["col0"]
- c1 := rc.UuidToCollection["col1"]
+ c0 := rc.UUIDToCollection["col0"]
+ c1 := rc.UUIDToCollection["col1"]
blockDigest1 := blockdigest.MakeTestDigestWithSize(1)
blockDigest2 := blockdigest.MakeTestDigestWithSize(2)
expected := ExpectedSummary{
OwnerToCollectionSize: map[string]int{
- c0.OwnerUuid: c0.TotalSize,
- c1.OwnerUuid: c1.TotalSize,
+ c0.OwnerUUID: c0.TotalSize,
+ c1.OwnerUUID: c1.TotalSize,
},
BlockToDesiredReplication: map[blockdigest.DigestWithSize]int{
blockDigest1: 5,
blockDigest3: 8,
},
BlockToCollectionUuids: map[blockdigest.DigestWithSize][]string{
- blockDigest1: []string{c0.Uuid},
- blockDigest2: []string{c0.Uuid, c1.Uuid},
- blockDigest3: []string{c1.Uuid},
+ blockDigest1: []string{c0.UUID},
+ blockDigest2: []string{c0.UUID, c1.UUID},
+ blockDigest3: []string{c1.UUID},
},
}
"git.curoverse.com/arvados.git/sdk/go/blockdigest"
)
+// TestCollectionSpec with test blocks and desired replication level
type TestCollectionSpec struct {
// The desired replication level
ReplicationLevel int
Blocks []int
}
-// Creates a ReadCollections object for testing based on the give
-// specs. Only the ReadAllCollections and UuidToCollection fields are
-// populated. To populate other fields call rc.Summarize().
+// MakeTestReadCollections creates a ReadCollections object for testing
+// based on the given specs. Only the ReadAllCollections and UUIDToCollection
+// fields are populated. To populate other fields call rc.Summarize().
func MakeTestReadCollections(specs []TestCollectionSpec) (rc ReadCollections) {
rc = ReadCollections{
ReadAllCollections: true,
- UuidToCollection: map[string]Collection{},
+ UUIDToCollection: map[string]Collection{},
}
for i, spec := range specs {
c := Collection{
- Uuid: fmt.Sprintf("col%d", i),
- OwnerUuid: fmt.Sprintf("owner%d", i),
+ UUID: fmt.Sprintf("col%d", i),
+ OwnerUUID: fmt.Sprintf("owner%d", i),
ReplicationLevel: spec.ReplicationLevel,
BlockDigestToSize: map[blockdigest.BlockDigest]int{},
}
- rc.UuidToCollection[c.Uuid] = c
+ rc.UUIDToCollection[c.UUID] = c
for _, j := range spec.Blocks {
c.BlockDigestToSize[blockdigest.MakeTestBlockDigest(j)] = j
}
return
}
-// Returns a slice giving the collection index of each collection that
-// was passed in to MakeTestReadCollections. rc.Summarize() must be
-// called before this method, since Summarize() assigns an index to
-// each collection.
+// CollectionIndicesForTesting returns a slice giving the collection
+// index of each collection that was passed in to MakeTestReadCollections.
+// rc.Summarize() must be called before this method, since Summarize()
+// assigns an index to each collection.
func (rc ReadCollections) CollectionIndicesForTesting() (indices []int) {
// TODO(misha): Assert that rc.Summarize() has been called.
- numCollections := len(rc.CollectionIndexToUuid)
+ numCollections := len(rc.CollectionIndexToUUID)
indices = make([]int, numCollections)
for i := 0; i < numCollections; i++ {
- indices[i] = rc.CollectionUuidToIndex[fmt.Sprintf("col%d", i)]
+ indices[i] = rc.CollectionUUIDToIndex[fmt.Sprintf("col%d", i)]
}
return
}
package main
import (
+ "errors"
"flag"
"fmt"
"git.curoverse.com/arvados.git/sdk/go/arvadosclient"
func main() {
flag.Parse()
if minutesBetweenRuns == 0 {
- err := singlerun()
+ err := singlerun(makeArvadosClient())
if err != nil {
- log.Fatalf("Got an error: %v", err)
+ log.Fatalf("singlerun: %v", err)
}
} else {
waitTime := time.Minute * time.Duration(minutesBetweenRuns)
for {
log.Println("Beginning Run")
- err := singlerun()
+ err := singlerun(makeArvadosClient())
if err != nil {
- log.Printf("Got an error: %v", err)
+ log.Printf("singlerun: %v", err)
}
log.Printf("Sleeping for %d minutes", minutesBetweenRuns)
time.Sleep(waitTime)
}
}
-func singlerun() error {
+func makeArvadosClient() arvadosclient.ArvadosClient {
arv, err := arvadosclient.MakeArvadosClient()
if err != nil {
- log.Fatalf("Error setting up arvados client %s", err.Error())
+ log.Fatalf("Error setting up arvados client: %s", err)
}
+ return arv
+}
- if is_admin, err := util.UserIsAdmin(arv); err != nil {
- log.Fatalf("Error querying current arvados user %s", err.Error())
- } else if !is_admin {
- log.Fatalf("Current user is not an admin. Datamanager can only be run by admins.")
+func singlerun(arv arvadosclient.ArvadosClient) error {
+ var err error
+ if isAdmin, err := util.UserIsAdmin(arv); err != nil {
+ return errors.New("Error verifying admin token: " + err.Error())
+ } else if !isAdmin {
+ return errors.New("Current user is not an admin. Datamanager requires a privileged token.")
}
var arvLogger *logger.Logger
if trashErr != nil {
return err
- } else {
- keep.SendTrashLists(keep.GetDataManagerToken(arvLogger), kc, trashLists)
}
+ keep.SendTrashLists(kc, trashLists)
return nil
}
-// Returns a data fetcher that fetches data from remote servers.
+// BuildDataFetcher returns a data fetcher that fetches data from remote servers.
func BuildDataFetcher(arv arvadosclient.ArvadosClient) summary.DataFetcher {
return func(arvLogger *logger.Logger,
readCollections *collection.ReadCollections,
--- /dev/null
+package main
+
+import (
+ "encoding/json"
+ "fmt"
+ "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
+ "git.curoverse.com/arvados.git/sdk/go/arvadostest"
+ "git.curoverse.com/arvados.git/sdk/go/keepclient"
+ "io/ioutil"
+ "net/http"
+ "os"
+ "os/exec"
+ "regexp"
+ "strings"
+ "testing"
+ "time"
+)
+
+const (
+ ActiveUserToken = "3kg6k6lzmp9kj5cpkcoxie963cmvjahbt2fod9zru30k1jqdmi"
+ AdminToken = "4axaw8zxe0qm22wa6urpp5nskcne8z88cvbupv653y1njyi05h"
+)
+
+var arv arvadosclient.ArvadosClient
+var keepClient *keepclient.KeepClient
+var keepServers []string
+
+func SetupDataManagerTest(t *testing.T) {
+ os.Setenv("ARVADOS_API_HOST_INSECURE", "true")
+
+ // start api and keep servers
+ arvadostest.ResetEnv()
+ arvadostest.StartAPI()
+ arvadostest.StartKeep(2, false)
+
+ arv = makeArvadosClient()
+
+ // keep client
+ keepClient = &keepclient.KeepClient{
+ Arvados: &arv,
+ Want_replicas: 2,
+ Using_proxy: true,
+ Client: &http.Client{},
+ }
+
+ // discover keep services
+ if err := keepClient.DiscoverKeepServers(); err != nil {
+ t.Fatalf("Error discovering keep services: %s", err)
+ }
+ keepServers = []string{}
+ for _, host := range keepClient.LocalRoots() {
+ keepServers = append(keepServers, host)
+ }
+}
+
+func TearDownDataManagerTest(t *testing.T) {
+ arvadostest.StopKeep(2)
+ arvadostest.StopAPI()
+}
+
+func putBlock(t *testing.T, data string) string {
+ locator, _, err := keepClient.PutB([]byte(data))
+ if err != nil {
+ t.Fatalf("Error putting test data for %s %s %v", data, locator, err)
+ }
+ if locator == "" {
+ t.Fatalf("No locator found after putting test data")
+ }
+
+ splits := strings.Split(locator, "+")
+ return splits[0] + "+" + splits[1]
+}
+
+func getBlock(t *testing.T, locator string, data string) {
+ reader, blocklen, _, err := keepClient.Get(locator)
+ if err != nil {
+ t.Fatalf("Error getting test data in setup for %s %s %v", data, locator, err)
+ }
+ if reader == nil {
+ t.Fatalf("No reader found after putting test data")
+ }
+ if blocklen != int64(len(data)) {
+ t.Fatalf("blocklen %d did not match data len %d", blocklen, len(data))
+ }
+
+ all, err := ioutil.ReadAll(reader)
+ if string(all) != data {
+ t.Fatalf("Data read %s did not match expected data %s", string(all), data)
+ }
+}
+
+// Create a collection using arv-put
+func createCollection(t *testing.T, data string) string {
+ tempfile, err := ioutil.TempFile(os.TempDir(), "temp-test-file")
+ defer os.Remove(tempfile.Name())
+
+ _, err = tempfile.Write([]byte(data))
+ if err != nil {
+ t.Fatalf("Error writing to tempfile %v", err)
+ }
+
+ // arv-put
+ output, err := exec.Command("arv-put", "--use-filename", "test.txt", tempfile.Name()).Output()
+ if err != nil {
+ t.Fatalf("Error running arv-put %s", err)
+ }
+
+ uuid := string(output[0:27]) // trim terminating char
+ return uuid
+}
+
+// Get collection locator
+var locatorMatcher = regexp.MustCompile(`^([0-9a-f]{32})\+(\d*)(.*)$`)
+
+func getFirstLocatorFromCollection(t *testing.T, uuid string) string {
+ manifest := getCollection(t, uuid)["manifest_text"].(string)
+
+ locator := strings.Split(manifest, " ")[1]
+ match := locatorMatcher.FindStringSubmatch(locator)
+ if match == nil {
+ t.Fatalf("No locator found in collection manifest %s", manifest)
+ }
+
+ return match[1] + "+" + match[2]
+}
+
+func getCollection(t *testing.T, uuid string) Dict {
+ getback := make(Dict)
+ err := arv.Get("collections", uuid, nil, &getback)
+ if err != nil {
+ t.Fatalf("Error getting collection %s", err)
+ }
+ if getback["uuid"] != uuid {
+		t.Fatalf("Get collection uuid did not match original: %s, result: %s", uuid, getback["uuid"])
+ }
+
+ return getback
+}
+
+func updateCollection(t *testing.T, uuid string, paramName string, paramValue string) {
+ err := arv.Update("collections", uuid, arvadosclient.Dict{
+ "collection": arvadosclient.Dict{
+ paramName: paramValue,
+ },
+ }, &arvadosclient.Dict{})
+
+ if err != nil {
+ t.Fatalf("Error updating collection %s", err)
+ }
+}
+
+type Dict map[string]interface{}
+
+func deleteCollection(t *testing.T, uuid string) {
+ getback := make(Dict)
+ err := arv.Delete("collections", uuid, nil, &getback)
+ if err != nil {
+ t.Fatalf("Error deleting collection %s", err)
+ }
+ if getback["uuid"] != uuid {
+		t.Fatalf("Delete collection uuid did not match original: %s, result: %s", uuid, getback["uuid"])
+ }
+}
+
+func dataManagerSingleRun(t *testing.T) {
+ err := singlerun(arv)
+ if err != nil {
+ t.Fatalf("Error during singlerun %s", err)
+ }
+}
+
+func getBlockIndexesForServer(t *testing.T, i int) []string {
+ var indexes []string
+
+ path := keepServers[i] + "/index"
+ client := http.Client{}
+ req, err := http.NewRequest("GET", path, nil)
+ req.Header.Add("Authorization", "OAuth2 "+AdminToken)
+ req.Header.Add("Content-Type", "application/octet-stream")
+ resp, err := client.Do(req)
+ defer resp.Body.Close()
+
+ if err != nil {
+ t.Fatalf("Error during %s %s", path, err)
+ }
+
+ body, err := ioutil.ReadAll(resp.Body)
+ if err != nil {
+ t.Fatalf("Error reading response from %s %s", path, err)
+ }
+
+ lines := strings.Split(string(body), "\n")
+ for _, line := range lines {
+ indexes = append(indexes, strings.Split(line, " ")...)
+ }
+
+ return indexes
+}
+
+func getBlockIndexes(t *testing.T) [][]string {
+ var indexes [][]string
+
+ for i := 0; i < len(keepServers); i++ {
+ indexes = append(indexes, getBlockIndexesForServer(t, i))
+ }
+ return indexes
+}
+
+func verifyBlocks(t *testing.T, notExpected []string, expected []string, minReplication int) {
+ blocks := getBlockIndexes(t)
+
+ for _, block := range notExpected {
+ for _, idx := range blocks {
+ if valueInArray(block, idx) {
+ t.Fatalf("Found unexpected block %s", block)
+ }
+ }
+ }
+
+ for _, block := range expected {
+ nFound := 0
+ for _, idx := range blocks {
+ if valueInArray(block, idx) {
+ nFound++
+ }
+ }
+ if nFound < minReplication {
+ t.Fatalf("Found %d replicas of block %s, expected >= %d", nFound, block, minReplication)
+ }
+ }
+}
+
+func valueInArray(value string, list []string) bool {
+ for _, v := range list {
+ if value == v {
+ return true
+ }
+ }
+ return false
+}
+
+/*
+Test env uses two keep volumes. The volume names can be found by reading the files
+ ARVADOS_HOME/tmp/keep0.volume and ARVADOS_HOME/tmp/keep1.volume
+
+The keep volumes are of the dir structure:
+ volumeN/subdir/locator
+*/
+func backdateBlocks(t *testing.T, oldUnusedBlockLocators []string) {
+ // First get rid of any size hints in the locators
+ var trimmedBlockLocators []string
+ for _, block := range oldUnusedBlockLocators {
+ trimmedBlockLocators = append(trimmedBlockLocators, strings.Split(block, "+")[0])
+ }
+
+ // Get the working dir so that we can read keep{n}.volume files
+ wd, err := os.Getwd()
+ if err != nil {
+ t.Fatalf("Error getting working dir %s", err)
+ }
+
+ // Now cycle through the two keep volumes
+ oldTime := time.Now().AddDate(0, -2, 0)
+ for i := 0; i < 2; i++ {
+ filename := fmt.Sprintf("%s/../../tmp/keep%d.volume", wd, i)
+ volumeDir, err := ioutil.ReadFile(filename)
+ if err != nil {
+ t.Fatalf("Error reading keep volume file %s %s", filename, err)
+ }
+
+ // Read the keep volume dir structure
+ volumeContents, err := ioutil.ReadDir(string(volumeDir))
+ if err != nil {
+ t.Fatalf("Error reading keep dir %s %s", string(volumeDir), err)
+ }
+
+ // Read each subdir for each of the keep volume dir
+ for _, subdir := range volumeContents {
+ subdirName := fmt.Sprintf("%s/%s", volumeDir, subdir.Name())
+ subdirContents, err := ioutil.ReadDir(string(subdirName))
+ if err != nil {
+ t.Fatalf("Error reading keep dir %s %s", string(subdirName), err)
+ }
+
+			// Now we got to the files; the file names are the block locators.
+ for _, fileInfo := range subdirContents {
+ blockName := fileInfo.Name()
+ myname := fmt.Sprintf("%s/%s", subdirName, blockName)
+ if valueInArray(blockName, trimmedBlockLocators) {
+ err = os.Chtimes(myname, oldTime, oldTime)
+ }
+ }
+ }
+ }
+}
+
+func getStatus(t *testing.T, path string) interface{} {
+ client := http.Client{}
+ req, err := http.NewRequest("GET", path, nil)
+ req.Header.Add("Authorization", "OAuth2 "+AdminToken)
+ req.Header.Add("Content-Type", "application/octet-stream")
+ resp, err := client.Do(req)
+ if err != nil {
+ t.Fatalf("Error during %s %s", path, err)
+ }
+ defer resp.Body.Close()
+
+ var s interface{}
+ json.NewDecoder(resp.Body).Decode(&s)
+
+ return s
+}
+
+// Wait until PullQueue and TrashQueue are empty on all keepServers.
+func waitUntilQueuesFinishWork(t *testing.T) {
+ for _, ks := range keepServers {
+ for done := false; !done; {
+ time.Sleep(100 * time.Millisecond)
+ s := getStatus(t, ks+"/status.json")
+ for _, qName := range []string{"PullQueue", "TrashQueue"} {
+ qStatus := s.(map[string]interface{})[qName].(map[string]interface{})
+ if qStatus["Queued"].(float64)+qStatus["InProgress"].(float64) == 0 {
+ done = true
+ }
+ }
+ }
+ }
+}
+
+/*
+Create some blocks and backdate some of them.
+Also create some collections and delete some of them.
+Verify block indexes.
+*/
+func TestPutAndGetBlocks(t *testing.T) {
+ defer TearDownDataManagerTest(t)
+ SetupDataManagerTest(t)
+
+ // Put some blocks which will be backdated later on
+ // The first one will also be used in a collection and hence should not be deleted when datamanager runs.
+ // The rest will be old and unreferenced and hence should be deleted when datamanager runs.
+ var oldUnusedBlockLocators []string
+ oldUnusedBlockData := "this block will have older mtime"
+ for i := 0; i < 5; i++ {
+ oldUnusedBlockLocators = append(oldUnusedBlockLocators, putBlock(t, fmt.Sprintf("%s%d", oldUnusedBlockData, i)))
+ }
+ for i := 0; i < 5; i++ {
+ getBlock(t, oldUnusedBlockLocators[i], fmt.Sprintf("%s%d", oldUnusedBlockData, i))
+ }
+
+	// This block will be backdated, but it is referenced by a collection below and hence should not be deleted when datamanager runs.
+ oldUsedBlockData := "this collection block will have older mtime"
+ oldUsedBlockLocator := putBlock(t, oldUsedBlockData)
+ getBlock(t, oldUsedBlockLocator, oldUsedBlockData)
+
+ // Put some more blocks which will not be backdated; hence they are still new, but not in any collection.
+ // Hence, even though unreferenced, these should not be deleted when datamanager runs.
+ var newBlockLocators []string
+ newBlockData := "this block is newer"
+ for i := 0; i < 5; i++ {
+ newBlockLocators = append(newBlockLocators, putBlock(t, fmt.Sprintf("%s%d", newBlockData, i)))
+ }
+ for i := 0; i < 5; i++ {
+ getBlock(t, newBlockLocators[i], fmt.Sprintf("%s%d", newBlockData, i))
+ }
+
+ // Create a collection that would be deleted later on
+ toBeDeletedCollectionUUID := createCollection(t, "some data for collection creation")
+ toBeDeletedCollectionLocator := getFirstLocatorFromCollection(t, toBeDeletedCollectionUUID)
+
+ // Create another collection that has the same data as the one of the old blocks
+ oldUsedBlockCollectionUUID := createCollection(t, oldUsedBlockData)
+ oldUsedBlockCollectionLocator := getFirstLocatorFromCollection(t, oldUsedBlockCollectionUUID)
+ if oldUsedBlockCollectionLocator != oldUsedBlockLocator {
+ t.Fatalf("Locator of the collection with the same data as old block is different %s", oldUsedBlockCollectionLocator)
+ }
+
+ // Create another collection whose replication level will be changed
+ replicationCollectionUUID := createCollection(t, "replication level on this collection will be reduced")
+ replicationCollectionLocator := getFirstLocatorFromCollection(t, replicationCollectionUUID)
+
+ // Create two collections with same data; one will be deleted later on
+ dataForTwoCollections := "one of these collections will be deleted"
+ oneOfTwoWithSameDataUUID := createCollection(t, dataForTwoCollections)
+ oneOfTwoWithSameDataLocator := getFirstLocatorFromCollection(t, oneOfTwoWithSameDataUUID)
+ secondOfTwoWithSameDataUUID := createCollection(t, dataForTwoCollections)
+ secondOfTwoWithSameDataLocator := getFirstLocatorFromCollection(t, secondOfTwoWithSameDataUUID)
+ if oneOfTwoWithSameDataLocator != secondOfTwoWithSameDataLocator {
+ t.Fatalf("Locators for both these collections expected to be same: %s %s", oneOfTwoWithSameDataLocator, secondOfTwoWithSameDataLocator)
+ }
+
+ // Verify blocks before doing any backdating / deleting.
+ var expected []string
+ expected = append(expected, oldUnusedBlockLocators...)
+ expected = append(expected, newBlockLocators...)
+ expected = append(expected, toBeDeletedCollectionLocator)
+ expected = append(expected, replicationCollectionLocator)
+ expected = append(expected, oneOfTwoWithSameDataLocator)
+ expected = append(expected, secondOfTwoWithSameDataLocator)
+
+ verifyBlocks(t, nil, expected, 2)
+
+ // Run datamanager in singlerun mode
+ dataManagerSingleRun(t)
+ waitUntilQueuesFinishWork(t)
+
+ verifyBlocks(t, nil, expected, 2)
+
+ // Backdate the to-be old blocks and delete the collections
+ backdateBlocks(t, oldUnusedBlockLocators)
+ deleteCollection(t, toBeDeletedCollectionUUID)
+ deleteCollection(t, secondOfTwoWithSameDataUUID)
+
+ // Run data manager again
+ dataManagerSingleRun(t)
+ waitUntilQueuesFinishWork(t)
+
+ // Get block indexes and verify that all backdated blocks except the first one used in collection are not included.
+ expected = expected[:0]
+ expected = append(expected, oldUsedBlockLocator)
+ expected = append(expected, newBlockLocators...)
+ expected = append(expected, toBeDeletedCollectionLocator)
+ expected = append(expected, oneOfTwoWithSameDataLocator)
+ expected = append(expected, secondOfTwoWithSameDataLocator)
+
+ verifyBlocks(t, oldUnusedBlockLocators, expected, 2)
+
+ // Reduce desired replication on replicationCollectionUUID
+ // collection, and verify that Data Manager does not reduce
+ // actual replication any further than that. (It might not
+ // reduce actual replication at all; that's OK for this test.)
+
+ // Reduce desired replication level.
+ updateCollection(t, replicationCollectionUUID, "replication_desired", "1")
+ collection := getCollection(t, replicationCollectionUUID)
+ if collection["replication_desired"].(interface{}) != float64(1) {
+ t.Fatalf("After update replication_desired is not 1; instead it is %v", collection["replication_desired"])
+ }
+
+ // Verify data is currently overreplicated.
+ verifyBlocks(t, nil, []string{replicationCollectionLocator}, 2)
+
+ // Run data manager again
+ dataManagerSingleRun(t)
+ waitUntilQueuesFinishWork(t)
+
+ // Verify data is not underreplicated.
+ verifyBlocks(t, nil, []string{replicationCollectionLocator}, 1)
+
+ // Verify *other* collections' data is not underreplicated.
+ verifyBlocks(t, oldUnusedBlockLocators, expected, 2)
+}
+
+func TestDatamanagerSingleRunRepeatedly(t *testing.T) {
+ defer TearDownDataManagerTest(t)
+ SetupDataManagerTest(t)
+
+ for i := 0; i < 10; i++ {
+ err := singlerun(arv)
+ if err != nil {
+ t.Fatalf("Got an error during datamanager singlerun: %v", err)
+ }
+ }
+}
+
+func TestGetStatusRepeatedly(t *testing.T) {
+ defer TearDownDataManagerTest(t)
+ SetupDataManagerTest(t)
+
+ for i := 0; i < 10; i++ {
+ for j := 0; j < 2; j++ {
+ s := getStatus(t, keepServers[j]+"/status.json")
+
+ var pullQueueStatus interface{}
+ pullQueueStatus = s.(map[string]interface{})["PullQueue"]
+ var trashQueueStatus interface{}
+ trashQueueStatus = s.(map[string]interface{})["TrashQueue"]
+
+ if pullQueueStatus.(map[string]interface{})["Queued"] == nil ||
+ pullQueueStatus.(map[string]interface{})["InProgress"] == nil ||
+ trashQueueStatus.(map[string]interface{})["Queued"] == nil ||
+ trashQueueStatus.(map[string]interface{})["InProgress"] == nil {
+ t.Fatalf("PullQueue and TrashQueue status not found")
+ }
+
+ time.Sleep(100 * time.Millisecond)
+ }
+ }
+}
+
+func TestRunDatamanagerWithBogusServer(t *testing.T) {
+ defer TearDownDataManagerTest(t)
+ SetupDataManagerTest(t)
+
+ arv.ApiServer = "bogus-server"
+
+ err := singlerun(arv)
+ if err == nil {
+ t.Fatalf("Expected error during singlerun with bogus server")
+ }
+}
+
+func TestRunDatamanagerAsNonAdminUser(t *testing.T) {
+ defer TearDownDataManagerTest(t)
+ SetupDataManagerTest(t)
+
+ arv.ApiToken = ActiveUserToken
+
+ err := singlerun(arv)
+ if err == nil {
+ t.Fatalf("Expected error during singlerun as non-admin user")
+ }
+}
"bufio"
"encoding/json"
"errors"
- "flag"
"fmt"
"git.curoverse.com/arvados.git/sdk/go/arvadosclient"
"git.curoverse.com/arvados.git/sdk/go/blockdigest"
"net/http"
"strconv"
"strings"
- "sync"
"time"
)
+// ServerAddress struct
type ServerAddress struct {
- SSL bool `json:service_ssl_flag`
- Host string `json:"service_host"`
- Port int `json:"service_port"`
- Uuid string `json:"uuid"`
+	SSL         bool   `json:"service_ssl_flag"`
+ Host string `json:"service_host"`
+ Port int `json:"service_port"`
+ UUID string `json:"uuid"`
+ ServiceType string `json:"service_type"`
}
-// Info about a particular block returned by the server
+// BlockInfo is info about a particular block returned by the server
type BlockInfo struct {
Digest blockdigest.DigestWithSize
Mtime int64 // TODO(misha): Replace this with a timestamp.
}
-// Info about a specified block given by a server
+// BlockServerInfo is info about a specified block given by a server
type BlockServerInfo struct {
ServerIndex int
Mtime int64 // TODO(misha): Replace this with a timestamp.
}
+// ServerContents struct
type ServerContents struct {
BlockDigestToInfo map[blockdigest.DigestWithSize]BlockInfo
}
+// ServerResponse struct
type ServerResponse struct {
Address ServerAddress
Contents ServerContents
}
+// ReadServers struct
type ReadServers struct {
ReadAllServers bool
KeepServerIndexToAddress []ServerAddress
BlockReplicationCounts map[int]int
}
+// GetKeepServersParams struct
type GetKeepServersParams struct {
Client arvadosclient.ArvadosClient
Logger *logger.Logger
Limit int
}
-type KeepServiceList struct {
+// ServiceList consists of the addresses of all the available keep servers
+type ServiceList struct {
ItemsAvailable int `json:"items_available"`
KeepServers []ServerAddress `json:"items"`
}
-var (
- // Don't access the token directly, use getDataManagerToken() to
- // make sure it's been read.
- dataManagerToken string
- dataManagerTokenFile string
- dataManagerTokenFileReadOnce sync.Once
-)
-
-func init() {
- flag.StringVar(&dataManagerTokenFile,
- "data-manager-token-file",
- "",
- "File with the API token we should use to contact keep servers.")
-}
-
+// String
// TODO(misha): Change this to include the UUID as well.
func (s ServerAddress) String() string {
return s.URL()
}
+// URL of the keep server
func (s ServerAddress) URL() string {
if s.SSL {
return fmt.Sprintf("https://%s:%d", s.Host, s.Port)
- } else {
- return fmt.Sprintf("http://%s:%d", s.Host, s.Port)
- }
-}
-
-func GetDataManagerToken(arvLogger *logger.Logger) string {
- readDataManagerToken := func() {
- if dataManagerTokenFile == "" {
- flag.Usage()
- loggerutil.FatalWithMessage(arvLogger,
- "Data Manager Token needed, but data manager token file not specified.")
- } else {
- rawRead, err := ioutil.ReadFile(dataManagerTokenFile)
- if err != nil {
- loggerutil.FatalWithMessage(arvLogger,
- fmt.Sprintf("Unexpected error reading token file %s: %v",
- dataManagerTokenFile,
- err))
- }
- dataManagerToken = strings.TrimSpace(string(rawRead))
- }
}
-
- dataManagerTokenFileReadOnce.Do(readDataManagerToken)
- return dataManagerToken
+ return fmt.Sprintf("http://%s:%d", s.Host, s.Port)
}
+// GetKeepServersAndSummarize gets keep servers from api
func GetKeepServersAndSummarize(params GetKeepServersParams) (results ReadServers) {
results = GetKeepServers(params)
log.Printf("Returned %d keep disks", len(results.ServerToContents))
return
}
+// GetKeepServers from api server
func GetKeepServers(params GetKeepServersParams) (results ReadServers) {
- if ¶ms.Client == nil {
- log.Fatalf("params.Client passed to GetKeepServers() should " +
- "contain a valid ArvadosClient, but instead it is nil.")
- }
-
sdkParams := arvadosclient.Dict{
- "filters": [][]string{[]string{"service_type", "=", "disk"}},
+ "filters": [][]string{[]string{"service_type", "!=", "proxy"}},
}
if params.Limit > 0 {
sdkParams["limit"] = params.Limit
}
- var sdkResponse KeepServiceList
+ var sdkResponse ServiceList
err := params.Client.List("keep_services", sdkParams, &sdkResponse)
if err != nil {
fmt.Sprintf("Error requesting keep disks from API server: %v", err))
}
+ // Currently, only "disk" types are supported. Stop if any other service types are found.
+ for _, server := range sdkResponse.KeepServers {
+ if server.ServiceType != "disk" {
+ loggerutil.FatalWithMessage(params.Logger,
+ fmt.Sprintf("Unsupported service type %q found for: %v", server.ServiceType, server))
+ }
+ }
+
if params.Logger != nil {
params.Logger.Update(func(p map[string]interface{}, e map[string]interface{}) {
keepInfo := logger.GetOrCreateMap(p, "keep_info")
log.Printf("Got Server Addresses: %v", results)
- // This is safe for concurrent use
- client := http.Client{}
-
// Send off all the index requests concurrently
responseChan := make(chan ServerResponse)
for _, keepServer := range sdkResponse.KeepServers {
go func(keepServer ServerAddress) {
responseChan <- GetServerContents(params.Logger,
keepServer,
- client)
+ params.Client)
}(keepServer)
}
return
}
+// GetServerContents of the keep server
func GetServerContents(arvLogger *logger.Logger,
keepServer ServerAddress,
- client http.Client) (response ServerResponse) {
+ arv arvadosclient.ArvadosClient) (response ServerResponse) {
- GetServerStatus(arvLogger, keepServer, client)
+ GetServerStatus(arvLogger, keepServer, arv)
- req := CreateIndexRequest(arvLogger, keepServer)
- resp, err := client.Do(req)
+ req := CreateIndexRequest(arvLogger, keepServer, arv)
+ resp, err := arv.Client.Do(req)
if err != nil {
loggerutil.FatalWithMessage(arvLogger,
fmt.Sprintf("Error fetching %s: %v. Response was %+v",
return ReadServerResponse(arvLogger, keepServer, resp)
}
+// GetServerStatus get keep server status by invoking /status.json
func GetServerStatus(arvLogger *logger.Logger,
keepServer ServerAddress,
- client http.Client) {
+ arv arvadosclient.ArvadosClient) {
url := fmt.Sprintf("http://%s:%d/status.json",
keepServer.Host,
keepServer.Port)
serverInfo["host"] = keepServer.Host
serverInfo["port"] = keepServer.Port
- keepInfo[keepServer.Uuid] = serverInfo
+ keepInfo[keepServer.UUID] = serverInfo
})
}
- resp, err := client.Get(url)
+ resp, err := arv.Client.Get(url)
if err != nil {
loggerutil.FatalWithMessage(arvLogger,
fmt.Sprintf("Error getting keep status from %s: %v", url, err))
now := time.Now()
arvLogger.Update(func(p map[string]interface{}, e map[string]interface{}) {
keepInfo := logger.GetOrCreateMap(p, "keep_info")
- serverInfo := keepInfo[keepServer.Uuid].(map[string]interface{})
+ serverInfo := keepInfo[keepServer.UUID].(map[string]interface{})
serverInfo["status_response_processed_at"] = now
serverInfo["status"] = keepStatus
})
}
}
+// CreateIndexRequest to the keep server
func CreateIndexRequest(arvLogger *logger.Logger,
- keepServer ServerAddress) (req *http.Request) {
+ keepServer ServerAddress,
+ arv arvadosclient.ArvadosClient) (req *http.Request) {
url := fmt.Sprintf("http://%s:%d/index", keepServer.Host, keepServer.Port)
log.Println("About to fetch keep server contents from " + url)
now := time.Now()
arvLogger.Update(func(p map[string]interface{}, e map[string]interface{}) {
keepInfo := logger.GetOrCreateMap(p, "keep_info")
- serverInfo := keepInfo[keepServer.Uuid].(map[string]interface{})
+ serverInfo := keepInfo[keepServer.UUID].(map[string]interface{})
serverInfo["index_request_sent_at"] = now
})
}
fmt.Sprintf("Error building http request for %s: %v", url, err))
}
- req.Header.Add("Authorization",
- fmt.Sprintf("OAuth2 %s", GetDataManagerToken(arvLogger)))
+ req.Header.Add("Authorization", "OAuth2 "+arv.ApiToken)
return
}
+// ReadServerResponse reads the response from the keep server
func ReadServerResponse(arvLogger *logger.Logger,
keepServer ServerAddress,
resp *http.Response) (response ServerResponse) {
now := time.Now()
arvLogger.Update(func(p map[string]interface{}, e map[string]interface{}) {
keepInfo := logger.GetOrCreateMap(p, "keep_info")
- serverInfo := keepInfo[keepServer.Uuid].(map[string]interface{})
+ serverInfo := keepInfo[keepServer.UUID].(map[string]interface{})
serverInfo["index_response_received_at"] = now
})
}
if storedBlock, ok := response.Contents.BlockDigestToInfo[blockInfo.Digest]; ok {
// This server returned multiple lines containing the same block digest.
- numDuplicates += 1
+ numDuplicates++
// Keep the block that's newer.
if storedBlock.Mtime < blockInfo.Mtime {
response.Contents.BlockDigestToInfo[blockInfo.Digest] = blockInfo
now := time.Now()
arvLogger.Update(func(p map[string]interface{}, e map[string]interface{}) {
keepInfo := logger.GetOrCreateMap(p, "keep_info")
- serverInfo := keepInfo[keepServer.Uuid].(map[string]interface{})
+ serverInfo := keepInfo[keepServer.UUID].(map[string]interface{})
serverInfo["processing_finished_at"] = now
serverInfo["lines_received"] = numLines
return
}
+// Summarize results from keep server
func (readServers *ReadServers) Summarize(arvLogger *logger.Logger) {
readServers.BlockReplicationCounts = make(map[int]int)
for _, infos := range readServers.BlockToServers {
replication := len(infos)
- readServers.BlockReplicationCounts[replication] += 1
+ readServers.BlockReplicationCounts[replication]++
}
if arvLogger != nil {
keepInfo["distinct_blocks_stored"] = len(readServers.BlockToServers)
})
}
-
}
+// TrashRequest struct
type TrashRequest struct {
Locator string `json:"locator"`
BlockMtime int64 `json:"block_mtime"`
}
+// TrashList is an array of TrashRequest objects
type TrashList []TrashRequest
-func SendTrashLists(dataManagerToken string, kc *keepclient.KeepClient, spl map[string]TrashList) (errs []error) {
+// SendTrashLists to trash queue
+func SendTrashLists(kc *keepclient.KeepClient, spl map[string]TrashList) (errs []error) {
count := 0
barrier := make(chan error)
client := kc.Client
for url, v := range spl {
- count += 1
+ count++
log.Printf("Sending trash list to %v", url)
go (func(url string, v TrashList) {
return
}
- // Add api token header
- req.Header.Add("Authorization", fmt.Sprintf("OAuth2 %s", dataManagerToken))
+ req.Header.Add("Authorization", "OAuth2 "+kc.Arvados.ApiToken)
// Make the request
var resp *http.Response
}
- for i := 0; i < count; i += 1 {
+ for i := 0; i < count; i++ {
b := <-barrier
if b != nil {
errs = append(errs, b)
request TrashList
}
-func (this *TestHandler) ServeHTTP(writer http.ResponseWriter, req *http.Request) {
+func (ts *TestHandler) ServeHTTP(writer http.ResponseWriter, req *http.Request) {
r := json.NewDecoder(req.Body)
- r.Decode(&this.request)
+ r.Decode(&ts.request)
}
func (s *KeepSuite) TestSendTrashLists(c *C) {
type TestHandlerError struct {
}
-func (this *TestHandlerError) ServeHTTP(writer http.ResponseWriter, req *http.Request) {
+func (tse *TestHandlerError) ServeHTTP(writer http.ResponseWriter, req *http.Request) {
http.Error(writer, "I'm a teapot", 418)
}
/* Ensures that we only have one copy of each unique string. This is
/* not designed for concurrent access. */
+
package summary
// This code should probably be moved somewhere more universal.
+// CanonicalString struct
type CanonicalString struct {
m map[string]string
}
+// Get a CanonicalString
func (cs *CanonicalString) Get(s string) (r string) {
if cs.m == nil {
cs.m = make(map[string]string)
readDataFrom string
)
+// DataFetcher to fetch data from keep servers
type DataFetcher func(arvLogger *logger.Logger,
readCollections *collection.ReadCollections,
keepServerInfo *keep.ReadServers)
"Avoid network i/o and read summary data from this file instead. Used for development only.")
}
-// Writes data we've read to a file.
+// MaybeWriteData writes data we've read to a file.
//
// This is useful for development, so that we don't need to read all
// our data from the network every time we tweak something.
keepServerInfo keep.ReadServers) bool {
if writeDataTo == "" {
return false
- } else {
- summaryFile, err := os.Create(writeDataTo)
- if err != nil {
- loggerutil.FatalWithMessage(arvLogger,
- fmt.Sprintf("Failed to open %s: %v", writeDataTo, err))
- }
- defer summaryFile.Close()
+ }
+ summaryFile, err := os.Create(writeDataTo)
+ if err != nil {
+ loggerutil.FatalWithMessage(arvLogger,
+ fmt.Sprintf("Failed to open %s: %v", writeDataTo, err))
+ }
+ defer summaryFile.Close()
- enc := gob.NewEncoder(summaryFile)
- data := serializedData{
- ReadCollections: readCollections,
- KeepServerInfo: keepServerInfo}
- err = enc.Encode(data)
- if err != nil {
- loggerutil.FatalWithMessage(arvLogger,
- fmt.Sprintf("Failed to write summary data: %v", err))
- }
- log.Printf("Wrote summary data to: %s", writeDataTo)
- return true
+ enc := gob.NewEncoder(summaryFile)
+ data := serializedData{
+ ReadCollections: readCollections,
+ KeepServerInfo: keepServerInfo}
+ err = enc.Encode(data)
+ if err != nil {
+ loggerutil.FatalWithMessage(arvLogger,
+ fmt.Sprintf("Failed to write summary data: %v", err))
}
+ log.Printf("Wrote summary data to: %s", writeDataTo)
+ return true
}
+// ShouldReadData returns true if a summary data file to read from was specified (development only).
func ShouldReadData() bool {
return readDataFrom != ""
}
-// Reads data that we've written to a file.
+// ReadData reads data that we've written to a file.
//
// This is useful for development, so that we don't need to read all
// our data from the network every time we tweak something.
// Code for generating pull lists as described in https://arvados.org/projects/arvados/wiki/Keep_Design_Doc#Pull-List
+
package summary
import (
"strings"
)
+// Locator is a block digest
type Locator blockdigest.DigestWithSize
+// MarshalJSON encodes a Locator as a quoted JSON string.
func (l Locator) MarshalJSON() ([]byte, error) {
return []byte("\"" + blockdigest.DigestWithSize(l).String() + "\""), nil
}
-// One entry in the Pull List
+// PullRequest represents one entry in the Pull List
type PullRequest struct {
Locator Locator `json:"locator"`
Servers []string `json:"servers"`
}
-// The Pull List for a particular server
+// PullList for a particular server
type PullList []PullRequest
// PullListByLocator implements sort.Interface for PullList based on
return false
}
+// PullServers struct
// For a given under-replicated block, this structure represents which
// servers should pull the specified block and which servers they can
// pull it from.
From []string // Servers that already contain the specified block
}
-// Creates a map from block locator to PullServers with one entry for
-// each under-replicated block.
+// ComputePullServers creates a map from block locator to PullServers
+// with one entry for each under-replicated block.
//
// This method ignores zero-replica blocks since there are no servers
// to pull them from, so callers should feel free to omit them, but
writableServers[cs.Get(url)] = struct{}{}
}
- for block, _ := range underReplicated {
+ for block := range underReplicated {
serversStoringBlock := keepServerInfo.BlockToServers[block]
numCopies := len(serversStoringBlock)
numCopiesMissing := blockToDesiredReplication[block] - numCopies
return m
}
-// Creates a pull list in which the To and From fields preserve the
-// ordering of sorted servers and the contents are all canonical
-// strings.
+// CreatePullServers creates a pull list in which the To and From
+// fields preserve the ordering of sorted servers and the contents
+// are all canonical strings.
func CreatePullServers(cs CanonicalString,
serverHasBlock map[string]struct{},
writableServers map[string]struct{},
return
}
-// Strips the protocol prefix from a url.
+// RemoveProtocolPrefix strips the protocol prefix from a url.
func RemoveProtocolPrefix(url string) string {
return url[(strings.LastIndex(url, "/") + 1):]
}
-// Produces a PullList for each keep server.
+// BuildPullLists produces a PullList for each keep server.
func BuildPullLists(lps map[Locator]PullServers) (spl map[string]PullList) {
spl = map[string]PullList{}
// We don't worry about canonicalizing our strings here, because we
return
}
-// Writes each pull list to a file.
+// WritePullLists writes each pull list to a file.
// The filename is based on the hostname.
//
// This is just a hack for prototyping, it is not expected to be used
// Summarizes Collection Data and Keep Server Contents.
+
package summary
// TODO(misha): Check size of blocks as well as their digest.
"sort"
)
+// BlockSet is a map of blocks
type BlockSet map[blockdigest.DigestWithSize]struct{}
-// Adds a single block to the set.
+// Insert adds a single block to the set.
func (bs BlockSet) Insert(digest blockdigest.DigestWithSize) {
bs[digest] = struct{}{}
}
-// Adds a set of blocks to the set.
+// Union adds a set of blocks to the set.
func (bs BlockSet) Union(obs BlockSet) {
for k, v := range obs {
bs[k] = v
}
}
-// We use the collection index to save space. To convert to and from
+// CollectionIndexSet is used to save space. To convert to and from
// the uuid, use collection.ReadCollections' fields
-// CollectionIndexToUuid and CollectionUuidToIndex.
+// CollectionIndexToUUID and CollectionUUIDToIndex.
type CollectionIndexSet map[int]struct{}
-// Adds a single collection to the set. The collection is specified by
+// Insert adds a single collection to the set. The collection is specified by
// its index.
func (cis CollectionIndexSet) Insert(collectionIndex int) {
cis[collectionIndex] = struct{}{}
}
+// ToCollectionIndexSet adds the collection indices of the blocks in bs to collectionIndexSet.
func (bs BlockSet) ToCollectionIndexSet(
readCollections collection.ReadCollections,
collectionIndexSet *CollectionIndexSet) {
}
}
+// ReplicationLevels struct
// Keeps track of the requested and actual replication levels.
// Currently this is only used for blocks but could easily be used for
// collections as well.
Actual int
}
-// Maps from replication levels to their blocks.
+// ReplicationLevelBlockSetMap maps from replication levels to their blocks.
type ReplicationLevelBlockSetMap map[ReplicationLevels]BlockSet
-// An individual entry from ReplicationLevelBlockSetMap which only reports the number of blocks, not which blocks.
+// ReplicationLevelBlockCount is an individual entry from ReplicationLevelBlockSetMap
+// which only reports the number of blocks, not which blocks.
type ReplicationLevelBlockCount struct {
Levels ReplicationLevels
Count int
}
-// An ordered list of ReplicationLevelBlockCount useful for reporting.
+// ReplicationLevelBlockSetSlice is an ordered list of ReplicationLevelBlockCount useful for reporting.
type ReplicationLevelBlockSetSlice []ReplicationLevelBlockCount
+// ReplicationSummary groups blocks and collections by their replication status.
type ReplicationSummary struct {
CollectionBlocksNotInKeep BlockSet
UnderReplicatedBlocks BlockSet
CorrectlyReplicatedCollections CollectionIndexSet
}
-// This struct counts the elements in each set in ReplicationSummary.
+// ReplicationSummaryCounts struct counts the elements in each set in ReplicationSummary.
type ReplicationSummaryCounts struct {
CollectionBlocksNotInKeep int
UnderReplicatedBlocks int
CorrectlyReplicatedCollections int
}
-// Gets the BlockSet for a given set of ReplicationLevels, creating it
-// if it doesn't already exist.
+// GetOrCreate gets the BlockSet for a given set of ReplicationLevels,
+// creating it if it doesn't already exist.
func (rlbs ReplicationLevelBlockSetMap) GetOrCreate(
repLevels ReplicationLevels) (bs BlockSet) {
bs, exists := rlbs[repLevels]
return
}
-// Adds a block to the set for a given replication level.
+// Insert adds a block to the set for a given replication level.
func (rlbs ReplicationLevelBlockSetMap) Insert(
repLevels ReplicationLevels,
block blockdigest.DigestWithSize) {
rlbs.GetOrCreate(repLevels).Insert(block)
}
-// Adds a set of blocks to the set for a given replication level.
+// Union adds a set of blocks to the set for a given replication level.
func (rlbs ReplicationLevelBlockSetMap) Union(
repLevels ReplicationLevels,
bs BlockSet) {
rlbs.GetOrCreate(repLevels).Union(bs)
}
-// Outputs a sorted list of ReplicationLevelBlockCounts.
+// Counts outputs a sorted list of ReplicationLevelBlockCounts.
func (rlbs ReplicationLevelBlockSetMap) Counts() (
sorted ReplicationLevelBlockSetSlice) {
sorted = make(ReplicationLevelBlockSetSlice, len(rlbs))
rlbss[i], rlbss[j] = rlbss[j], rlbss[i]
}
+// ComputeCounts returns ReplicationSummaryCounts
func (rs ReplicationSummary) ComputeCounts() (rsc ReplicationSummaryCounts) {
// TODO(misha): Consider rewriting this method to iterate through
// the fields using reflection, instead of explictily listing the
return rsc
}
+// PrettyPrint returns a human-readable rendering of ReplicationSummaryCounts.
func (rsc ReplicationSummaryCounts) PrettyPrint() string {
return fmt.Sprintf("Replication Block Counts:"+
"\n Missing From Keep: %d, "+
rsc.CorrectlyReplicatedCollections)
}
+// BucketReplication returns ReplicationLevelBlockSetMap
func BucketReplication(readCollections collection.ReadCollections,
- keepServerInfo keep.ReadServers) (rlbsm ReplicationLevelBlockSetMap) {
- rlbsm = make(ReplicationLevelBlockSetMap)
+ keepServerInfo keep.ReadServers) (rlbs ReplicationLevelBlockSetMap) {
+ rlbs = make(ReplicationLevelBlockSetMap)
for block, requestedReplication := range readCollections.BlockToDesiredReplication {
- rlbsm.Insert(
+ rlbs.Insert(
ReplicationLevels{
Requested: requestedReplication,
Actual: len(keepServerInfo.BlockToServers[block])},
for block, servers := range keepServerInfo.BlockToServers {
if 0 == readCollections.BlockToDesiredReplication[block] {
- rlbsm.Insert(
+ rlbs.Insert(
ReplicationLevels{Requested: 0, Actual: len(servers)},
block)
}
return
}
-func (rlbsm ReplicationLevelBlockSetMap) SummarizeBuckets(
+// SummarizeBuckets reads collections and summarizes
+func (rlbs ReplicationLevelBlockSetMap) SummarizeBuckets(
readCollections collection.ReadCollections) (
rs ReplicationSummary) {
rs.CollectionBlocksNotInKeep = make(BlockSet)
rs.OverReplicatedCollections = make(CollectionIndexSet)
rs.CorrectlyReplicatedCollections = make(CollectionIndexSet)
- for levels, bs := range rlbsm {
+ for levels, bs := range rlbs {
if levels.Actual == 0 {
rs.CollectionBlocksNotInKeep.Union(bs)
} else if levels.Requested == 0 {
rs.OverReplicatedBlocks.ToCollectionIndexSet(readCollections,
&rs.OverReplicatedCollections)
- for i := range readCollections.CollectionIndexToUuid {
+ for i := range readCollections.CollectionIndexToUUID {
if _, notInKeep := rs.CollectionsNotFullyInKeep[i]; notInKeep {
} else if _, underReplicated := rs.UnderReplicatedCollections[i]; underReplicated {
} else if _, overReplicated := rs.OverReplicatedCollections[i]; overReplicated {
returnedSummary := SummarizeReplication(rc, keepInfo)
if !reflect.DeepEqual(returnedSummary, expectedSummary) {
- t.Fatalf("Expected returnedSummary to look like: \n%+v but instead it is: \n%+v. Index to UUID is %v. BlockToCollectionIndices is %v.", expectedSummary, returnedSummary, rc.CollectionIndexToUuid, rc.BlockToCollectionIndices)
+ t.Fatalf("Expected returnedSummary to look like: \n%+v but instead it is: \n%+v. Index to UUID is %v. BlockToCollectionIndices is %v.", expectedSummary, returnedSummary, rc.CollectionIndexToUUID, rc.BlockToCollectionIndices)
}
}
// Code for generating trash lists
+
package summary
import (
"time"
)
+// BuildTrashLists builds list of blocks to be sent to trash queue
func BuildTrashLists(kc *keepclient.KeepClient,
keepServerInfo *keep.ReadServers,
keepBlocksNotInCollections BlockSet) (m map[string]keep.TrashList, err error) {
m = make(map[string]keep.TrashList)
for block := range keepBlocksNotInCollections {
- for _, block_on_server := range keepServerInfo.BlockToServers[block] {
- if block_on_server.Mtime >= expiry {
+ for _, blockOnServer := range keepServerInfo.BlockToServers[block] {
+ if blockOnServer.Mtime >= expiry {
continue
}
// block is older than expire cutoff
- srv := keepServerInfo.KeepServerIndexToAddress[block_on_server.ServerIndex].String()
+ srv := keepServerInfo.KeepServerIndexToAddress[blockOnServer.ServerIndex].String()
if _, writable := writableServers[srv]; !writable {
continue
}
- m[srv] = append(m[srv], keep.TrashRequest{Locator: block.Digest.String(), BlockMtime: block_on_server.Mtime})
+ m[srv] = append(m[srv], keep.TrashRequest{Locator: block.Digest.String(), BlockMtime: blockOnServer.Mtime})
}
}
return
keep.BlockServerInfo{1, 101}}}}
// only block0 is in delete set
- var bs BlockSet = make(BlockSet)
+ var bs = make(BlockSet)
bs[block0] = struct{}{}
// Test trash list where only sv0 is on writable list.
'docker-py',
],
tests_require=[
+ 'pbr<1.7.0',
'mock',
],
test_suite='tests',
'ciso8601'
],
test_suite='tests',
- tests_require=['mock>=1.0', 'PyYAML'],
+ tests_require=['pbr<1.7.0', 'mock>=1.0', 'PyYAML'],
zip_safe=False,
cmdclass={'egg_info': tagger},
)
*ApiTokenCache
}
+type IndexHandler struct {
+ *keepclient.KeepClient
+ *ApiTokenCache
+}
+
type InvalidPathHandler struct{}
type OptionsHandler struct{}
rest.Handle(`/{locator:[0-9a-f]{32}\+.*}`,
GetBlockHandler{kc, t}).Methods("GET", "HEAD")
rest.Handle(`/{locator:[0-9a-f]{32}}`, GetBlockHandler{kc, t}).Methods("GET", "HEAD")
+
+ // List all blocks
+ rest.Handle(`/index`, IndexHandler{kc, t}).Methods("GET")
+
+ // List blocks whose hash has the given prefix
+ rest.Handle(`/index/{prefix:[0-9a-f]{0,32}}`, IndexHandler{kc, t}).Methods("GET")
}
if enable_put {
status = http.StatusBadGateway
}
}
+
+// ServeHTTP implementation for IndexHandler
+// Supports only GET requests for /index/{prefix:[0-9a-f]{0,32}}
+// For each keep server found in LocalRoots:
+// Invokes GetIndex using keepclient
+// Expects "complete" response (terminating with blank new line)
+// Aborts on any errors
+// Concatenates responses from all those keep servers and returns
+func (handler IndexHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
+ SetCorsHeaders(resp)
+
+ prefix := mux.Vars(req)["prefix"]
+ var err error
+ var status int
+
+ defer func() {
+ if status != http.StatusOK {
+ http.Error(resp, err.Error(), status)
+ }
+ }()
+
+ kc := *handler.KeepClient
+
+ ok, token := CheckAuthorizationHeader(kc, handler.ApiTokenCache, req)
+ if !ok {
+ status, err = http.StatusForbidden, BadAuthorizationHeader
+ return
+ }
+
+ // Copy ArvadosClient struct and use the client's API token
+ arvclient := *kc.Arvados
+ arvclient.ApiToken = token
+ kc.Arvados = &arvclient
+
+ // Only GET method is supported
+ if req.Method != "GET" {
+ status, err = http.StatusNotImplemented, MethodNotSupported
+ return
+ }
+
+ // Get index from all LocalRoots and write to resp
+ var reader io.Reader
+ for uuid := range kc.LocalRoots() {
+ reader, err = kc.GetIndex(uuid, prefix)
+ if err != nil {
+ status = http.StatusBadGateway
+ return
+ }
+
+ _, err = io.Copy(resp, reader)
+ if err != nil {
+ status = http.StatusBadGateway
+ return
+ }
+ }
+
+ // Got index from all the keep servers and wrote to resp
+ status = http.StatusOK
+ resp.Write([]byte("\n"))
+}
func (s *ServerRequiredSuite) SetUpSuite(c *C) {
arvadostest.StartAPI()
- arvadostest.StartKeep()
+ arvadostest.StartKeep(2, false)
}
func (s *ServerRequiredSuite) SetUpTest(c *C) {
}
func (s *ServerRequiredSuite) TearDownSuite(c *C) {
- arvadostest.StopKeep()
+ arvadostest.StopKeep(2)
arvadostest.StopAPI()
}
c.Check(err, Equals, nil)
body, err := ioutil.ReadAll(resp.Body)
c.Check(err, Equals, nil)
- c.Check(string(body), Equals,
- fmt.Sprintf("%x+%d", md5.Sum([]byte("qux")), 3))
+ c.Check(string(body), Matches,
+ fmt.Sprintf(`^%x\+3(\+.+)?$`, md5.Sum([]byte("qux"))))
}
}
"http://keep.zzzzz.arvadosapi.com:25107/2228819a18d3727630fa30c81853d23f+67108864+K@zzzzz-zzzzz-zzzzzzzzzzzzzzz+A37b6ab198qqqq28d903b975266b23ee711e1852c@55635f73")
}
+
+// Test GetIndex
+// Put one block, with 2 replicas
+// With no prefix (expect the block locator, twice)
+// With an existing prefix (expect the block locator, twice)
+// With a valid but non-existing prefix (expect "\n")
+// With an invalid prefix (expect error)
+func (s *ServerRequiredSuite) TestGetIndex(c *C) {
+ kc := runProxy(c, []string{"keepproxy"}, 28852, false)
+ waitForListener()
+ defer closeListener()
+
+ // Put "index-data" blocks
+ data := []byte("index-data")
+ hash := fmt.Sprintf("%x", md5.Sum(data))
+
+ hash2, rep, err := kc.PutB(data)
+ c.Check(hash2, Matches, fmt.Sprintf(`^%s\+10(\+.+)?$`, hash))
+ c.Check(rep, Equals, 2)
+ c.Check(err, Equals, nil)
+
+ reader, blocklen, _, err := kc.Get(hash)
+ c.Assert(err, Equals, nil)
+ c.Check(blocklen, Equals, int64(10))
+ all, err := ioutil.ReadAll(reader)
+ c.Check(all, DeepEquals, data)
+
+ // Put some more blocks
+ _, rep, err = kc.PutB([]byte("some-more-index-data"))
+ c.Check(err, Equals, nil)
+
+ // Invoke GetIndex
+ for _, spec := range []struct {
+ prefix string
+ expectTestHash bool
+ expectOther bool
+ }{
+ {"", true, true}, // with no prefix
+ {hash[:3], true, false}, // with matching prefix
+ {"abcdef", false, false}, // with no such prefix
+ } {
+ indexReader, err := kc.GetIndex("proxy", spec.prefix)
+ c.Assert(err, Equals, nil)
+ indexResp, err := ioutil.ReadAll(indexReader)
+ c.Assert(err, Equals, nil)
+ locators := strings.Split(string(indexResp), "\n")
+ gotTestHash := 0
+ gotOther := 0
+ for _, locator := range locators {
+ if locator == "" {
+ continue
+ }
+ c.Check(locator[:len(spec.prefix)], Equals, spec.prefix)
+ if locator[:32] == hash {
+ gotTestHash++
+ } else {
+ gotOther++
+ }
+ }
+ c.Check(gotTestHash == 2, Equals, spec.expectTestHash)
+ c.Check(gotOther > 0, Equals, spec.expectOther)
+ }
+
+ // GetIndex with invalid prefix
+ _, err = kc.GetIndex("proxy", "xyz")
+ c.Assert((err != nil), Equals, true)
+}
--- /dev/null
+user="root"
+group="root"
+chroot="/"
+chdir="/"
+nice=""
+args="-listen=':9100'"
+
--- /dev/null
+#!/bin/sh
+# Init script for keepproxy
+# Maintained by
+# Generated by pleaserun.
+# Implemented based on LSB Core 3.1:
+# * Sections: 20.2, 20.3
+#
+### BEGIN INIT INFO
+# Provides: keepproxy
+# Required-Start: $remote_fs $syslog
+# Required-Stop: $remote_fs $syslog
+# Default-Start: 2 3 4 5
+# Default-Stop: 0 1 6
+# Short-Description:
+# Description: no description given
+### END INIT INFO
+
+PATH=/sbin:/usr/sbin:/bin:/usr/bin
+export PATH
+
+name=keepproxy
+program=/usr/bin/keepproxy
+args=''
+pidfile="/var/run/$name.pid"
+
+[ -r /etc/default/$name ] && . /etc/default/$name
+[ -r /etc/sysconfig/$name ] && . /etc/sysconfig/$name
+
+trace() {
+ logger -t "/etc/init.d/keepproxy" "$@"
+}
+
+emit() {
+ trace "$@"
+ echo "$@"
+}
+
+start() {
+
+ # Ensure the log directory is setup correctly.
+ [ ! -d "/var/log/" ] && mkdir "/var/log/"
+ chown "$user":"$group" "/var/log/"
+ chmod 755 "/var/log/"
+
+
+ # Setup any environmental stuff beforehand
+
+
+ # Run the program!
+
+ chroot --userspec "$user":"$group" "$chroot" sh -c "
+
+ cd \"$chdir\"
+ exec \"$program\" $args
+ " >> /var/log/keepproxy.stdout 2>> /var/log/keepproxy.stderr &
+
+ # Generate the pidfile from here. If we instead made the forked process
+ # generate it there will be a race condition between the pidfile writing
+ # and a process possibly asking for status.
+ echo $! > $pidfile
+
+ emit "$name started"
+ return 0
+}
+
+stop() {
+ # Try a few times to kill TERM the program
+ if status ; then
+ pid=$(cat "$pidfile")
+ trace "Killing $name (pid $pid) with SIGTERM"
+ kill -TERM $pid
+ # Wait for it to exit.
+ for i in 1 2 3 4 5 ; do
+ trace "Waiting $name (pid $pid) to die..."
+ status || break
+ sleep 1
+ done
+ if status ; then
+ emit "$name stop failed; still running."
+ else
+ emit "$name stopped."
+ fi
+ fi
+}
+
+status() {
+ if [ -f "$pidfile" ] ; then
+ pid=$(cat "$pidfile")
+ if ps -p $pid > /dev/null 2> /dev/null ; then
+ # process by this pid is running.
+ # It may not be our pid, but that's what you get with just pidfiles.
+ # TODO(sissel): Check if this process seems to be the same as the one we
+ # expect. It'd be nice to use flock here, but flock uses fork, not exec,
+ # so it makes it quite awkward to use in this case.
+ return 0
+ else
+ return 2 # program is dead but pid file exists
+ fi
+ else
+ return 3 # program is not running
+ fi
+}
+
+force_stop() {
+ if status ; then
+ stop
+ status && kill -KILL $(cat "$pidfile")
+ fi
+}
+
+
+case "$1" in
+ force-start|start|stop|force-stop|restart)
+ trace "Attempting '$1' on keepproxy"
+ ;;
+esac
+
+case "$1" in
+ force-start)
+ PRESTART=no
+ exec "$0" start
+ ;;
+ start)
+ status
+ code=$?
+ if [ $code -eq 0 ]; then
+ emit "$name is already running"
+ exit $code
+ else
+ start
+ exit $?
+ fi
+ ;;
+ stop) stop ;;
+ force-stop) force_stop ;;
+ status)
+ status
+ code=$?
+ if [ $code -eq 0 ] ; then
+ emit "$name is running"
+ else
+ emit "$name is not running"
+ fi
+ exit $code
+ ;;
+ restart)
+
+ stop && start
+ ;;
+ *)
+ echo "Usage: $SCRIPTNAME {start|force-start|stop|force-stop|status|restart}" >&2
+ exit 3
+ ;;
+esac
+
+exit $?
--- /dev/null
+package main
+
+import (
+ "bytes"
+ "errors"
+ "flag"
+ "fmt"
+ "io"
+ "io/ioutil"
+ "log"
+ "os"
+ "regexp"
+ "strings"
+ "time"
+
+ "github.com/curoverse/azure-sdk-for-go/storage"
+)
+
+var (
+ azureStorageAccountName string
+ azureStorageAccountKeyFile string
+ azureStorageReplication int
+ azureWriteRaceInterval = 15 * time.Second
+ azureWriteRacePollTime = time.Second
+)
+
+func readKeyFromFile(file string) (string, error) {
+ buf, err := ioutil.ReadFile(file)
+ if err != nil {
+ return "", errors.New("reading key from " + file + ": " + err.Error())
+ }
+ accountKey := strings.TrimSpace(string(buf))
+ if accountKey == "" {
+ return "", errors.New("empty account key in " + file)
+ }
+ return accountKey, nil
+}
+
+type azureVolumeAdder struct {
+ *volumeSet
+}
+
+func (s *azureVolumeAdder) Set(containerName string) error {
+ if containerName == "" {
+ return errors.New("no container name given")
+ }
+ if azureStorageAccountName == "" || azureStorageAccountKeyFile == "" {
+ return errors.New("-azure-storage-account-name and -azure-storage-account-key-file arguments must given before -azure-storage-container-volume")
+ }
+ accountKey, err := readKeyFromFile(azureStorageAccountKeyFile)
+ if err != nil {
+ return err
+ }
+ azClient, err := storage.NewBasicClient(azureStorageAccountName, accountKey)
+ if err != nil {
+ return errors.New("creating Azure storage client: " + err.Error())
+ }
+ if flagSerializeIO {
+ log.Print("Notice: -serialize is not supported by azure-blob-container volumes.")
+ }
+ v := NewAzureBlobVolume(azClient, containerName, flagReadonly, azureStorageReplication)
+ if err := v.Check(); err != nil {
+ return err
+ }
+ *s.volumeSet = append(*s.volumeSet, v)
+ return nil
+}
+
+func init() {
+ flag.Var(&azureVolumeAdder{&volumes},
+ "azure-storage-container-volume",
+ "Use the given container as a storage volume. Can be given multiple times.")
+ flag.StringVar(
+ &azureStorageAccountName,
+ "azure-storage-account-name",
+ "",
+ "Azure storage account name used for subsequent --azure-storage-container-volume arguments.")
+ flag.StringVar(
+ &azureStorageAccountKeyFile,
+ "azure-storage-account-key-file",
+ "",
+ "File containing the account key used for subsequent --azure-storage-container-volume arguments.")
+ flag.IntVar(
+ &azureStorageReplication,
+ "azure-storage-replication",
+ 3,
+ "Replication level to report to clients when data is stored in an Azure container.")
+}
+
+// An AzureBlobVolume stores and retrieves blocks in an Azure Blob
+// container.
+type AzureBlobVolume struct {
+ azClient storage.Client
+ bsClient storage.BlobStorageClient
+ containerName string
+ readonly bool
+ replication int
+}
+
+// NewAzureBlobVolume returns a new AzureBlobVolume using the given
+// client and container name. The replication argument specifies the
+// replication level to report when writing data.
+func NewAzureBlobVolume(client storage.Client, containerName string, readonly bool, replication int) *AzureBlobVolume {
+ return &AzureBlobVolume{
+ azClient: client,
+ bsClient: client.GetBlobService(),
+ containerName: containerName,
+ readonly: readonly,
+ replication: replication,
+ }
+}
+
+// Check returns nil if the volume is usable.
+func (v *AzureBlobVolume) Check() error {
+ ok, err := v.bsClient.ContainerExists(v.containerName)
+ if err != nil {
+ return err
+ }
+ if !ok {
+ return errors.New("container does not exist")
+ }
+ return nil
+}
+
+// Get reads a Keep block that has been stored as a block blob in the
+// container.
+//
+// If the block is younger than azureWriteRaceInterval and is
+// unexpectedly empty, assume a PutBlob operation is in progress, and
+// wait for it to finish writing.
+func (v *AzureBlobVolume) Get(loc string) ([]byte, error) {
+ var deadline time.Time
+ haveDeadline := false
+ buf, err := v.get(loc)
+ for err == nil && len(buf) == 0 && loc != "d41d8cd98f00b204e9800998ecf8427e" {
+ // Seeing a brand new empty block probably means we're
+ // in a race with CreateBlob, which under the hood
+ // (apparently) does "CreateEmpty" and "CommitData"
+ // with no additional transaction locking.
+ if !haveDeadline {
+ t, err := v.Mtime(loc)
+ if err != nil {
+ log.Print("Got empty block (possible race) but Mtime failed: ", err)
+ break
+ }
+ deadline = t.Add(azureWriteRaceInterval)
+ if time.Now().After(deadline) {
+ break
+ }
+ log.Printf("Race? Block %s is 0 bytes, %s old. Polling until %s", loc, time.Since(t), deadline)
+ haveDeadline = true
+ } else if time.Now().After(deadline) {
+ break
+ }
+ bufs.Put(buf)
+ time.Sleep(azureWriteRacePollTime)
+ buf, err = v.get(loc)
+ }
+ if haveDeadline {
+ log.Printf("Race ended with len(buf)==%d", len(buf))
+ }
+ return buf, err
+}
+
+func (v *AzureBlobVolume) get(loc string) ([]byte, error) {
+ rdr, err := v.bsClient.GetBlob(v.containerName, loc)
+ if err != nil {
+ return nil, v.translateError(err)
+ }
+ defer rdr.Close()
+ buf := bufs.Get(BlockSize)
+ n, err := io.ReadFull(rdr, buf)
+ switch err {
+ case nil, io.EOF, io.ErrUnexpectedEOF:
+ return buf[:n], nil
+ default:
+ bufs.Put(buf)
+ return nil, err
+ }
+}
+
+// Compare the given data with existing stored data.
+func (v *AzureBlobVolume) Compare(loc string, expect []byte) error {
+ rdr, err := v.bsClient.GetBlob(v.containerName, loc)
+ if err != nil {
+ return v.translateError(err)
+ }
+ defer rdr.Close()
+ return compareReaderWithBuf(rdr, expect, loc[:32])
+}
+
+// Put stores a Keep block as a block blob in the container.
+func (v *AzureBlobVolume) Put(loc string, block []byte) error {
+ if v.readonly {
+ return MethodDisabledError
+ }
+ return v.bsClient.CreateBlockBlobFromReader(v.containerName, loc, uint64(len(block)), bytes.NewReader(block))
+}
+
+// Touch updates the last-modified property of a block blob.
+func (v *AzureBlobVolume) Touch(loc string) error {
+ if v.readonly {
+ return MethodDisabledError
+ }
+ return v.bsClient.SetBlobMetadata(v.containerName, loc, map[string]string{
+ "touch": fmt.Sprintf("%d", time.Now()),
+ })
+}
+
+// Mtime returns the last-modified property of a block blob.
+func (v *AzureBlobVolume) Mtime(loc string) (time.Time, error) {
+ props, err := v.bsClient.GetBlobProperties(v.containerName, loc)
+ if err != nil {
+ return time.Time{}, err
+ }
+ return time.Parse(time.RFC1123, props.LastModified)
+}
+
+// IndexTo writes a list of Keep blocks that are stored in the
+// container.
+func (v *AzureBlobVolume) IndexTo(prefix string, writer io.Writer) error {
+ params := storage.ListBlobsParameters{
+ Prefix: prefix,
+ }
+ for {
+ resp, err := v.bsClient.ListBlobs(v.containerName, params)
+ if err != nil {
+ return err
+ }
+ for _, b := range resp.Blobs {
+ t, err := time.Parse(time.RFC1123, b.Properties.LastModified)
+ if err != nil {
+ return err
+ }
+ if !v.isKeepBlock(b.Name) {
+ continue
+ }
+ if b.Properties.ContentLength == 0 && t.Add(azureWriteRaceInterval).After(time.Now()) {
+ // A new zero-length blob is probably
+ // just a new non-empty blob that
+ // hasn't committed its data yet (see
+ // Get()), and in any case has no
+ // value.
+ continue
+ }
+ fmt.Fprintf(writer, "%s+%d %d\n", b.Name, b.Properties.ContentLength, t.Unix())
+ }
+ if resp.NextMarker == "" {
+ return nil
+ }
+ params.Marker = resp.NextMarker
+ }
+}
+
+// Delete a Keep block.
+func (v *AzureBlobVolume) Delete(loc string) error {
+ if v.readonly {
+ return MethodDisabledError
+ }
+ // Ideally we would use If-Unmodified-Since, but that
+ // particular condition seems to be ignored by Azure. Instead,
+ // we get the Etag before checking Mtime, and use If-Match to
+ // ensure we don't delete data if Put() or Touch() happens
+ // between our calls to Mtime() and DeleteBlob().
+ props, err := v.bsClient.GetBlobProperties(v.containerName, loc)
+ if err != nil {
+ return err
+ }
+ if t, err := v.Mtime(loc); err != nil {
+ return err
+ } else if time.Since(t) < blobSignatureTTL {
+ return nil
+ }
+ return v.bsClient.DeleteBlob(v.containerName, loc, map[string]string{
+ "If-Match": props.Etag,
+ })
+}
+
+// Status returns a VolumeStatus struct with placeholder data.
+func (v *AzureBlobVolume) Status() *VolumeStatus {
+ return &VolumeStatus{
+ DeviceNum: 1,
+ BytesFree: BlockSize * 1000,
+ BytesUsed: 1,
+ }
+}
+
+// String returns a volume label, including the container name.
+func (v *AzureBlobVolume) String() string {
+ return fmt.Sprintf("azure-storage-container:%+q", v.containerName)
+}
+
+// Writable returns true, unless the -readonly flag was on when the
+// volume was added.
+func (v *AzureBlobVolume) Writable() bool {
+ return !v.readonly
+}
+
+// Replication returns the replication level of the container, as
+// specified by the -azure-storage-replication argument.
+func (v *AzureBlobVolume) Replication() int {
+ return v.replication
+}
+
+// If possible, translate an Azure SDK error to a recognizable error
+// like os.ErrNotExist.
+func (v *AzureBlobVolume) translateError(err error) error {
+ switch {
+ case err == nil:
+ return err
+ case strings.Contains(err.Error(), "404 Not Found"):
+ // "storage: service returned without a response body (404 Not Found)"
+ return os.ErrNotExist
+ default:
+ return err
+ }
+}
+
+var keepBlockRegexp = regexp.MustCompile(`^[0-9a-f]{32}$`)
+func (v *AzureBlobVolume) isKeepBlock(s string) bool {
+ return keepBlockRegexp.MatchString(s)
+}
--- /dev/null
+package main
+
+import (
+ "bytes"
+ "encoding/base64"
+ "encoding/xml"
+ "flag"
+ "fmt"
+ "io/ioutil"
+ "log"
+ "math/rand"
+ "net"
+ "net/http"
+ "net/http/httptest"
+ "regexp"
+ "sort"
+ "strconv"
+ "strings"
+ "sync"
+ "testing"
+ "time"
+
+ "github.com/curoverse/azure-sdk-for-go/storage"
+)
+
+const (
+ // The same fake credentials used by Microsoft's Azure emulator
+ emulatorAccountName = "devstoreaccount1"
+ emulatorAccountKey = "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw=="
+)
+
+var azureTestContainer string
+
+func init() {
+ flag.StringVar(
+ &azureTestContainer,
+ "test.azure-storage-container-volume",
+ "",
+ "Name of Azure container to use for testing. Do not use a container with real data! Use -azure-storage-account-name and -azure-storage-key-file arguments to supply credentials.")
+}
+
+type azBlob struct {
+ Data []byte
+ Etag string
+ Metadata map[string]string
+ Mtime time.Time
+ Uncommitted map[string][]byte
+}
+
+type azStubHandler struct {
+ sync.Mutex
+ blobs map[string]*azBlob
+ race chan chan struct{}
+}
+
+func newAzStubHandler() *azStubHandler {
+ return &azStubHandler{
+ blobs: make(map[string]*azBlob),
+ }
+}
+
+func (h *azStubHandler) TouchWithDate(container, hash string, t time.Time) {
+ blob, ok := h.blobs[container+"|"+hash]
+ if !ok {
+ return
+ }
+ blob.Mtime = t
+}
+
+func (h *azStubHandler) PutRaw(container, hash string, data []byte) {
+ h.Lock()
+ defer h.Unlock()
+ h.blobs[container+"|"+hash] = &azBlob{
+ Data: data,
+ Mtime: time.Now(),
+ Uncommitted: make(map[string][]byte),
+ }
+}
+
+// unlockAndRace releases the handler's lock and gives a test goroutine
+// a chance to run before re-acquiring it. It is a no-op unless h.race
+// has been set by the test.
+func (h *azStubHandler) unlockAndRace() {
+	if h.race == nil {
+		return
+	}
+	h.Unlock()
+	// Signal caller that race is starting by reading from
+	// h.race. If we get a channel, block until that channel is
+	// ready to receive. If we get nil (or h.race is closed) just
+	// proceed.
+	if c := <-h.race; c != nil {
+		c <- struct{}{}
+	}
+	h.Lock()
+}
+
+// ServeHTTP dispatches on method and query parameters to emulate the
+// Azure Blob Storage operations used by AzureBlobVolume: Put Blob, Put
+// Block, Put Block List, Set Metadata, Get/Head Blob, Delete Blob and
+// List Blobs. Unrecognized requests get 501 Not Implemented.
+func (h *azStubHandler) ServeHTTP(rw http.ResponseWriter, r *http.Request) {
+	h.Lock()
+	defer h.Unlock()
+	// defer log.Printf("azStubHandler: %+v", r)
+
+	// Request paths look like /container or /container/hash.
+	path := strings.Split(r.URL.Path, "/")
+	container := path[1]
+	hash := ""
+	if len(path) > 2 {
+		hash = path[2]
+	}
+
+	if err := r.ParseForm(); err != nil {
+		log.Printf("azStubHandler(%+v): %s", r, err)
+		rw.WriteHeader(http.StatusBadRequest)
+		return
+	}
+
+	body, err := ioutil.ReadAll(r.Body)
+	if err != nil {
+		return
+	}
+
+	type blockListRequestBody struct {
+		XMLName xml.Name `xml:"BlockList"`
+		Uncommitted []string
+	}
+
+	blob, blobExists := h.blobs[container+"|"+hash]
+
+	switch {
+	case r.Method == "PUT" && r.Form.Get("comp") == "":
+		// "Put Blob" API
+		if _, ok := h.blobs[container+"|"+hash]; !ok {
+			// Like the real Azure service, we offer a
+			// race window during which other clients can
+			// list/get the new blob before any data is
+			// committed.
+			h.blobs[container+"|"+hash] = &azBlob{
+				Mtime: time.Now(),
+				Uncommitted: make(map[string][]byte),
+				Etag: makeEtag(),
+			}
+			h.unlockAndRace()
+		}
+		h.blobs[container+"|"+hash] = &azBlob{
+			Data: body,
+			Mtime: time.Now(),
+			Uncommitted: make(map[string][]byte),
+			Etag: makeEtag(),
+		}
+		rw.WriteHeader(http.StatusCreated)
+	case r.Method == "PUT" && r.Form.Get("comp") == "block":
+		// "Put Block" API
+		if !blobExists {
+			log.Printf("Got block for nonexistent blob: %+v", r)
+			rw.WriteHeader(http.StatusBadRequest)
+			return
+		}
+		blockID, err := base64.StdEncoding.DecodeString(r.Form.Get("blockid"))
+		if err != nil || len(blockID) == 0 {
+			log.Printf("Invalid blockid: %+q", r.Form.Get("blockid"))
+			rw.WriteHeader(http.StatusBadRequest)
+			return
+		}
+		blob.Uncommitted[string(blockID)] = body
+		rw.WriteHeader(http.StatusCreated)
+	case r.Method == "PUT" && r.Form.Get("comp") == "blocklist":
+		// "Put Block List" API
+		bl := &blockListRequestBody{}
+		if err := xml.Unmarshal(body, bl); err != nil {
+			log.Printf("xml Unmarshal: %s", err)
+			rw.WriteHeader(http.StatusBadRequest)
+			return
+		}
+		for _, encBlockID := range bl.Uncommitted {
+			blockID, err := base64.StdEncoding.DecodeString(encBlockID)
+			if err != nil || len(blockID) == 0 || blob.Uncommitted[string(blockID)] == nil {
+				log.Printf("Invalid blockid: %+q", encBlockID)
+				rw.WriteHeader(http.StatusBadRequest)
+				return
+			}
+			blob.Data = blob.Uncommitted[string(blockID)]
+			blob.Etag = makeEtag()
+			blob.Mtime = time.Now()
+			delete(blob.Uncommitted, string(blockID))
+		}
+		rw.WriteHeader(http.StatusCreated)
+	case r.Method == "PUT" && r.Form.Get("comp") == "metadata":
+		// "Set Metadata Headers" API. We don't bother
+		// stubbing "Get Metadata Headers": AzureBlobVolume
+		// sets metadata headers only as a way to bump Etag
+		// and Last-Modified.
+		if !blobExists {
+			log.Printf("Got metadata for nonexistent blob: %+v", r)
+			rw.WriteHeader(http.StatusBadRequest)
+			return
+		}
+		blob.Metadata = make(map[string]string)
+		for k, v := range r.Header {
+			if strings.HasPrefix(strings.ToLower(k), "x-ms-meta-") {
+				blob.Metadata[k] = v[0]
+			}
+		}
+		blob.Mtime = time.Now()
+		blob.Etag = makeEtag()
+	case (r.Method == "GET" || r.Method == "HEAD") && hash != "":
+		// "Get Blob" API
+		if !blobExists {
+			rw.WriteHeader(http.StatusNotFound)
+			return
+		}
+		rw.Header().Set("Last-Modified", blob.Mtime.Format(time.RFC1123))
+		rw.Header().Set("Content-Length", strconv.Itoa(len(blob.Data)))
+		if r.Method == "GET" {
+			if _, err := rw.Write(blob.Data); err != nil {
+				log.Printf("write %+q: %s", blob.Data, err)
+			}
+		}
+		h.unlockAndRace()
+	case r.Method == "DELETE" && hash != "":
+		// "Delete Blob" API
+		if !blobExists {
+			rw.WriteHeader(http.StatusNotFound)
+			return
+		}
+		delete(h.blobs, container+"|"+hash)
+		rw.WriteHeader(http.StatusAccepted)
+	case r.Method == "GET" && r.Form.Get("comp") == "list" && r.Form.Get("restype") == "container":
+		// "List Blobs" API
+		prefix := container + "|" + r.Form.Get("prefix")
+		marker := r.Form.Get("marker")
+
+		maxResults := 2
+		if n, err := strconv.Atoi(r.Form.Get("maxresults")); err == nil && n >= 1 && n <= 5000 {
+			maxResults = n
+		}
+
+		resp := storage.BlobListResponse{
+			Marker: marker,
+			NextMarker: "",
+			MaxResults: int64(maxResults),
+		}
+		var hashes sort.StringSlice
+		for k := range h.blobs {
+			if strings.HasPrefix(k, prefix) {
+				hashes = append(hashes, k[len(container)+1:])
+			}
+		}
+		hashes.Sort()
+		for _, hash := range hashes {
+			if len(resp.Blobs) == maxResults {
+				resp.NextMarker = hash
+				break
+			}
+			if len(resp.Blobs) > 0 || marker == "" || marker == hash {
+				blob := h.blobs[container+"|"+hash]
+				resp.Blobs = append(resp.Blobs, storage.Blob{
+					Name: hash,
+					Properties: storage.BlobProperties{
+						LastModified: blob.Mtime.Format(time.RFC1123),
+						ContentLength: int64(len(blob.Data)),
+						Etag: blob.Etag,
+					},
+				})
+			}
+		}
+		buf, err := xml.Marshal(resp)
+		if err != nil {
+			log.Print(err)
+			rw.WriteHeader(http.StatusInternalServerError)
+			// Fix: return here instead of falling through to
+			// rw.Write(buf) with a nil buf after a 500.
+			return
+		}
+		rw.Write(buf)
+	default:
+		log.Printf("azStubHandler: not implemented: %+v Body:%+q", r, body)
+		rw.WriteHeader(http.StatusNotImplemented)
+	}
+}
+
+// azStubDialer is a net.Dialer that notices when the Azure driver
+// tries to connect to an address like
+// "devstoreaccount1.blob.127.0.0.1:46067" -- i.e., the stub server's
+// local host:port with the storage account hostname prepended -- and
+// in such cases transparently dials the bare local "127.0.0.1:46067"
+// (or localhost/[::1]) address instead.
+type azStubDialer struct {
+	net.Dialer
+}
+
+// localHostPortRe matches the local host:port tail of such an address.
+var localHostPortRe = regexp.MustCompile(`(127\.0\.0\.1|localhost|\[::1\]):\d+`)
+
+// Dial implements the substitution described on azStubDialer.
+func (d *azStubDialer) Dial(network, address string) (net.Conn, error) {
+	if hp := localHostPortRe.FindString(address); hp != "" {
+		log.Println("azStubDialer: dial", hp, "instead of", address)
+		address = hp
+	}
+	return d.Dialer.Dial(network, address)
+}
+
+// TestableAzureBlobVolume wraps an AzureBlobVolume with handles to its
+// backing stub so tests can inject and inspect blobs directly.
+type TestableAzureBlobVolume struct {
+	*AzureBlobVolume
+	azHandler *azStubHandler
+	azStub *httptest.Server
+	t *testing.T
+}
+
+// NewTestableAzureBlobVolume returns a volume backed by the local stub
+// server, or by a real Azure container when the
+// -test.azure-storage-container-volume flag was given.
+//
+// NOTE(review): the stub server is started even when a real container
+// is used; Teardown closes it in either case.
+func NewTestableAzureBlobVolume(t *testing.T, readonly bool, replication int) *TestableAzureBlobVolume {
+	azHandler := newAzStubHandler()
+	azStub := httptest.NewServer(azHandler)
+
+	var azClient storage.Client
+
+	container := azureTestContainer
+	if container == "" {
+		// Connect to stub instead of real Azure storage service
+		stubURLBase := strings.Split(azStub.URL, "://")[1]
+		var err error
+		if azClient, err = storage.NewClient(emulatorAccountName, emulatorAccountKey, stubURLBase, storage.DefaultAPIVersion, false); err != nil {
+			t.Fatal(err)
+		}
+		container = "fakecontainername"
+	} else {
+		// Connect to real Azure storage service
+		accountKey, err := readKeyFromFile(azureStorageAccountKeyFile)
+		if err != nil {
+			t.Fatal(err)
+		}
+		azClient, err = storage.NewBasicClient(azureStorageAccountName, accountKey)
+		if err != nil {
+			t.Fatal(err)
+		}
+	}
+
+	v := NewAzureBlobVolume(azClient, container, readonly, replication)
+
+	return &TestableAzureBlobVolume{
+		AzureBlobVolume: v,
+		azHandler: azHandler,
+		azStub: azStub,
+		t: t,
+	}
+}
+
+// TestAzureBlobVolumeWithGeneric runs the generic volume test suite
+// against a writable AzureBlobVolume, routing HTTP traffic to the stub
+// via azStubDialer and shrinking the write-race timings so tests run
+// fast.
+func TestAzureBlobVolumeWithGeneric(t *testing.T) {
+	defer func(t http.RoundTripper) {
+		http.DefaultTransport = t
+	}(http.DefaultTransport)
+	http.DefaultTransport = &http.Transport{
+		Dial: (&azStubDialer{}).Dial,
+	}
+	azureWriteRaceInterval = time.Millisecond
+	azureWriteRacePollTime = time.Nanosecond
+	DoGenericVolumeTests(t, func(t *testing.T) TestableVolume {
+		return NewTestableAzureBlobVolume(t, false, azureStorageReplication)
+	})
+}
+
+// TestReadonlyAzureBlobVolumeWithGeneric is the same as
+// TestAzureBlobVolumeWithGeneric but with a read-only volume.
+func TestReadonlyAzureBlobVolumeWithGeneric(t *testing.T) {
+	defer func(t http.RoundTripper) {
+		http.DefaultTransport = t
+	}(http.DefaultTransport)
+	http.DefaultTransport = &http.Transport{
+		Dial: (&azStubDialer{}).Dial,
+	}
+	azureWriteRaceInterval = time.Millisecond
+	azureWriteRacePollTime = time.Nanosecond
+	DoGenericVolumeTests(t, func(t *testing.T) TestableVolume {
+		return NewTestableAzureBlobVolume(t, true, azureStorageReplication)
+	})
+}
+
+// TestAzureBlobVolumeReplication checks that Replication() reports the
+// replication level the volume was constructed with.
+func TestAzureBlobVolumeReplication(t *testing.T) {
+	for r := 1; r <= 4; r++ {
+		v := NewTestableAzureBlobVolume(t, false, r)
+		defer v.Teardown()
+		if n := v.Replication(); n != r {
+			t.Errorf("Got replication %d, expected %d", n, r)
+		}
+	}
+}
+
+// TestAzureBlobVolumeCreateBlobRace exercises the "Put Blob" race
+// window: a Get that observes a freshly created (still empty) blob
+// should wait and retry until the writer commits real data.
+func TestAzureBlobVolumeCreateBlobRace(t *testing.T) {
+	defer func(t http.RoundTripper) {
+		http.DefaultTransport = t
+	}(http.DefaultTransport)
+	http.DefaultTransport = &http.Transport{
+		Dial: (&azStubDialer{}).Dial,
+	}
+
+	v := NewTestableAzureBlobVolume(t, false, 3)
+	defer v.Teardown()
+
+	azureWriteRaceInterval = time.Second
+	azureWriteRacePollTime = time.Millisecond
+
+	allDone := make(chan struct{})
+	v.azHandler.race = make(chan chan struct{})
+	go func() {
+		err := v.Put(TestHash, TestBlock)
+		if err != nil {
+			t.Error(err)
+		}
+	}()
+	continuePut := make(chan struct{})
+	// Wait for the stub's Put to create the empty blob
+	v.azHandler.race <- continuePut
+	go func() {
+		buf, err := v.Get(TestHash)
+		if err != nil {
+			t.Error(err)
+		} else {
+			bufs.Put(buf)
+		}
+		close(allDone)
+	}()
+	// Wait for the stub's Get to get the empty blob
+	close(v.azHandler.race)
+	// Allow stub's Put to continue, so the real data is ready
+	// when the volume's Get retries
+	<-continuePut
+	// Wait for volume's Get to return the real data
+	<-allDone
+}
+
+// TestAzureBlobVolumeCreateBlobRaceDeadline checks that Get stops
+// waiting for a possible create-blob race once the blob is older than
+// azureWriteRaceInterval, and that an empty committed blob is then
+// returned (and indexed) as a normal zero-length block.
+func TestAzureBlobVolumeCreateBlobRaceDeadline(t *testing.T) {
+	defer func(t http.RoundTripper) {
+		http.DefaultTransport = t
+	}(http.DefaultTransport)
+	http.DefaultTransport = &http.Transport{
+		Dial: (&azStubDialer{}).Dial,
+	}
+
+	v := NewTestableAzureBlobVolume(t, false, 3)
+	defer v.Teardown()
+
+	azureWriteRaceInterval = 2 * time.Second
+	azureWriteRacePollTime = 5 * time.Millisecond
+
+	v.PutRaw(TestHash, nil)
+
+	buf := new(bytes.Buffer)
+	v.IndexTo("", buf)
+	if buf.Len() != 0 {
+		t.Errorf("Index %+q should be empty", buf.Bytes())
+	}
+
+	// Backdate the blob to just inside azureWriteRaceInterval, so
+	// the race window expires almost immediately. (gofmt: no
+	// spaces around * in a constant expression.)
+	v.TouchWithDate(TestHash, time.Now().Add(-1982*time.Millisecond))
+
+	allDone := make(chan struct{})
+	go func() {
+		defer close(allDone)
+		buf, err := v.Get(TestHash)
+		if err != nil {
+			t.Error(err)
+			return
+		}
+		if len(buf) != 0 {
+			t.Errorf("Got %+q, expected empty buf", buf)
+		}
+		bufs.Put(buf)
+	}()
+	select {
+	case <-allDone:
+	case <-time.After(time.Second):
+		t.Error("Get should have stopped waiting for race when block was 2s old")
+	}
+
+	buf.Reset()
+	v.IndexTo("", buf)
+	if !bytes.HasPrefix(buf.Bytes(), []byte(TestHash+"+0")) {
+		t.Errorf("Index %+q should have %+q", buf.Bytes(), TestHash+"+0")
+	}
+}
+
+// PutRaw stores a blob in the stub directly, bypassing the volume's
+// Put (and therefore the create-blob race window).
+func (v *TestableAzureBlobVolume) PutRaw(locator string, data []byte) {
+	v.azHandler.PutRaw(v.containerName, locator, data)
+}
+
+// TouchWithDate sets a stored blob's Mtime; tests use it to backdate
+// blobs.
+func (v *TestableAzureBlobVolume) TouchWithDate(locator string, lastPut time.Time) {
+	v.azHandler.TouchWithDate(v.containerName, locator, lastPut)
+}
+
+// Teardown shuts down the stub server.
+func (v *TestableAzureBlobVolume) Teardown() {
+	v.azStub.Close()
+}
+
+// makeEtag returns a random hex string usable as a fresh Etag value.
+func makeEtag() string {
+	return fmt.Sprintf("0x%x", rand.Int63())
+}
// Initialize a default-sized buffer pool for the benefit of test
// suites that don't run main().
func init() {
- bufs = newBufferPool(maxBuffers, BLOCKSIZE)
+ bufs = newBufferPool(maxBuffers, BlockSize)
}
// Restore sane default after bufferpool's own tests
func (s *BufferPoolSuite) TearDownTest(c *C) {
- bufs = newBufferPool(maxBuffers, BLOCKSIZE)
+ bufs = newBufferPool(maxBuffers, BlockSize)
}
func (s *BufferPoolSuite) TestBufferPoolBufSize(c *C) {
package main
import (
+ "bytes"
"crypto/md5"
"fmt"
"io"
}
var err error
for rdr != nil && err == nil {
- buf := make([]byte, 1 << 18)
+ buf := make([]byte, 1<<18)
var n int
n, err = rdr.Read(buf)
data <- buf[:n]
}
return <-outcome
}
+
+// compareReaderWithBuf reads rdr to EOF, comparing its content against
+// expect. It returns nil on a match, the reader's error if reading
+// fails, or the result of collisionOrCorrupt (which, given hash,
+// distinguishes a hash collision from corrupt data) on a mismatch.
+func compareReaderWithBuf(rdr io.Reader, expect []byte, hash string) error {
+	bufLen := 1 << 20
+	if bufLen > len(expect) && len(expect) > 0 {
+		// No need for bufLen to be longer than
+		// expect, except that len(buf)==0 would
+		// prevent us from handling empty readers the
+		// same way as non-empty readers: reading 0
+		// bytes at a time never reaches EOF.
+		bufLen = len(expect)
+	}
+	buf := make([]byte, bufLen)
+	cmp := expect
+
+	// Loop invariants: all data read so far matched what
+	// we expected, and the first N bytes of cmp are
+	// expected to equal the next N bytes read from
+	// rdr.
+	for {
+		n, err := rdr.Read(buf)
+		if n > len(cmp) || bytes.Compare(cmp[:n], buf[:n]) != 0 {
+			return collisionOrCorrupt(hash, expect[:len(expect)-len(cmp)], buf[:n], rdr)
+		}
+		cmp = cmp[n:]
+		if err == io.EOF {
+			if len(cmp) != 0 {
+				// Reader ended early: data is missing, not extra.
+				return collisionOrCorrupt(hash, expect[:len(expect)-len(cmp)], nil, nil)
+			}
+			return nil
+		} else if err != nil {
+			return err
+		}
+	}
+}
// A RequestTester represents the parameters for an HTTP request to
// be issued on behalf of a unit test.
type RequestTester struct {
- uri string
- api_token string
- method string
- request_body []byte
+ uri string
+ apiToken string
+ method string
+ requestBody []byte
}
// Test GetBlockHandler on the following situations:
defer KeepVM.Close()
vols := KeepVM.AllWritable()
- if err := vols[0].Put(TEST_HASH, TEST_BLOCK); err != nil {
+ if err := vols[0].Put(TestHash, TestBlock); err != nil {
t.Error(err)
}
// Create locators for testing.
// Turn on permission settings so we can generate signed locators.
- enforce_permissions = true
- PermissionSecret = []byte(known_key)
- blob_signature_ttl = 300 * time.Second
+ enforcePermissions = true
+ PermissionSecret = []byte(knownKey)
+ blobSignatureTTL = 300 * time.Second
var (
- unsigned_locator = "/" + TEST_HASH
- valid_timestamp = time.Now().Add(blob_signature_ttl)
- expired_timestamp = time.Now().Add(-time.Hour)
- signed_locator = "/" + SignLocator(TEST_HASH, known_token, valid_timestamp)
- expired_locator = "/" + SignLocator(TEST_HASH, known_token, expired_timestamp)
+ unsignedLocator = "/" + TestHash
+ validTimestamp = time.Now().Add(blobSignatureTTL)
+ expiredTimestamp = time.Now().Add(-time.Hour)
+ signedLocator = "/" + SignLocator(TestHash, knownToken, validTimestamp)
+ expiredLocator = "/" + SignLocator(TestHash, knownToken, expiredTimestamp)
)
// -----------------
// Test unauthenticated request with permissions off.
- enforce_permissions = false
+ enforcePermissions = false
// Unauthenticated request, unsigned locator
// => OK
response := IssueRequest(
&RequestTester{
method: "GET",
- uri: unsigned_locator,
+ uri: unsignedLocator,
})
ExpectStatusCode(t,
"Unauthenticated request, unsigned locator", http.StatusOK, response)
ExpectBody(t,
"Unauthenticated request, unsigned locator",
- string(TEST_BLOCK),
+ string(TestBlock),
response)
- received_cl := response.Header().Get("Content-Length")
- expected_cl := fmt.Sprintf("%d", len(TEST_BLOCK))
- if received_cl != expected_cl {
- t.Errorf("expected Content-Length %s, got %s", expected_cl, received_cl)
+ receivedLen := response.Header().Get("Content-Length")
+ expectedLen := fmt.Sprintf("%d", len(TestBlock))
+ if receivedLen != expectedLen {
+ t.Errorf("expected Content-Length %s, got %s", expectedLen, receivedLen)
}
// ----------------
// Permissions: on.
- enforce_permissions = true
+ enforcePermissions = true
// Authenticated request, signed locator
// => OK
response = IssueRequest(&RequestTester{
- method: "GET",
- uri: signed_locator,
- api_token: known_token,
+ method: "GET",
+ uri: signedLocator,
+ apiToken: knownToken,
})
ExpectStatusCode(t,
"Authenticated request, signed locator", http.StatusOK, response)
ExpectBody(t,
- "Authenticated request, signed locator", string(TEST_BLOCK), response)
+ "Authenticated request, signed locator", string(TestBlock), response)
- received_cl = response.Header().Get("Content-Length")
- expected_cl = fmt.Sprintf("%d", len(TEST_BLOCK))
- if received_cl != expected_cl {
- t.Errorf("expected Content-Length %s, got %s", expected_cl, received_cl)
+ receivedLen = response.Header().Get("Content-Length")
+ expectedLen = fmt.Sprintf("%d", len(TestBlock))
+ if receivedLen != expectedLen {
+ t.Errorf("expected Content-Length %s, got %s", expectedLen, receivedLen)
}
// Authenticated request, unsigned locator
// => PermissionError
response = IssueRequest(&RequestTester{
- method: "GET",
- uri: unsigned_locator,
- api_token: known_token,
+ method: "GET",
+ uri: unsignedLocator,
+ apiToken: knownToken,
})
ExpectStatusCode(t, "unsigned locator", PermissionError.HTTPCode, response)
// => PermissionError
response = IssueRequest(&RequestTester{
method: "GET",
- uri: signed_locator,
+ uri: signedLocator,
})
ExpectStatusCode(t,
"Unauthenticated request, signed locator",
// Authenticated request, expired locator
// => ExpiredError
response = IssueRequest(&RequestTester{
- method: "GET",
- uri: expired_locator,
- api_token: known_token,
+ method: "GET",
+ uri: expiredLocator,
+ apiToken: knownToken,
})
ExpectStatusCode(t,
"Authenticated request, expired locator",
// Unauthenticated request, no server key
// => OK (unsigned response)
- unsigned_locator := "/" + TEST_HASH
+ unsignedLocator := "/" + TestHash
response := IssueRequest(
&RequestTester{
- method: "PUT",
- uri: unsigned_locator,
- request_body: TEST_BLOCK,
+ method: "PUT",
+ uri: unsignedLocator,
+ requestBody: TestBlock,
})
ExpectStatusCode(t,
"Unauthenticated request, no server key", http.StatusOK, response)
ExpectBody(t,
"Unauthenticated request, no server key",
- TEST_HASH_PUT_RESPONSE, response)
+ TestHashPutResp, response)
// ------------------
// With a server key.
- PermissionSecret = []byte(known_key)
- blob_signature_ttl = 300 * time.Second
+ PermissionSecret = []byte(knownKey)
+ blobSignatureTTL = 300 * time.Second
// When a permission key is available, the locator returned
// from an authenticated PUT request will be signed.
// => OK (signed response)
response = IssueRequest(
&RequestTester{
- method: "PUT",
- uri: unsigned_locator,
- request_body: TEST_BLOCK,
- api_token: known_token,
+ method: "PUT",
+ uri: unsignedLocator,
+ requestBody: TestBlock,
+ apiToken: knownToken,
})
ExpectStatusCode(t,
"Authenticated PUT, signed locator, with server key",
http.StatusOK, response)
- response_locator := strings.TrimSpace(response.Body.String())
- if VerifySignature(response_locator, known_token) != nil {
+ responseLocator := strings.TrimSpace(response.Body.String())
+ if VerifySignature(responseLocator, knownToken) != nil {
t.Errorf("Authenticated PUT, signed locator, with server key:\n"+
"response '%s' does not contain a valid signature",
- response_locator)
+ responseLocator)
}
// Unauthenticated PUT, unsigned locator
// => OK
response = IssueRequest(
&RequestTester{
- method: "PUT",
- uri: unsigned_locator,
- request_body: TEST_BLOCK,
+ method: "PUT",
+ uri: unsignedLocator,
+ requestBody: TestBlock,
})
ExpectStatusCode(t,
http.StatusOK, response)
ExpectBody(t,
"Unauthenticated PUT, unsigned locator, with server key",
- TEST_HASH_PUT_RESPONSE, response)
+ TestHashPutResp, response)
}
func TestPutAndDeleteSkipReadonlyVolumes(t *testing.T) {
defer teardown()
- data_manager_token = "fake-data-manager-token"
+ dataManagerToken = "fake-data-manager-token"
vols := []*MockVolume{CreateMockVolume(), CreateMockVolume()}
vols[0].Readonly = true
KeepVM = MakeRRVolumeManager([]Volume{vols[0], vols[1]})
defer KeepVM.Close()
IssueRequest(
&RequestTester{
- method: "PUT",
- uri: "/" + TEST_HASH,
- request_body: TEST_BLOCK,
+ method: "PUT",
+ uri: "/" + TestHash,
+ requestBody: TestBlock,
})
defer func(orig bool) {
- never_delete = orig
- }(never_delete)
- never_delete = false
+ neverDelete = orig
+ }(neverDelete)
+ neverDelete = false
IssueRequest(
&RequestTester{
- method: "DELETE",
- uri: "/" + TEST_HASH,
- request_body: TEST_BLOCK,
- api_token: data_manager_token,
+ method: "DELETE",
+ uri: "/" + TestHash,
+ requestBody: TestBlock,
+ apiToken: dataManagerToken,
})
type expect struct {
volnum int
// - authenticated /index/prefix request | superuser
//
// The only /index requests that should succeed are those issued by the
-// superuser. They should pass regardless of the value of enforce_permissions.
+// superuser. They should pass regardless of the value of enforcePermissions.
//
func TestIndexHandler(t *testing.T) {
defer teardown()
defer KeepVM.Close()
vols := KeepVM.AllWritable()
- vols[0].Put(TEST_HASH, TEST_BLOCK)
- vols[1].Put(TEST_HASH_2, TEST_BLOCK_2)
- vols[0].Put(TEST_HASH+".meta", []byte("metadata"))
- vols[1].Put(TEST_HASH_2+".meta", []byte("metadata"))
+ vols[0].Put(TestHash, TestBlock)
+ vols[1].Put(TestHash2, TestBlock2)
+ vols[0].Put(TestHash+".meta", []byte("metadata"))
+ vols[1].Put(TestHash2+".meta", []byte("metadata"))
- data_manager_token = "DATA MANAGER TOKEN"
+ dataManagerToken = "DATA MANAGER TOKEN"
- unauthenticated_req := &RequestTester{
+ unauthenticatedReq := &RequestTester{
method: "GET",
uri: "/index",
}
- authenticated_req := &RequestTester{
- method: "GET",
- uri: "/index",
- api_token: known_token,
+ authenticatedReq := &RequestTester{
+ method: "GET",
+ uri: "/index",
+ apiToken: knownToken,
}
- superuser_req := &RequestTester{
- method: "GET",
- uri: "/index",
- api_token: data_manager_token,
+ superuserReq := &RequestTester{
+ method: "GET",
+ uri: "/index",
+ apiToken: dataManagerToken,
}
- unauth_prefix_req := &RequestTester{
+ unauthPrefixReq := &RequestTester{
method: "GET",
- uri: "/index/" + TEST_HASH[0:3],
+ uri: "/index/" + TestHash[0:3],
}
- auth_prefix_req := &RequestTester{
- method: "GET",
- uri: "/index/" + TEST_HASH[0:3],
- api_token: known_token,
+ authPrefixReq := &RequestTester{
+ method: "GET",
+ uri: "/index/" + TestHash[0:3],
+ apiToken: knownToken,
}
- superuser_prefix_req := &RequestTester{
- method: "GET",
- uri: "/index/" + TEST_HASH[0:3],
- api_token: data_manager_token,
+ superuserPrefixReq := &RequestTester{
+ method: "GET",
+ uri: "/index/" + TestHash[0:3],
+ apiToken: dataManagerToken,
+ }
+ superuserNoSuchPrefixReq := &RequestTester{
+ method: "GET",
+ uri: "/index/abcd",
+ apiToken: dataManagerToken,
+ }
+ superuserInvalidPrefixReq := &RequestTester{
+ method: "GET",
+ uri: "/index/xyz",
+ apiToken: dataManagerToken,
}
// -------------------------------------------------------------
// Only the superuser should be allowed to issue /index requests.
// ---------------------------
- // enforce_permissions enabled
+ // enforcePermissions enabled
// This setting should not affect tests passing.
- enforce_permissions = true
+ enforcePermissions = true
// unauthenticated /index request
// => UnauthorizedError
- response := IssueRequest(unauthenticated_req)
+ response := IssueRequest(unauthenticatedReq)
ExpectStatusCode(t,
- "enforce_permissions on, unauthenticated request",
+ "enforcePermissions on, unauthenticated request",
UnauthorizedError.HTTPCode,
response)
// unauthenticated /index/prefix request
// => UnauthorizedError
- response = IssueRequest(unauth_prefix_req)
+ response = IssueRequest(unauthPrefixReq)
ExpectStatusCode(t,
"permissions on, unauthenticated /index/prefix request",
UnauthorizedError.HTTPCode,
// authenticated /index request, non-superuser
// => UnauthorizedError
- response = IssueRequest(authenticated_req)
+ response = IssueRequest(authenticatedReq)
ExpectStatusCode(t,
"permissions on, authenticated request, non-superuser",
UnauthorizedError.HTTPCode,
// authenticated /index/prefix request, non-superuser
// => UnauthorizedError
- response = IssueRequest(auth_prefix_req)
+ response = IssueRequest(authPrefixReq)
ExpectStatusCode(t,
"permissions on, authenticated /index/prefix request, non-superuser",
UnauthorizedError.HTTPCode,
// superuser /index request
// => OK
- response = IssueRequest(superuser_req)
+ response = IssueRequest(superuserReq)
ExpectStatusCode(t,
"permissions on, superuser request",
http.StatusOK,
response)
// ----------------------------
- // enforce_permissions disabled
+ // enforcePermissions disabled
// Valid Request should still pass.
- enforce_permissions = false
+ enforcePermissions = false
// superuser /index request
// => OK
- response = IssueRequest(superuser_req)
+ response = IssueRequest(superuserReq)
ExpectStatusCode(t,
"permissions on, superuser request",
http.StatusOK,
response)
- expected := `^` + TEST_HASH + `\+\d+ \d+\n` +
- TEST_HASH_2 + `\+\d+ \d+\n\n$`
+ expected := `^` + TestHash + `\+\d+ \d+\n` +
+ TestHash2 + `\+\d+ \d+\n\n$`
match, _ := regexp.MatchString(expected, response.Body.String())
if !match {
t.Errorf(
// superuser /index/prefix request
// => OK
- response = IssueRequest(superuser_prefix_req)
+ response = IssueRequest(superuserPrefixReq)
ExpectStatusCode(t,
"permissions on, superuser request",
http.StatusOK,
response)
- expected = `^` + TEST_HASH + `\+\d+ \d+\n\n$`
+ expected = `^` + TestHash + `\+\d+ \d+\n\n$`
match, _ = regexp.MatchString(expected, response.Body.String())
if !match {
t.Errorf(
"permissions on, superuser /index/prefix request: expected %s, got:\n%s",
expected, response.Body.String())
}
+
+ // superuser /index/{no-such-prefix} request
+ // => OK
+ response = IssueRequest(superuserNoSuchPrefixReq)
+ ExpectStatusCode(t,
+ "permissions on, superuser request",
+ http.StatusOK,
+ response)
+
+ if "\n" != response.Body.String() {
+ t.Errorf("Expected empty response for %s. Found %s", superuserNoSuchPrefixReq.uri, response.Body.String())
+ }
+
+ // superuser /index/{invalid-prefix} request
+ // => StatusBadRequest
+ response = IssueRequest(superuserInvalidPrefixReq)
+ ExpectStatusCode(t,
+ "permissions on, superuser request",
+ http.StatusBadRequest,
+ response)
}
// TestDeleteHandler
defer KeepVM.Close()
vols := KeepVM.AllWritable()
- vols[0].Put(TEST_HASH, TEST_BLOCK)
+ vols[0].Put(TestHash, TestBlock)
- // Explicitly set the blob_signature_ttl to 0 for these
+ // Explicitly set the blobSignatureTTL to 0 for these
// tests, to ensure the MockVolume deletes the blocks
// even though they have just been created.
- blob_signature_ttl = time.Duration(0)
+ blobSignatureTTL = time.Duration(0)
- var user_token = "NOT DATA MANAGER TOKEN"
- data_manager_token = "DATA MANAGER TOKEN"
+ var userToken = "NOT DATA MANAGER TOKEN"
+ dataManagerToken = "DATA MANAGER TOKEN"
- never_delete = false
+ neverDelete = false
- unauth_req := &RequestTester{
+ unauthReq := &RequestTester{
method: "DELETE",
- uri: "/" + TEST_HASH,
+ uri: "/" + TestHash,
}
- user_req := &RequestTester{
- method: "DELETE",
- uri: "/" + TEST_HASH,
- api_token: user_token,
+ userReq := &RequestTester{
+ method: "DELETE",
+ uri: "/" + TestHash,
+ apiToken: userToken,
}
- superuser_existing_block_req := &RequestTester{
- method: "DELETE",
- uri: "/" + TEST_HASH,
- api_token: data_manager_token,
+ superuserExistingBlockReq := &RequestTester{
+ method: "DELETE",
+ uri: "/" + TestHash,
+ apiToken: dataManagerToken,
}
- superuser_nonexistent_block_req := &RequestTester{
- method: "DELETE",
- uri: "/" + TEST_HASH_2,
- api_token: data_manager_token,
+ superuserNonexistentBlockReq := &RequestTester{
+ method: "DELETE",
+ uri: "/" + TestHash2,
+ apiToken: dataManagerToken,
}
// Unauthenticated request returns PermissionError.
var response *httptest.ResponseRecorder
- response = IssueRequest(unauth_req)
+ response = IssueRequest(unauthReq)
ExpectStatusCode(t,
"unauthenticated request",
PermissionError.HTTPCode,
response)
// Authenticated non-admin request returns PermissionError.
- response = IssueRequest(user_req)
+ response = IssueRequest(userReq)
ExpectStatusCode(t,
"authenticated non-admin request",
PermissionError.HTTPCode,
Deleted int `json:"copies_deleted"`
Failed int `json:"copies_failed"`
}
- var response_dc, expected_dc deletecounter
+ var responseDc, expectedDc deletecounter
- response = IssueRequest(superuser_nonexistent_block_req)
+ response = IssueRequest(superuserNonexistentBlockReq)
ExpectStatusCode(t,
"data manager request, nonexistent block",
http.StatusNotFound,
response)
- // Authenticated admin request for existing block while never_delete is set.
- never_delete = true
- response = IssueRequest(superuser_existing_block_req)
+ // Authenticated admin request for existing block while neverDelete is set.
+ neverDelete = true
+ response = IssueRequest(superuserExistingBlockReq)
ExpectStatusCode(t,
"authenticated request, existing block, method disabled",
MethodDisabledError.HTTPCode,
response)
- never_delete = false
+ neverDelete = false
// Authenticated admin request for existing block.
- response = IssueRequest(superuser_existing_block_req)
+ response = IssueRequest(superuserExistingBlockReq)
ExpectStatusCode(t,
"data manager request, existing block",
http.StatusOK,
response)
// Expect response {"copies_deleted":1,"copies_failed":0}
- expected_dc = deletecounter{1, 0}
- json.NewDecoder(response.Body).Decode(&response_dc)
- if response_dc != expected_dc {
- t.Errorf("superuser_existing_block_req\nexpected: %+v\nreceived: %+v",
- expected_dc, response_dc)
+ expectedDc = deletecounter{1, 0}
+ json.NewDecoder(response.Body).Decode(&responseDc)
+ if responseDc != expectedDc {
+ t.Errorf("superuserExistingBlockReq\nexpected: %+v\nreceived: %+v",
+ expectedDc, responseDc)
}
// Confirm the block has been deleted
- _, err := vols[0].Get(TEST_HASH)
- var block_deleted = os.IsNotExist(err)
- if !block_deleted {
- t.Error("superuser_existing_block_req: block not deleted")
+ _, err := vols[0].Get(TestHash)
+ var blockDeleted = os.IsNotExist(err)
+ if !blockDeleted {
+ t.Error("superuserExistingBlockReq: block not deleted")
}
- // A DELETE request on a block newer than blob_signature_ttl
+ // A DELETE request on a block newer than blobSignatureTTL
// should return success but leave the block on the volume.
- vols[0].Put(TEST_HASH, TEST_BLOCK)
- blob_signature_ttl = time.Hour
+ vols[0].Put(TestHash, TestBlock)
+ blobSignatureTTL = time.Hour
- response = IssueRequest(superuser_existing_block_req)
+ response = IssueRequest(superuserExistingBlockReq)
ExpectStatusCode(t,
"data manager request, existing block",
http.StatusOK,
response)
// Expect response {"copies_deleted":1,"copies_failed":0}
- expected_dc = deletecounter{1, 0}
- json.NewDecoder(response.Body).Decode(&response_dc)
- if response_dc != expected_dc {
- t.Errorf("superuser_existing_block_req\nexpected: %+v\nreceived: %+v",
- expected_dc, response_dc)
+ expectedDc = deletecounter{1, 0}
+ json.NewDecoder(response.Body).Decode(&responseDc)
+ if responseDc != expectedDc {
+ t.Errorf("superuserExistingBlockReq\nexpected: %+v\nreceived: %+v",
+ expectedDc, responseDc)
}
// Confirm the block has NOT been deleted.
- _, err = vols[0].Get(TEST_HASH)
+ _, err = vols[0].Get(TestHash)
if err != nil {
t.Errorf("testing delete on new block: %s\n", err)
}
func TestPullHandler(t *testing.T) {
defer teardown()
- var user_token = "USER TOKEN"
- data_manager_token = "DATA MANAGER TOKEN"
+ var userToken = "USER TOKEN"
+ dataManagerToken = "DATA MANAGER TOKEN"
pullq = NewWorkQueue()
- good_json := []byte(`[
+ goodJSON := []byte(`[
{
"locator":"locator_with_two_servers",
"servers":[
}
]`)
- bad_json := []byte(`{ "key":"I'm a little teapot" }`)
+ badJSON := []byte(`{ "key":"I'm a little teapot" }`)
type pullTest struct {
- name string
- req RequestTester
- response_code int
- response_body string
+ name string
+ req RequestTester
+ responseCode int
+ responseBody string
}
var testcases = []pullTest{
{
"Valid pull list from an ordinary user",
- RequestTester{"/pull", user_token, "PUT", good_json},
+ RequestTester{"/pull", userToken, "PUT", goodJSON},
http.StatusUnauthorized,
"Unauthorized\n",
},
{
"Invalid pull request from an ordinary user",
- RequestTester{"/pull", user_token, "PUT", bad_json},
+ RequestTester{"/pull", userToken, "PUT", badJSON},
http.StatusUnauthorized,
"Unauthorized\n",
},
{
"Valid pull request from the data manager",
- RequestTester{"/pull", data_manager_token, "PUT", good_json},
+ RequestTester{"/pull", dataManagerToken, "PUT", goodJSON},
http.StatusOK,
"Received 3 pull requests\n",
},
{
"Invalid pull request from the data manager",
- RequestTester{"/pull", data_manager_token, "PUT", bad_json},
+ RequestTester{"/pull", dataManagerToken, "PUT", badJSON},
http.StatusBadRequest,
"",
},
for _, tst := range testcases {
response := IssueRequest(&tst.req)
- ExpectStatusCode(t, tst.name, tst.response_code, response)
- ExpectBody(t, tst.name, tst.response_body, response)
+ ExpectStatusCode(t, tst.name, tst.responseCode, response)
+ ExpectBody(t, tst.name, tst.responseBody, response)
}
// The Keep pull manager should have received one good list with 3
func TestTrashHandler(t *testing.T) {
defer teardown()
- var user_token = "USER TOKEN"
- data_manager_token = "DATA MANAGER TOKEN"
+ var userToken = "USER TOKEN"
+ dataManagerToken = "DATA MANAGER TOKEN"
trashq = NewWorkQueue()
- good_json := []byte(`[
+ goodJSON := []byte(`[
{
"locator":"block1",
"block_mtime":1409082153
}
]`)
- bad_json := []byte(`I am not a valid JSON string`)
+ badJSON := []byte(`I am not a valid JSON string`)
type trashTest struct {
- name string
- req RequestTester
- response_code int
- response_body string
+ name string
+ req RequestTester
+ responseCode int
+ responseBody string
}
var testcases = []trashTest{
{
"Valid trash list from an ordinary user",
- RequestTester{"/trash", user_token, "PUT", good_json},
+ RequestTester{"/trash", userToken, "PUT", goodJSON},
http.StatusUnauthorized,
"Unauthorized\n",
},
{
"Invalid trash list from an ordinary user",
- RequestTester{"/trash", user_token, "PUT", bad_json},
+ RequestTester{"/trash", userToken, "PUT", badJSON},
http.StatusUnauthorized,
"Unauthorized\n",
},
{
"Valid trash list from the data manager",
- RequestTester{"/trash", data_manager_token, "PUT", good_json},
+ RequestTester{"/trash", dataManagerToken, "PUT", goodJSON},
http.StatusOK,
"Received 3 trash requests\n",
},
{
"Invalid trash list from the data manager",
- RequestTester{"/trash", data_manager_token, "PUT", bad_json},
+ RequestTester{"/trash", dataManagerToken, "PUT", badJSON},
http.StatusBadRequest,
"",
},
for _, tst := range testcases {
response := IssueRequest(&tst.req)
- ExpectStatusCode(t, tst.name, tst.response_code, response)
- ExpectBody(t, tst.name, tst.response_body, response)
+ ExpectStatusCode(t, tst.name, tst.responseCode, response)
+ ExpectBody(t, tst.name, tst.responseBody, response)
}
// The trash collector should have received one good list with 3
// REST router. It returns the HTTP response to the request.
func IssueRequest(rt *RequestTester) *httptest.ResponseRecorder {
response := httptest.NewRecorder()
- body := bytes.NewReader(rt.request_body)
+ body := bytes.NewReader(rt.requestBody)
req, _ := http.NewRequest(rt.method, rt.uri, body)
- if rt.api_token != "" {
- req.Header.Set("Authorization", "OAuth2 "+rt.api_token)
+ if rt.apiToken != "" {
+ req.Header.Set("Authorization", "OAuth2 "+rt.apiToken)
}
loggingRouter := MakeLoggingRESTRouter()
loggingRouter.ServeHTTP(response, req)
func ExpectStatusCode(
t *testing.T,
testname string,
- expected_status int,
+ expectedStatus int,
response *httptest.ResponseRecorder) {
- if response.Code != expected_status {
+ if response.Code != expectedStatus {
t.Errorf("%s: expected status %d, got %+v",
- testname, expected_status, response)
+ testname, expectedStatus, response)
}
}
func ExpectBody(
t *testing.T,
testname string,
- expected_body string,
+ expectedBody string,
response *httptest.ResponseRecorder) {
- if expected_body != "" && response.Body.String() != expected_body {
+ if expectedBody != "" && response.Body.String() != expectedBody {
t.Errorf("%s: expected response body '%s', got %+v",
- testname, expected_body, response)
+ testname, expectedBody, response)
}
}
defer func(orig *bufferPool) {
bufs = orig
}(bufs)
- bufs = newBufferPool(1, BLOCKSIZE)
+ bufs = newBufferPool(1, BlockSize)
ok := make(chan struct{})
go func() {
for i := 0; i < 2; i++ {
response := IssueRequest(
&RequestTester{
- method: "PUT",
- uri: "/" + TEST_HASH,
- request_body: TEST_BLOCK,
+ method: "PUT",
+ uri: "/" + TestHash,
+ requestBody: TestBlock,
})
ExpectStatusCode(t,
"TestPutNeedsOnlyOneBuffer", http.StatusOK, response)
ok := make(chan bool)
go func() {
- for i := 0; i < maxBuffers+1; i += 1 {
+ for i := 0; i < maxBuffers+1; i++ {
// Unauthenticated request, no server key
// => OK (unsigned response)
- unsigned_locator := "/" + TEST_HASH
+ unsignedLocator := "/" + TestHash
response := IssueRequest(
&RequestTester{
- method: "PUT",
- uri: unsigned_locator,
- request_body: TEST_BLOCK,
+ method: "PUT",
+ uri: unsignedLocator,
+ requestBody: TestBlock,
})
ExpectStatusCode(t,
"TestPutHandlerBufferleak", http.StatusOK, response)
ExpectBody(t,
"TestPutHandlerBufferleak",
- TEST_HASH_PUT_RESPONSE, response)
+ TestHashPutResp, response)
}
ok <- true
}()
defer KeepVM.Close()
vols := KeepVM.AllWritable()
- if err := vols[0].Put(TEST_HASH, TEST_BLOCK); err != nil {
+ if err := vols[0].Put(TestHash, TestBlock); err != nil {
t.Error(err)
}
ok := make(chan bool)
go func() {
- for i := 0; i < maxBuffers+1; i += 1 {
+ for i := 0; i < maxBuffers+1; i++ {
// Unauthenticated request, unsigned locator
// => OK
- unsigned_locator := "/" + TEST_HASH
+ unsignedLocator := "/" + TestHash
response := IssueRequest(
&RequestTester{
method: "GET",
- uri: unsigned_locator,
+ uri: unsignedLocator,
})
ExpectStatusCode(t,
"Unauthenticated request, unsigned locator", http.StatusOK, response)
ExpectBody(t,
"Unauthenticated request, unsigned locator",
- string(TEST_BLOCK),
+ string(TestBlock),
response)
}
ok <- true
case <-ok:
}
}
+
+func TestPutReplicationHeader(t *testing.T) {
+ defer teardown()
+
+ KeepVM = MakeTestVolumeManager(2)
+ defer KeepVM.Close()
+
+ resp := IssueRequest(&RequestTester{
+ method: "PUT",
+ uri: "/" + TestHash,
+ requestBody: TestBlock,
+ })
+ if r := resp.Header().Get("X-Keep-Replicas-Stored"); r != "1" {
+ t.Errorf("Got X-Keep-Replicas-Stored: %q, expected %q", r, "1")
+ }
+}
return rest
}
+// BadRequestHandler is a HandleFunc to address bad requests.
func BadRequestHandler(w http.ResponseWriter, r *http.Request) {
http.Error(w, BadRequestError.Error(), BadRequestError.HTTPCode)
}
+// GetBlockHandler is a HandleFunc to address Get block requests.
func GetBlockHandler(resp http.ResponseWriter, req *http.Request) {
- if enforce_permissions {
+ if enforcePermissions {
locator := req.URL.Path[1:] // strip leading slash
if err := VerifySignature(locator, GetApiToken(req)); err != nil {
http.Error(resp, err.Error(), err.(*KeepError).HTTPCode)
resp.Write(block)
}
+// PutBlockHandler is a HandleFunc to address Put block requests.
func PutBlockHandler(resp http.ResponseWriter, req *http.Request) {
hash := mux.Vars(req)["hash"]
return
}
- if req.ContentLength > BLOCKSIZE {
+ if req.ContentLength > BlockSize {
http.Error(resp, TooLongError.Error(), TooLongError.HTTPCode)
return
}
return
}
- err = PutBlock(buf, hash)
+ replication, err := PutBlock(buf, hash)
bufs.Put(buf)
if err != nil {
// Success; add a size hint, sign the locator if possible, and
// return it to the client.
- return_hash := fmt.Sprintf("%s+%d", hash, req.ContentLength)
- api_token := GetApiToken(req)
- if PermissionSecret != nil && api_token != "" {
- expiry := time.Now().Add(blob_signature_ttl)
- return_hash = SignLocator(return_hash, api_token, expiry)
- }
- resp.Write([]byte(return_hash + "\n"))
+ returnHash := fmt.Sprintf("%s+%d", hash, req.ContentLength)
+ apiToken := GetApiToken(req)
+ if PermissionSecret != nil && apiToken != "" {
+ expiry := time.Now().Add(blobSignatureTTL)
+ returnHash = SignLocator(returnHash, apiToken, expiry)
+ }
+ resp.Header().Set("X-Keep-Replicas-Stored", strconv.Itoa(replication))
+ resp.Write([]byte(returnHash + "\n"))
}
-// IndexHandler
-// A HandleFunc to address /index and /index/{prefix} requests.
-//
+// IndexHandler is a HandleFunc to address /index and /index/{prefix} requests.
func IndexHandler(resp http.ResponseWriter, req *http.Request) {
// Reject unauthorized requests.
if !IsDataManagerToken(GetApiToken(req)) {
// * device_num (an integer identifying the underlying filesystem)
// * bytes_free
// * bytes_used
-//
-type VolumeStatus struct {
- MountPoint string `json:"mount_point"`
- DeviceNum uint64 `json:"device_num"`
- BytesFree uint64 `json:"bytes_free"`
- BytesUsed uint64 `json:"bytes_used"`
-}
+// PoolStatus reports allocated bytes and buffer counts for the buffer pool.
type PoolStatus struct {
Alloc uint64 `json:"BytesAllocated"`
Cap int `json:"BuffersMax"`
Len int `json:"BuffersInUse"`
}
+// NodeStatus reports the volume and buffer-pool status of this keepstore node.
type NodeStatus struct {
Volumes []*VolumeStatus `json:"volumes"`
BufferPool PoolStatus
var st NodeStatus
var stLock sync.Mutex
+// StatusHandler addresses /status.json requests.
func StatusHandler(resp http.ResponseWriter, req *http.Request) {
stLock.Lock()
readNodeStatus(&st)
return
}
- if never_delete {
+ if neverDelete {
http.Error(resp, MethodDisabledError.Error(), MethodDisabledError.HTTPCode)
return
}
If the JSON unmarshalling fails, return 400 Bad Request.
*/
+// PullRequest consists of a block locator and an ordered list of servers
type PullRequest struct {
Locator string `json:"locator"`
Servers []string `json:"servers"`
}
+// PullHandler processes "PUT /pull" requests for the data manager.
func PullHandler(resp http.ResponseWriter, req *http.Request) {
// Reject unauthorized requests.
if !IsDataManagerToken(GetApiToken(req)) {
pullq.ReplaceQueue(plist)
}
+// TrashRequest consists of a block locator and its Mtime
type TrashRequest struct {
Locator string `json:"locator"`
BlockMtime int64 `json:"block_mtime"`
}
+// TrashHandler processes /trash requests.
func TrashHandler(resp http.ResponseWriter, req *http.Request) {
// Reject unauthorized requests.
if !IsDataManagerToken(GetApiToken(req)) {
// should be the only part of the code that cares about which volume a
// block is stored on, so it should be responsible for figuring out
// which volume to check for fetching blocks, storing blocks, etc.
-
// ==============================
+
// GetBlock fetches and returns the block identified by "hash".
//
// On success, GetBlock returns a byte slice with the block data, and
// If the block found does not have the correct MD5 hash, returns
// DiskHashError.
//
-
func GetBlock(hash string) ([]byte, error) {
// Attempt to read the requested hash from a keep volume.
- error_to_caller := NotFoundError
+ errorToCaller := NotFoundError
for _, vol := range KeepVM.AllReadable() {
buf, err := vol.Get(hash)
// this.
log.Printf("%s: checksum mismatch for request %s (actual %s)",
vol, hash, filehash)
- error_to_caller = DiskHashError
+ errorToCaller = DiskHashError
bufs.Put(buf)
continue
}
- if error_to_caller == DiskHashError {
+ if errorToCaller == DiskHashError {
log.Printf("%s: checksum mismatch for request %s but a good copy was found on another volume and returned",
vol, hash)
}
return buf, nil
}
- return nil, error_to_caller
+ return nil, errorToCaller
}
-/* PutBlock(block, hash)
- Stores the BLOCK (identified by the content id HASH) in Keep.
-
- The MD5 checksum of the block must be identical to the content id HASH.
- If not, an error is returned.
-
- PutBlock stores the BLOCK on the first Keep volume with free space.
- A failure code is returned to the user only if all volumes fail.
-
- On success, PutBlock returns nil.
- On failure, it returns a KeepError with one of the following codes:
-
- 500 Collision
- A different block with the same hash already exists on this
- Keep server.
- 422 MD5Fail
- The MD5 hash of the BLOCK does not match the argument HASH.
- 503 Full
- There was not enough space left in any Keep volume to store
- the object.
- 500 Fail
- The object could not be stored for some other reason (e.g.
- all writes failed). The text of the error message should
- provide as much detail as possible.
-*/
-
-func PutBlock(block []byte, hash string) error {
+// PutBlock stores the BLOCK (identified by the content id HASH) in Keep.
+//
+// PutBlock(block, hash)
+// Stores the BLOCK (identified by the content id HASH) in Keep.
+//
+// The MD5 checksum of the block must be identical to the content id HASH.
+// If not, an error is returned.
+//
+// PutBlock stores the BLOCK on the first Keep volume with free space.
+// A failure code is returned to the user only if all volumes fail.
+//
+// On success, PutBlock returns the number of replicas stored and a nil error.
+// On failure, it returns a KeepError with one of the following codes:
+//
+// 500 Collision
+// A different block with the same hash already exists on this
+// Keep server.
+// 422 MD5Fail
+// The MD5 hash of the BLOCK does not match the argument HASH.
+// 503 Full
+// There was not enough space left in any Keep volume to store
+// the object.
+// 500 Fail
+// The object could not be stored for some other reason (e.g.
+// all writes failed). The text of the error message should
+// provide as much detail as possible.
+//
+func PutBlock(block []byte, hash string) (int, error) {
// Check that BLOCK's checksum matches HASH.
blockhash := fmt.Sprintf("%x", md5.Sum(block))
if blockhash != hash {
log.Printf("%s: MD5 checksum %s did not match request", hash, blockhash)
- return RequestHashError
+ return 0, RequestHashError
}
// If we already have this data, it's intact on disk, and we
// can update its timestamp, return success. If we have
// different data with the same hash, return failure.
- if err := CompareAndTouch(hash, block); err == nil || err == CollisionError {
- return err
+ if n, err := CompareAndTouch(hash, block); err == nil || err == CollisionError {
+ return n, err
}
// Choose a Keep volume to write to.
// If this volume fails, try all of the volumes in order.
if vol := KeepVM.NextWritable(); vol != nil {
if err := vol.Put(hash, block); err == nil {
- return nil // success!
+ return vol.Replication(), nil // success!
}
}
writables := KeepVM.AllWritable()
if len(writables) == 0 {
log.Print("No writable volumes.")
- return FullError
+ return 0, FullError
}
allFull := true
for _, vol := range writables {
err := vol.Put(hash, block)
if err == nil {
- return nil // success!
+ return vol.Replication(), nil // success!
}
if err != FullError {
// The volume is not full but the
if allFull {
log.Print("All volumes are full.")
- return FullError
- } else {
- // Already logged the non-full errors.
- return GenericError
+ return 0, FullError
}
+ // Already logged the non-full errors.
+ return 0, GenericError
}
-// CompareAndTouch returns nil if one of the volumes already has the
-// given content and it successfully updates the relevant block's
-// modification time in order to protect it from premature garbage
-// collection.
-func CompareAndTouch(hash string, buf []byte) error {
+// CompareAndTouch returns the current replication level if one of the
+// volumes already has the given content and it successfully updates
+// the relevant block's modification time in order to protect it from
+// premature garbage collection. Otherwise, it returns a non-nil
+// error.
+func CompareAndTouch(hash string, buf []byte) (int, error) {
var bestErr error = NotFoundError
for _, vol := range KeepVM.AllWritable() {
if err := vol.Compare(hash, buf); err == CollisionError {
// both, so there's no point writing it even
// on a different volume.)
log.Printf("%s: Compare(%s): %s", vol, hash, err)
- return err
+ return 0, err
} else if os.IsNotExist(err) {
// Block does not exist. This is the only
// "normal" error: we don't log anything.
continue
}
// Compare and Touch both worked --> done.
- return nil
+ return vol.Replication(), nil
}
- return bestErr
+ return 0, bestErr
}
var validLocatorRe = regexp.MustCompile(`^[0-9a-f]{32}$`)
-// IsValidLocator
-// Return true if the specified string is a valid Keep locator.
-// When Keep is extended to support hash types other than MD5,
-// this should be updated to cover those as well.
+// IsValidLocator returns true if the specified string is a valid Keep locator.
+// When Keep is extended to support hash types other than MD5,
+// this should be updated to cover those as well.
//
func IsValidLocator(loc string) bool {
return validLocatorRe.MatchString(loc)
}
// IsExpired returns true if the given Unix timestamp (expressed as a
-// hexadecimal string) is in the past, or if timestamp_hex cannot be
+// hexadecimal string) is in the past, or if timestampHex cannot be
// parsed as a hexadecimal string.
-func IsExpired(timestamp_hex string) bool {
- ts, err := strconv.ParseInt(timestamp_hex, 16, 0)
+func IsExpired(timestampHex string) bool {
+ ts, err := strconv.ParseInt(timestampHex, 16, 0)
if err != nil {
log.Printf("IsExpired: %s", err)
return true
return time.Unix(ts, 0).Before(time.Now())
}
-// CanDelete returns true if the user identified by api_token is
+// CanDelete returns true if the user identified by apiToken is
// allowed to delete blocks.
-func CanDelete(api_token string) bool {
- if api_token == "" {
+func CanDelete(apiToken string) bool {
+ if apiToken == "" {
return false
}
// Blocks may be deleted only when Keep has been configured with a
// data manager.
- if IsDataManagerToken(api_token) {
+ if IsDataManagerToken(apiToken) {
return true
}
- // TODO(twp): look up api_token with the API server
+ // TODO(twp): look up apiToken with the API server
// return true if is_admin is true and if the token
// has unlimited scope
return false
}
-// IsDataManagerToken returns true if api_token represents the data
+// IsDataManagerToken returns true if apiToken represents the data
// manager's token.
-func IsDataManagerToken(api_token string) bool {
- return data_manager_token != "" && api_token == data_manager_token
+func IsDataManagerToken(apiToken string) bool {
+ return dataManagerToken != "" && apiToken == dataManagerToken
}
--- /dev/null
+package main
+
+import (
+ "bytes"
+ "testing"
+)
+
+// A TestableVolumeManagerFactory creates a volume manager with at least two TestableVolume instances.
+// The factory function, and the TestableVolume instances it returns, can use "t" to write
+// logs, fail the current test, etc.
+type TestableVolumeManagerFactory func(t *testing.T) (*RRVolumeManager, []TestableVolume)
+
+// DoHandlersWithGenericVolumeTests runs a set of handler tests with a
+// Volume Manager comprised of TestableVolume instances.
+// It calls factory to create a volume manager with TestableVolume
+// instances for each test case, to avoid leaking state between tests.
+func DoHandlersWithGenericVolumeTests(t *testing.T, factory TestableVolumeManagerFactory) {
+ testGetBlock(t, factory, TestHash, TestBlock)
+ testGetBlock(t, factory, EmptyHash, EmptyBlock)
+ testPutRawBadDataGetBlock(t, factory, TestHash, TestBlock, []byte("baddata"))
+ testPutRawBadDataGetBlock(t, factory, EmptyHash, EmptyBlock, []byte("baddata"))
+ testPutBlock(t, factory, TestHash, TestBlock)
+ testPutBlock(t, factory, EmptyHash, EmptyBlock)
+ testPutBlockCorrupt(t, factory, TestHash, TestBlock, []byte("baddata"))
+ testPutBlockCorrupt(t, factory, EmptyHash, EmptyBlock, []byte("baddata"))
+}
+
+// Setup RRVolumeManager with TestableVolumes
+func setupHandlersWithGenericVolumeTest(t *testing.T, factory TestableVolumeManagerFactory) []TestableVolume {
+ vm, testableVolumes := factory(t)
+ KeepVM = vm
+
+ for _, v := range testableVolumes {
+ defer v.Teardown()
+ }
+ defer KeepVM.Close()
+
+ return testableVolumes
+}
+
+// Put a block using PutRaw in just one volume and Get it using GetBlock
+func testGetBlock(t *testing.T, factory TestableVolumeManagerFactory, testHash string, testBlock []byte) {
+ testableVolumes := setupHandlersWithGenericVolumeTest(t, factory)
+
+ // Put testBlock in one volume
+ testableVolumes[1].PutRaw(testHash, testBlock)
+
+ // Get should pass
+ buf, err := GetBlock(testHash)
+ if err != nil {
+ t.Fatalf("Error while getting block %s", err)
+ }
+ if bytes.Compare(buf, testBlock) != 0 {
+ t.Errorf("Put succeeded but Get returned %+v, expected %+v", buf, testBlock)
+ }
+}
+
+// Put a bad block using PutRaw and get it.
+func testPutRawBadDataGetBlock(t *testing.T, factory TestableVolumeManagerFactory,
+ testHash string, testBlock []byte, badData []byte) {
+ testableVolumes := setupHandlersWithGenericVolumeTest(t, factory)
+
+ // Put bad data for testHash in both volumes
+ testableVolumes[0].PutRaw(testHash, badData)
+ testableVolumes[1].PutRaw(testHash, badData)
+
+ // Get should fail
+ _, err := GetBlock(testHash)
+ if err == nil {
+ t.Fatalf("Expected error while getting corrupt block %v", testHash)
+ }
+}
+
+// Invoke PutBlock twice to ensure CompareAndTouch path is tested.
+func testPutBlock(t *testing.T, factory TestableVolumeManagerFactory, testHash string, testBlock []byte) {
+ setupHandlersWithGenericVolumeTest(t, factory)
+
+ // PutBlock
+ if _, err := PutBlock(testBlock, testHash); err != nil {
+ t.Fatalf("Error during PutBlock: %s", err)
+ }
+
+ // Check that PutBlock succeeds again even after CompareAndTouch
+ if _, err := PutBlock(testBlock, testHash); err != nil {
+ t.Fatalf("Error during PutBlock: %s", err)
+ }
+
+ // Check that PutBlock stored the data as expected
+ buf, err := GetBlock(testHash)
+ if err != nil {
+ t.Fatalf("Error during GetBlock for %q: %s", testHash, err)
+ } else if bytes.Compare(buf, testBlock) != 0 {
+ t.Errorf("Get response incorrect. Expected %q; found %q", testBlock, buf)
+ }
+}
+
+// Put a bad block using PutRaw, overwrite it using PutBlock and get it.
+func testPutBlockCorrupt(t *testing.T, factory TestableVolumeManagerFactory,
+ testHash string, testBlock []byte, badData []byte) {
+ testableVolumes := setupHandlersWithGenericVolumeTest(t, factory)
+
+ // Put bad data for testHash in both volumes
+ testableVolumes[0].PutRaw(testHash, badData)
+ testableVolumes[1].PutRaw(testHash, badData)
+
+ // Check that PutBlock with good data succeeds
+ if _, err := PutBlock(testBlock, testHash); err != nil {
+ t.Fatalf("Error during PutBlock for %q: %s", testHash, err)
+ }
+
+ // Put succeeded and overwrote the badData in one volume,
+ // and Get should return the testBlock now, ignoring the bad data.
+ buf, err := GetBlock(testHash)
+ if err != nil {
+ t.Fatalf("Error during GetBlock for %q: %s", testHash, err)
+ } else if bytes.Compare(buf, testBlock) != 0 {
+ t.Errorf("Get response incorrect. Expected %q; found %q", testBlock, buf)
+ }
+}
package main
import (
- "bufio"
"bytes"
- "errors"
"flag"
"fmt"
"git.curoverse.com/arvados.git/sdk/go/keepclient"
"os"
"os/signal"
"strings"
- "sync"
"syscall"
"time"
)
// Default TCP address on which to listen for requests.
// Initialized by the --listen flag.
-const DEFAULT_ADDR = ":25107"
+const DefaultAddr = ":25107"
// A Keep "block" is 64MB.
-const BLOCKSIZE = 64 * 1024 * 1024
+const BlockSize = 64 * 1024 * 1024
-// A Keep volume must have at least MIN_FREE_KILOBYTES available
+// A Keep volume must have at least MinFreeKilobytes available
// in order to permit writes.
-const MIN_FREE_KILOBYTES = BLOCKSIZE / 1024
+const MinFreeKilobytes = BlockSize / 1024
-var PROC_MOUNTS = "/proc/mounts"
+// Until #6221 is resolved, neverDelete must be true.
+// However, allow it to be false in testing with TestDataManagerToken
+const TestDataManagerToken = "4axaw8zxe0qm22wa6urpp5nskcne8z88cvbupv653y1njyi05h"
-// enforce_permissions controls whether permission signatures
+// ProcMounts /proc/mounts
+var ProcMounts = "/proc/mounts"
+
+// enforcePermissions controls whether permission signatures
// should be enforced (affecting GET and DELETE requests).
// Initialized by the -enforce-permissions flag.
-var enforce_permissions bool
+var enforcePermissions bool
-// blob_signature_ttl is the time duration for which new permission
+// blobSignatureTTL is the time duration for which new permission
// signatures (returned by PUT requests) will be valid.
// Initialized by the -permission-ttl flag.
-var blob_signature_ttl time.Duration
+var blobSignatureTTL time.Duration
-// data_manager_token represents the API token used by the
+// dataManagerToken represents the API token used by the
// Data Manager, and is required on certain privileged operations.
// Initialized by the -data-manager-token-file flag.
-var data_manager_token string
+var dataManagerToken string
-// never_delete can be used to prevent the DELETE handler from
+// neverDelete can be used to prevent the DELETE handler from
// actually deleting anything.
-var never_delete = true
+var neverDelete = true
var maxBuffers = 128
var bufs *bufferPool
-// ==========
-// Error types.
+// KeepError types.
//
type KeepError struct {
HTTPCode int
var pullq *WorkQueue
var trashq *WorkQueue
+type volumeSet []Volume
+
var (
flagSerializeIO bool
flagReadonly bool
+ volumes volumeSet
)
-type volumeSet []Volume
-
-func (vs *volumeSet) Set(value string) error {
- if dirs := strings.Split(value, ","); len(dirs) > 1 {
- log.Print("DEPRECATED: using comma-separated volume list.")
- for _, dir := range dirs {
- if err := vs.Set(dir); err != nil {
- return err
- }
- }
- return nil
- }
- if len(value) == 0 || value[0] != '/' {
- return errors.New("Invalid volume: must begin with '/'.")
- }
- if _, err := os.Stat(value); err != nil {
- return err
- }
- var locker sync.Locker
- if flagSerializeIO {
- locker = &sync.Mutex{}
- }
- *vs = append(*vs, &UnixVolume{
- root: value,
- locker: locker,
- readonly: flagReadonly,
- })
- return nil
-}
-
func (vs *volumeSet) String() string {
- s := "["
- for i, v := range *vs {
- if i > 0 {
- s = s + " "
- }
- s = s + v.String()
- }
- return s + "]"
-}
-
-// Discover adds a volume for every directory named "keep" that is
-// located at the top level of a device- or tmpfs-backed mount point
-// other than "/". It returns the number of volumes added.
-func (vs *volumeSet) Discover() int {
- added := 0
- f, err := os.Open(PROC_MOUNTS)
- if err != nil {
- log.Fatalf("opening %s: %s", PROC_MOUNTS, err)
- }
- scanner := bufio.NewScanner(f)
- for scanner.Scan() {
- args := strings.Fields(scanner.Text())
- if err := scanner.Err(); err != nil {
- log.Fatalf("reading %s: %s", PROC_MOUNTS, err)
- }
- dev, mount := args[0], args[1]
- if mount == "/" {
- continue
- }
- if dev != "tmpfs" && !strings.HasPrefix(dev, "/dev/") {
- continue
- }
- keepdir := mount + "/keep"
- if st, err := os.Stat(keepdir); err != nil || !st.IsDir() {
- continue
- }
- // Set the -readonly flag (but only for this volume)
- // if the filesystem is mounted readonly.
- flagReadonlyWas := flagReadonly
- for _, fsopt := range strings.Split(args[3], ",") {
- if fsopt == "ro" {
- flagReadonly = true
- break
- }
- if fsopt == "rw" {
- break
- }
- }
- vs.Set(keepdir)
- flagReadonly = flagReadonlyWas
- added++
- }
- return added
+ return fmt.Sprintf("%+v", (*vs)[:])
}
// TODO(twp): continue moving as much code as possible out of main
defer log.Println("keepstore exiting, pid", os.Getpid())
var (
- data_manager_token_file string
- listen string
- blob_signing_key_file string
- permission_ttl_sec int
- volumes volumeSet
- pidfile string
+ dataManagerTokenFile string
+ listen string
+ blobSigningKeyFile string
+ permissionTTLSec int
+ pidfile string
)
flag.StringVar(
- &data_manager_token_file,
+ &dataManagerTokenFile,
"data-manager-token-file",
"",
"File with the API token used by the Data Manager. All DELETE "+
"requests or GET /index requests must carry this token.")
flag.BoolVar(
- &enforce_permissions,
+ &enforcePermissions,
"enforce-permissions",
false,
"Enforce permission signatures on requests.")
flag.StringVar(
&listen,
"listen",
- DEFAULT_ADDR,
+ DefaultAddr,
"Listening address, in the form \"host:port\". e.g., 10.0.1.24:8000. Omit the host part to listen on all interfaces.")
flag.BoolVar(
- &never_delete,
+ &neverDelete,
"never-delete",
true,
"If set, nothing will be deleted. HTTP 405 will be returned "+
"for valid DELETE requests.")
flag.StringVar(
- &blob_signing_key_file,
+ &blobSigningKeyFile,
"permission-key-file",
"",
"Synonym for -blob-signing-key-file.")
flag.StringVar(
- &blob_signing_key_file,
+ &blobSigningKeyFile,
"blob-signing-key-file",
"",
"File containing the secret key for generating and verifying "+
"blob permission signatures.")
flag.IntVar(
- &permission_ttl_sec,
+ &permissionTTLSec,
"permission-ttl",
0,
"Synonym for -blob-signature-ttl.")
flag.IntVar(
- &permission_ttl_sec,
+ &permissionTTLSec,
"blob-signature-ttl",
int(time.Duration(2*7*24*time.Hour).Seconds()),
"Lifetime of blob permission signatures. "+
"readonly",
false,
"Do not write, delete, or touch anything on the following volumes.")
- flag.Var(
- &volumes,
- "volumes",
- "Deprecated synonym for -volume.")
- flag.Var(
- &volumes,
- "volume",
- "Local storage directory. Can be given more than once to add multiple directories. If none are supplied, the default is to use all directories named \"keep\" that exist in the top level directory of a mount point at startup time. Can be a comma-separated list, but this is deprecated: use multiple -volume arguments instead.")
flag.StringVar(
&pidfile,
"pid",
&maxBuffers,
"max-buffers",
maxBuffers,
- fmt.Sprintf("Maximum RAM to use for data buffers, given in multiples of block size (%d MiB). When this limit is reached, HTTP requests requiring buffers (like GET and PUT) will wait for buffer space to be released.", BLOCKSIZE>>20))
+ fmt.Sprintf("Maximum RAM to use for data buffers, given in multiples of block size (%d MiB). When this limit is reached, HTTP requests requiring buffers (like GET and PUT) will wait for buffer space to be released.", BlockSize>>20))
flag.Parse()
- if never_delete != true {
- log.Fatal("never_delete must be true, see #6221")
- }
-
if maxBuffers < 0 {
log.Fatal("-max-buffers must be greater than zero.")
}
- bufs = newBufferPool(maxBuffers, BLOCKSIZE)
+ bufs = newBufferPool(maxBuffers, BlockSize)
if pidfile != "" {
f, err := os.OpenFile(pidfile, os.O_RDWR|os.O_CREATE, 0777)
}
if len(volumes) == 0 {
- if volumes.Discover() == 0 {
+ if (&unixVolumeAdder{&volumes}).Discover() == 0 {
log.Fatal("No volumes found.")
}
}
// Initialize data manager token and permission key.
// If these tokens are specified but cannot be read,
// raise a fatal error.
- if data_manager_token_file != "" {
- if buf, err := ioutil.ReadFile(data_manager_token_file); err == nil {
- data_manager_token = strings.TrimSpace(string(buf))
+ if dataManagerTokenFile != "" {
+ if buf, err := ioutil.ReadFile(dataManagerTokenFile); err == nil {
+ dataManagerToken = strings.TrimSpace(string(buf))
} else {
log.Fatalf("reading data manager token: %s\n", err)
}
}
- if blob_signing_key_file != "" {
- if buf, err := ioutil.ReadFile(blob_signing_key_file); err == nil {
+
+ if neverDelete != true && dataManagerToken != TestDataManagerToken {
+ log.Fatal("never_delete must be true, see #6221")
+ }
+
+ if blobSigningKeyFile != "" {
+ if buf, err := ioutil.ReadFile(blobSigningKeyFile); err == nil {
PermissionSecret = bytes.TrimSpace(buf)
} else {
log.Fatalf("reading permission key: %s\n", err)
}
}
- blob_signature_ttl = time.Duration(permission_ttl_sec) * time.Second
+ blobSignatureTTL = time.Duration(permissionTTLSec) * time.Second
if PermissionSecret == nil {
- if enforce_permissions {
+ if enforcePermissions {
log.Fatal("-enforce-permissions requires a permission key")
} else {
log.Println("Running without a PermissionSecret. Block locators " +
"testing"
)
-var TEST_BLOCK = []byte("The quick brown fox jumps over the lazy dog.")
-var TEST_HASH = "e4d909c290d0fb1ca068ffaddf22cbd0"
-var TEST_HASH_PUT_RESPONSE = "e4d909c290d0fb1ca068ffaddf22cbd0+44\n"
+var TestBlock = []byte("The quick brown fox jumps over the lazy dog.")
+var TestHash = "e4d909c290d0fb1ca068ffaddf22cbd0"
+var TestHashPutResp = "e4d909c290d0fb1ca068ffaddf22cbd0+44\n"
-var TEST_BLOCK_2 = []byte("Pack my box with five dozen liquor jugs.")
-var TEST_HASH_2 = "f15ac516f788aec4f30932ffb6395c39"
+var TestBlock2 = []byte("Pack my box with five dozen liquor jugs.")
+var TestHash2 = "f15ac516f788aec4f30932ffb6395c39"
-var TEST_BLOCK_3 = []byte("Now is the time for all good men to come to the aid of their country.")
-var TEST_HASH_3 = "eed29bbffbc2dbe5e5ee0bb71888e61f"
+var TestBlock3 = []byte("Now is the time for all good men to come to the aid of their country.")
+var TestHash3 = "eed29bbffbc2dbe5e5ee0bb71888e61f"
-// BAD_BLOCK is used to test collisions and corruption.
+// BadBlock is used to test collisions and corruption.
// It must not match any test hashes.
-var BAD_BLOCK = []byte("The magic words are squeamish ossifrage.")
+var BadBlock = []byte("The magic words are squeamish ossifrage.")
+
+// Empty block
+var EmptyHash = "d41d8cd98f00b204e9800998ecf8427e"
+var EmptyBlock = []byte("")
// TODO(twp): Tests still to be written
//
defer KeepVM.Close()
vols := KeepVM.AllReadable()
- if err := vols[1].Put(TEST_HASH, TEST_BLOCK); err != nil {
+ if err := vols[1].Put(TestHash, TestBlock); err != nil {
t.Error(err)
}
// Check that GetBlock returns success.
- result, err := GetBlock(TEST_HASH)
+ result, err := GetBlock(TestHash)
if err != nil {
t.Errorf("GetBlock error: %s", err)
}
- if fmt.Sprint(result) != fmt.Sprint(TEST_BLOCK) {
- t.Errorf("expected %s, got %s", TEST_BLOCK, result)
+ if fmt.Sprint(result) != fmt.Sprint(TestBlock) {
+ t.Errorf("expected %s, got %s", TestBlock, result)
}
}
defer KeepVM.Close()
// Check that GetBlock returns failure.
- result, err := GetBlock(TEST_HASH)
+ result, err := GetBlock(TestHash)
if err != NotFoundError {
t.Errorf("Expected NotFoundError, got %v", result)
}
defer KeepVM.Close()
vols := KeepVM.AllReadable()
- vols[0].Put(TEST_HASH, BAD_BLOCK)
+ vols[0].Put(TestHash, BadBlock)
// Check that GetBlock returns failure.
- result, err := GetBlock(TEST_HASH)
+ result, err := GetBlock(TestHash)
if err != DiskHashError {
t.Errorf("Expected DiskHashError, got %v (buf: %v)", err, result)
}
defer KeepVM.Close()
// Check that PutBlock stores the data as expected.
- if err := PutBlock(TEST_BLOCK, TEST_HASH); err != nil {
- t.Fatalf("PutBlock: %v", err)
+ if n, err := PutBlock(TestBlock, TestHash); err != nil || n < 1 {
+ t.Fatalf("PutBlock: n %d err %v", n, err)
}
vols := KeepVM.AllReadable()
- result, err := vols[1].Get(TEST_HASH)
+ result, err := vols[1].Get(TestHash)
if err != nil {
t.Fatalf("Volume #0 Get returned error: %v", err)
}
- if string(result) != string(TEST_BLOCK) {
+ if string(result) != string(TestBlock) {
t.Fatalf("PutBlock stored '%s', Get retrieved '%s'",
- string(TEST_BLOCK), string(result))
+ string(TestBlock), string(result))
}
}
vols[0].(*MockVolume).Bad = true
// Check that PutBlock stores the data as expected.
- if err := PutBlock(TEST_BLOCK, TEST_HASH); err != nil {
- t.Fatalf("PutBlock: %v", err)
+ if n, err := PutBlock(TestBlock, TestHash); err != nil || n < 1 {
+ t.Fatalf("PutBlock: n %d err %v", n, err)
}
- result, err := GetBlock(TEST_HASH)
+ result, err := GetBlock(TestHash)
if err != nil {
t.Fatalf("GetBlock: %v", err)
}
- if string(result) != string(TEST_BLOCK) {
+ if string(result) != string(TestBlock) {
t.Error("PutBlock/GetBlock mismatch")
t.Fatalf("PutBlock stored '%s', GetBlock retrieved '%s'",
- string(TEST_BLOCK), string(result))
+ string(TestBlock), string(result))
}
}
// Check that PutBlock returns the expected error when the hash does
// not match the block.
- if err := PutBlock(BAD_BLOCK, TEST_HASH); err != RequestHashError {
- t.Error("Expected RequestHashError, got %v", err)
+ if _, err := PutBlock(BadBlock, TestHash); err != RequestHashError {
+ t.Errorf("Expected RequestHashError, got %v", err)
}
// Confirm that GetBlock fails to return anything.
- if result, err := GetBlock(TEST_HASH); err != NotFoundError {
+ if result, err := GetBlock(TestHash); err != NotFoundError {
t.Errorf("GetBlock succeeded after a corrupt block store (result = %s, err = %v)",
string(result), err)
}
KeepVM = MakeTestVolumeManager(2)
defer KeepVM.Close()
- // Store a corrupted block under TEST_HASH.
+ // Store a corrupted block under TestHash.
vols := KeepVM.AllWritable()
- vols[0].Put(TEST_HASH, BAD_BLOCK)
- if err := PutBlock(TEST_BLOCK, TEST_HASH); err != nil {
- t.Errorf("PutBlock: %v", err)
+ vols[0].Put(TestHash, BadBlock)
+ if n, err := PutBlock(TestBlock, TestHash); err != nil || n < 1 {
+ t.Errorf("PutBlock: n %d err %v", n, err)
}
- // The block on disk should now match TEST_BLOCK.
- if block, err := GetBlock(TEST_HASH); err != nil {
+ // The block on disk should now match TestBlock.
+ if block, err := GetBlock(TestHash); err != nil {
t.Errorf("GetBlock: %v", err)
- } else if bytes.Compare(block, TEST_BLOCK) != 0 {
+ } else if bytes.Compare(block, TestBlock) != 0 {
t.Errorf("GetBlock returned: '%s'", string(block))
}
}
// Store one block, then attempt to store the other. Confirm that
// PutBlock reported a CollisionError.
- if err := PutBlock(b1, locator); err != nil {
+ if _, err := PutBlock(b1, locator); err != nil {
t.Error(err)
}
- if err := PutBlock(b2, locator); err == nil {
+ if _, err := PutBlock(b2, locator); err == nil {
t.Error("PutBlock did not report a collision")
} else if err != CollisionError {
t.Errorf("PutBlock returned %v", err)
// Store a block and then make the underlying volume bad,
// so a subsequent attempt to update the file timestamp
// will fail.
- vols[0].Put(TEST_HASH, BAD_BLOCK)
- old_mtime, err := vols[0].Mtime(TEST_HASH)
+ vols[0].Put(TestHash, BadBlock)
+ oldMtime, err := vols[0].Mtime(TestHash)
if err != nil {
- t.Fatalf("vols[0].Mtime(%s): %s\n", TEST_HASH, err)
+ t.Fatalf("vols[0].Mtime(%s): %s\n", TestHash, err)
}
// vols[0].Touch will fail on the next call, so the volume
// manager will store a copy on vols[1] instead.
vols[0].(*MockVolume).Touchable = false
- if err := PutBlock(TEST_BLOCK, TEST_HASH); err != nil {
- t.Fatalf("PutBlock: %v", err)
+ if n, err := PutBlock(TestBlock, TestHash); err != nil || n < 1 {
+ t.Fatalf("PutBlock: n %d err %v", n, err)
}
vols[0].(*MockVolume).Touchable = true
// Now the mtime on the block on vols[0] should be unchanged, and
// there should be a copy of the block on vols[1].
- new_mtime, err := vols[0].Mtime(TEST_HASH)
+ newMtime, err := vols[0].Mtime(TestHash)
if err != nil {
- t.Fatalf("vols[0].Mtime(%s): %s\n", TEST_HASH, err)
+ t.Fatalf("vols[0].Mtime(%s): %s\n", TestHash, err)
}
- if !new_mtime.Equal(old_mtime) {
- t.Errorf("mtime was changed on vols[0]:\nold_mtime = %v\nnew_mtime = %v\n",
- old_mtime, new_mtime)
+ if !newMtime.Equal(oldMtime) {
+ t.Errorf("mtime was changed on vols[0]:\noldMtime = %v\nnewMtime = %v\n",
+ oldMtime, newMtime)
}
- result, err := vols[1].Get(TEST_HASH)
+ result, err := vols[1].Get(TestHash)
if err != nil {
t.Fatalf("vols[1]: %v", err)
}
- if bytes.Compare(result, TEST_BLOCK) != 0 {
+ if bytes.Compare(result, TestBlock) != 0 {
t.Errorf("new block does not match test block\nnew block = %v\n", result)
}
}
}
}
- // Set up a bogus PROC_MOUNTS file.
+ // Set up a bogus ProcMounts file.
f, err := ioutil.TempFile("", "keeptest")
if err != nil {
t.Fatal(err)
fmt.Fprintf(f, "tmpfs %s tmpfs %s 0 0\n", path.Dir(vol), opts)
}
f.Close()
- PROC_MOUNTS = f.Name()
+ ProcMounts = f.Name()
- var resultVols volumeSet
- added := resultVols.Discover()
+ resultVols := volumeSet{}
+ added := (&unixVolumeAdder{&resultVols}).Discover()
if added != len(resultVols) {
t.Errorf("Discover returned %d, but added %d volumes",
func TestDiscoverNone(t *testing.T) {
defer teardown()
- // Set up a bogus PROC_MOUNTS file with no Keep vols.
+ // Set up a bogus ProcMounts file with no Keep vols.
f, err := ioutil.TempFile("", "keeptest")
if err != nil {
t.Fatal(err)
fmt.Fprintln(f, "udev /dev devtmpfs opts 0 0")
fmt.Fprintln(f, "devpts /dev/pts devpts opts 0 0")
f.Close()
- PROC_MOUNTS = f.Name()
+ ProcMounts = f.Name()
- var resultVols volumeSet
- added := resultVols.Discover()
+ resultVols := volumeSet{}
+ added := (&unixVolumeAdder{&resultVols}).Discover()
if added != 0 || len(resultVols) != 0 {
t.Fatalf("got %d, %v; expected 0, []", added, resultVols)
}
defer KeepVM.Close()
vols := KeepVM.AllReadable()
- vols[0].Put(TEST_HASH, TEST_BLOCK)
- vols[1].Put(TEST_HASH_2, TEST_BLOCK_2)
- vols[0].Put(TEST_HASH_3, TEST_BLOCK_3)
- vols[0].Put(TEST_HASH+".meta", []byte("metadata"))
- vols[1].Put(TEST_HASH_2+".meta", []byte("metadata"))
+ vols[0].Put(TestHash, TestBlock)
+ vols[1].Put(TestHash2, TestBlock2)
+ vols[0].Put(TestHash3, TestBlock3)
+ vols[0].Put(TestHash+".meta", []byte("metadata"))
+ vols[1].Put(TestHash2+".meta", []byte("metadata"))
buf := new(bytes.Buffer)
vols[0].IndexTo("", buf)
vols[1].IndexTo("", buf)
- index_rows := strings.Split(string(buf.Bytes()), "\n")
- sort.Strings(index_rows)
- sorted_index := strings.Join(index_rows, "\n")
- expected := `^\n` + TEST_HASH + `\+\d+ \d+\n` +
- TEST_HASH_3 + `\+\d+ \d+\n` +
- TEST_HASH_2 + `\+\d+ \d+$`
-
- match, err := regexp.MatchString(expected, sorted_index)
+ indexRows := strings.Split(string(buf.Bytes()), "\n")
+ sort.Strings(indexRows)
+ sortedIndex := strings.Join(indexRows, "\n")
+ expected := `^\n` + TestHash + `\+\d+ \d+\n` +
+ TestHash3 + `\+\d+ \d+\n` +
+ TestHash2 + `\+\d+ \d+$`
+
+ match, err := regexp.MatchString(expected, sortedIndex)
if err == nil {
if !match {
t.Errorf("IndexLocators returned:\n%s", string(buf.Bytes()))
// MakeTestVolumeManager returns a RRVolumeManager with the specified
// number of MockVolumes.
-func MakeTestVolumeManager(num_volumes int) VolumeManager {
- vols := make([]Volume, num_volumes)
+func MakeTestVolumeManager(numVolumes int) VolumeManager {
+ vols := make([]Volume, numVolumes)
for i := range vols {
vols[i] = CreateMockVolume()
}
// teardown cleans up after each test.
func teardown() {
- data_manager_token = ""
- enforce_permissions = false
+ dataManagerToken = ""
+ enforcePermissions = false
PermissionSecret = nil
KeepVM = nil
}
"time"
)
+// LoggingResponseWriter has anonymous fields ResponseWriter and ResponseBody
type LoggingResponseWriter struct {
Status int
Length int
ResponseBody string
}
+// WriteHeader records the response status code and delegates to the wrapped ResponseWriter.
func (loggingWriter *LoggingResponseWriter) WriteHeader(code int) {
loggingWriter.Status = code
loggingWriter.ResponseWriter.WriteHeader(code)
return loggingWriter.ResponseWriter.Write(data)
}
+// LoggingRESTRouter is used to add logging capabilities to mux.Router
type LoggingRESTRouter struct {
router *mux.Router
}
+// MakeLoggingRESTRouter initializes LoggingRESTRouter
func MakeLoggingRESTRouter() *LoggingRESTRouter {
router := MakeRESTRouter()
return (&LoggingRESTRouter{router})
func NewMockMutex() *MockMutex {
return &MockMutex{
- AllowLock: make(chan struct{}),
+ AllowLock: make(chan struct{}),
AllowUnlock: make(chan struct{}),
}
}
// Lock waits for someone to send to AllowLock.
func (m *MockMutex) Lock() {
- <- m.AllowLock
+ <-m.AllowLock
}
// Unlock waits for someone to send to AllowUnlock.
func (m *MockMutex) Unlock() {
- <- m.AllowUnlock
+ <-m.AllowUnlock
}
-/*
-Permissions management on Arvados locator hashes.
-
-The permissions structure for Arvados is as follows (from
-https://arvados.org/issues/2328)
-
-A Keep locator string has the following format:
-
- [hash]+[size]+A[signature]@[timestamp]
-
-The "signature" string here is a cryptographic hash, expressed as a
-string of hexadecimal digits, and timestamp is a 32-bit Unix timestamp
-expressed as a hexadecimal number. e.g.:
-
- acbd18db4cc2f85cedef654fccc4a4d8+3+A257f3f5f5f0a4e4626a18fc74bd42ec34dcb228a@7fffffff
-
-The signature represents a guarantee that this locator was generated
-by either Keep or the API server for use with the supplied API token.
-If a request to Keep includes a locator with a valid signature and is
-accompanied by the proper API token, the user has permission to GET
-that object.
-
-The signature may be generated either by Keep (after the user writes a
-block) or by the API server (if the user has can_read permission on
-the specified object). Keep and API server share a secret that is used
-to generate signatures.
-
-To verify a permission hint, Keep generates a new hint for the
-requested object (using the locator string, the timestamp, the
-permission secret and the user's API token, which must appear in the
-request headers) and compares it against the hint included in the
-request. If the permissions do not match, or if the API token is not
-present, Keep returns a 401 error.
-*/
-
package main
import (
- "crypto/hmac"
- "crypto/sha1"
- "fmt"
- "regexp"
- "strconv"
- "strings"
+ "git.curoverse.com/arvados.git/sdk/go/keepclient"
"time"
)
// key.
var PermissionSecret []byte
-// MakePermSignature returns a string representing the signed permission
-// hint for the blob identified by blob_hash, api_token and expiration timestamp.
-func MakePermSignature(blob_hash string, api_token string, expiry string) string {
- hmac := hmac.New(sha1.New, PermissionSecret)
- hmac.Write([]byte(blob_hash))
- hmac.Write([]byte("@"))
- hmac.Write([]byte(api_token))
- hmac.Write([]byte("@"))
- hmac.Write([]byte(expiry))
- digest := hmac.Sum(nil)
- return fmt.Sprintf("%x", digest)
-}
-
-// SignLocator takes a blob_locator, an api_token and an expiry time, and
+// SignLocator takes a blobLocator, an apiToken and an expiry time, and
// returns a signed locator string.
-func SignLocator(blob_locator string, api_token string, expiry time.Time) string {
- // If no permission secret or API token is available,
- // return an unsigned locator.
- if PermissionSecret == nil || api_token == "" {
- return blob_locator
- }
- // Extract the hash from the blob locator, omitting any size hint that may be present.
- blob_hash := strings.Split(blob_locator, "+")[0]
- // Return the signed locator string.
- timestamp_hex := fmt.Sprintf("%08x", expiry.Unix())
- return blob_locator +
- "+A" + MakePermSignature(blob_hash, api_token, timestamp_hex) +
- "@" + timestamp_hex
+func SignLocator(blobLocator, apiToken string, expiry time.Time) string {
+ return keepclient.SignLocator(blobLocator, apiToken, expiry, PermissionSecret)
}
-var signedLocatorRe = regexp.MustCompile(`^([[:xdigit:]]{32}).*\+A([[:xdigit:]]{40})@([[:xdigit:]]{8})`)
-
-// VerifySignature returns nil if the signature on the signed_locator
-// can be verified using the given api_token. Otherwise it returns
+// VerifySignature returns nil if the signature on the signedLocator
+// can be verified using the given apiToken. Otherwise it returns
// either ExpiredError (if the timestamp has expired, which is
// something the client could have figured out independently) or
// PermissionError.
-func VerifySignature(signed_locator string, api_token string) error {
- matches := signedLocatorRe.FindStringSubmatch(signed_locator)
- if matches == nil {
- // Could not find a permission signature at all
- return PermissionError
- }
- blob_hash := matches[1]
- sig_hex := matches[2]
- exp_hex := matches[3]
- if exp_time, err := ParseHexTimestamp(exp_hex); err != nil {
- return PermissionError
- } else if exp_time.Before(time.Now()) {
+func VerifySignature(signedLocator, apiToken string) error {
+ err := keepclient.VerifySignature(signedLocator, apiToken, PermissionSecret)
+ if err == keepclient.ErrSignatureExpired {
return ExpiredError
- }
- if sig_hex != MakePermSignature(blob_hash, api_token, exp_hex) {
+ } else if err != nil {
return PermissionError
}
return nil
}
-
-func ParseHexTimestamp(timestamp_hex string) (ts time.Time, err error) {
- if ts_int, e := strconv.ParseInt(timestamp_hex, 16, 0); e == nil {
- ts = time.Unix(ts_int, 0)
- } else {
- err = e
- }
- return ts, err
-}
package main
import (
+ "strconv"
"testing"
"time"
)
const (
- known_hash = "acbd18db4cc2f85cedef654fccc4a4d8"
- known_locator = known_hash + "+3"
- known_token = "hocfupkn2pjhrpgp2vxv8rsku7tvtx49arbc9s4bvu7p7wxqvk"
- known_key = "13u9fkuccnboeewr0ne3mvapk28epf68a3bhj9q8sb4l6e4e5mkk" +
+ knownHash = "acbd18db4cc2f85cedef654fccc4a4d8"
+ knownLocator = knownHash + "+3"
+ knownToken = "hocfupkn2pjhrpgp2vxv8rsku7tvtx49arbc9s4bvu7p7wxqvk"
+ knownKey = "13u9fkuccnboeewr0ne3mvapk28epf68a3bhj9q8sb4l6e4e5mkk" +
"p6nhj2mmpscgu1zze5h5enydxfe3j215024u16ij4hjaiqs5u4pzsl3nczmaoxnc" +
"ljkm4875xqn4xv058koz3vkptmzhyheiy6wzevzjmdvxhvcqsvr5abhl15c2d4o4" +
"jhl0s91lojy1mtrzqqvprqcverls0xvy9vai9t1l1lvvazpuadafm71jl4mrwq2y" +
"gokee3eamvjy8qq1fvy238838enjmy5wzy2md7yvsitp5vztft6j4q866efym7e6" +
"vu5wm9fpnwjyxfldw3vbo01mgjs75rgo7qioh8z8ij7jpyp8508okhgbbex3ceei" +
"786u5rw2a9gx743dj3fgq2irk"
- known_signature = "257f3f5f5f0a4e4626a18fc74bd42ec34dcb228a"
- known_timestamp = "7fffffff"
- known_sig_hint = "+A" + known_signature + "@" + known_timestamp
- known_signed_locator = known_locator + known_sig_hint
+ knownSignature = "257f3f5f5f0a4e4626a18fc74bd42ec34dcb228a"
+ knownTimestamp = "7fffffff"
+ knownSigHint = "+A" + knownSignature + "@" + knownTimestamp
+ knownSignedLocator = knownLocator + knownSigHint
)
func TestSignLocator(t *testing.T) {
- PermissionSecret = []byte(known_key)
- defer func() { PermissionSecret = nil }()
+ defer func(b []byte) {
+ PermissionSecret = b
+ }(PermissionSecret)
- if ts, err := ParseHexTimestamp(known_timestamp); err != nil {
- t.Errorf("bad known_timestamp %s", known_timestamp)
- } else {
- if known_signed_locator != SignLocator(known_locator, known_token, ts) {
- t.Fail()
- }
+ tsInt, err := strconv.ParseInt(knownTimestamp, 16, 0)
+ if err != nil {
+ t.Fatal(err)
}
-}
-
-func TestVerifySignature(t *testing.T) {
- PermissionSecret = []byte(known_key)
- defer func() { PermissionSecret = nil }()
-
- if VerifySignature(known_signed_locator, known_token) != nil {
- t.Fail()
- }
-}
-
-func TestVerifySignatureExtraHints(t *testing.T) {
- PermissionSecret = []byte(known_key)
- defer func() { PermissionSecret = nil }()
-
- if VerifySignature(known_locator+"+K@xyzzy"+known_sig_hint, known_token) != nil {
- t.Fatal("Verify cannot handle hint before permission signature")
- }
-
- if VerifySignature(known_locator+known_sig_hint+"+Zfoo", known_token) != nil {
- t.Fatal("Verify cannot handle hint after permission signature")
- }
-
- if VerifySignature(known_locator+"+K@xyzzy"+known_sig_hint+"+Zfoo", known_token) != nil {
- t.Fatal("Verify cannot handle hints around permission signature")
- }
-}
-
-// The size hint on the locator string should not affect signature validation.
-func TestVerifySignatureWrongSize(t *testing.T) {
- PermissionSecret = []byte(known_key)
- defer func() { PermissionSecret = nil }()
+ t0 := time.Unix(tsInt, 0)
- if VerifySignature(known_hash+"+999999"+known_sig_hint, known_token) != nil {
- t.Fatal("Verify cannot handle incorrect size hint")
+ PermissionSecret = []byte(knownKey)
+ if x := SignLocator(knownLocator, knownToken, t0); x != knownSignedLocator {
+ t.Fatalf("Got %+q, expected %+q", x, knownSignedLocator)
}
- if VerifySignature(known_hash+known_sig_hint, known_token) != nil {
- t.Fatal("Verify cannot handle missing size hint")
+ PermissionSecret = []byte("arbitrarykey")
+ if x := SignLocator(knownLocator, knownToken, t0); x == knownSignedLocator {
+ t.Fatalf("Got same signature %+q, even though PermissionSecret changed", x)
}
}
-func TestVerifySignatureBadSig(t *testing.T) {
- PermissionSecret = []byte(known_key)
- defer func() { PermissionSecret = nil }()
+func TestVerifyLocator(t *testing.T) {
+ defer func(b []byte) {
+ PermissionSecret = b
+ }(PermissionSecret)
- bad_locator := known_locator + "+Aaaaaaaaaaaaaaaa@" + known_timestamp
- if VerifySignature(bad_locator, known_token) != PermissionError {
- t.Fail()
+ PermissionSecret = []byte(knownKey)
+ if err := VerifySignature(knownSignedLocator, knownToken); err != nil {
+ t.Fatal(err)
}
-}
-
-func TestVerifySignatureBadTimestamp(t *testing.T) {
- PermissionSecret = []byte(known_key)
- defer func() { PermissionSecret = nil }()
-
- bad_locator := known_locator + "+A" + known_signature + "@OOOOOOOl"
- if VerifySignature(bad_locator, known_token) != PermissionError {
- t.Fail()
- }
-}
-
-func TestVerifySignatureBadSecret(t *testing.T) {
- PermissionSecret = []byte("00000000000000000000")
- defer func() { PermissionSecret = nil }()
-
- if VerifySignature(known_signed_locator, known_token) != PermissionError {
- t.Fail()
- }
-}
-
-func TestVerifySignatureBadToken(t *testing.T) {
- PermissionSecret = []byte(known_key)
- defer func() { PermissionSecret = nil }()
-
- if VerifySignature(known_signed_locator, "00000000") != PermissionError {
- t.Fail()
- }
-}
-
-func TestVerifySignatureExpired(t *testing.T) {
- PermissionSecret = []byte(known_key)
- defer func() { PermissionSecret = nil }()
- yesterday := time.Now().AddDate(0, 0, -1)
- expired_locator := SignLocator(known_hash, known_token, yesterday)
- if VerifySignature(expired_locator, known_token) != ExpiredError {
- t.Fail()
+ PermissionSecret = []byte("arbitrarykey")
+ if err := VerifySignature(knownSignedLocator, knownToken); err == nil {
+ t.Fatal("Verified signature even with wrong PermissionSecret")
}
}
"time"
)
-/*
- Keepstore initiates pull worker channel goroutine.
- The channel will process pull list.
- For each (next) pull request:
- For each locator listed, execute Pull on the server(s) listed
- Skip the rest of the servers if no errors
- Repeat
-*/
+// RunPullWorker is used by Keepstore to initiate the pull worker channel goroutine.
+// The channel processes the pull list.
+// For each (next) pull request:
+// For each locator listed, execute Pull on the server(s) listed
+// Skip the rest of the servers if no errors
+// Repeat
+//
func RunPullWorker(pullq *WorkQueue, keepClient *keepclient.KeepClient) {
nextItem := pullq.NextItem
for item := range nextItem {
pullRequest := item.(PullRequest)
- err := PullItemAndProcess(item.(PullRequest), GenerateRandomApiToken(), keepClient)
+ err := PullItemAndProcess(item.(PullRequest), GenerateRandomAPIToken(), keepClient)
pullq.DoneItem <- struct{}{}
if err == nil {
log.Printf("Pull %s success", pullRequest)
}
}
-/*
- For each Pull request:
- Generate a random API token.
- Generate a permission signature using this token, timestamp ~60 seconds in the future, and desired block hash.
- Using this token & signature, retrieve the given block.
- Write to storage
-*/
+// PullItemAndProcess pulls items from PullQueue and processes them.
+// For each Pull request:
+// Generate a random API token.
+// Generate a permission signature using this token, timestamp ~60 seconds in the future, and desired block hash.
+// Using this token & signature, retrieve the given block.
+// Write to storage
+//
func PullItemAndProcess(pullRequest PullRequest, token string, keepClient *keepclient.KeepClient) (err error) {
keepClient.Arvados.ApiToken = token
- service_roots := make(map[string]string)
+ serviceRoots := make(map[string]string)
for _, addr := range pullRequest.Servers {
- service_roots[addr] = addr
+ serviceRoots[addr] = addr
}
- keepClient.SetServiceRoots(service_roots, nil, nil)
+ keepClient.SetServiceRoots(serviceRoots, nil, nil)
// Generate signature with a random token
- expires_at := time.Now().Add(60 * time.Second)
- signedLocator := SignLocator(pullRequest.Locator, token, expires_at)
+ expiresAt := time.Now().Add(60 * time.Second)
+ signedLocator := SignLocator(pullRequest.Locator, token, expiresAt)
reader, contentLen, _, err := GetContent(signedLocator, keepClient)
if err != nil {
}
defer reader.Close()
- read_content, err := ioutil.ReadAll(reader)
+ readContent, err := ioutil.ReadAll(reader)
if err != nil {
return err
}
- if (read_content == nil) || (int64(len(read_content)) != contentLen) {
+ if (readContent == nil) || (int64(len(readContent)) != contentLen) {
return errors.New(fmt.Sprintf("Content not found for: %s", signedLocator))
}
- err = PutContent(read_content, pullRequest.Locator)
+ err = PutContent(readContent, pullRequest.Locator)
return
}
return reader, blocklen, url, err
}
-const ALPHA_NUMERIC = "0123456789abcdefghijklmnopqrstuvwxyz"
+const alphaNumeric = "0123456789abcdefghijklmnopqrstuvwxyz"
-func GenerateRandomApiToken() string {
+// GenerateRandomAPIToken generates a random API token.
+func GenerateRandomAPIToken() string {
var bytes = make([]byte, 36)
rand.Read(bytes)
for i, b := range bytes {
- bytes[i] = ALPHA_NUMERIC[b%byte(len(ALPHA_NUMERIC))]
+ bytes[i] = alphaNumeric[b%byte(len(alphaNumeric))]
}
return (string(bytes))
}
// Put block
var PutContent = func(content []byte, locator string) (err error) {
- err = PutBlock(content, locator)
+ _, err = PutBlock(content, locator)
return
}
// start api and keep servers
arvadostest.StartAPI()
- arvadostest.StartKeep()
+ arvadostest.StartKeep(2, false)
// make arvadosclient
arv, err := arvadosclient.MakeArvadosClient()
return rdr, int64(len(testData.Content)), "", nil
}
- keepClient.Arvados.ApiToken = GenerateRandomApiToken()
+ keepClient.Arvados.ApiToken = GenerateRandomAPIToken()
err := PullItemAndProcess(pullRequest, keepClient.Arvados.ApiToken, keepClient)
if len(testData.GetError) > 0 {
// When a new pull request arrives, the old one will be overwritten.
// This behavior is verified using these two maps in the
- // "TestPullWorker_pull_list_with_two_items_latest_replacing_old"
+ // "TestPullWorkerPullList_with_two_items_latest_replacing_old"
testPullLists = make(map[string]string)
}
go RunPullWorker(pullq, keepClient)
}
-var first_pull_list = []byte(`[
+var firstPullList = []byte(`[
{
"locator":"acbd18db4cc2f85cedef654fccc4a4d8+3",
"servers":[
}
]`)
-var second_pull_list = []byte(`[
+var secondPullList = []byte(`[
{
"locator":"73feffa4b7f6bb68e44cf984c85f6e88+3",
"servers":[
]`)
type PullWorkerTestData struct {
- name string
- req RequestTester
- response_code int
- response_body string
- read_content string
- read_error bool
- put_error bool
+ name string
+ req RequestTester
+ responseCode int
+ responseBody string
+ readContent string
+ readError bool
+ putError bool
}
-func (s *PullWorkerTestSuite) TestPullWorker_pull_list_with_two_locators(c *C) {
+func (s *PullWorkerTestSuite) TestPullWorkerPullList_with_two_locators(c *C) {
defer teardown()
- data_manager_token = "DATA MANAGER TOKEN"
+ dataManagerToken = "DATA MANAGER TOKEN"
testData := PullWorkerTestData{
- name: "TestPullWorker_pull_list_with_two_locators",
- req: RequestTester{"/pull", data_manager_token, "PUT", first_pull_list},
- response_code: http.StatusOK,
- response_body: "Received 2 pull requests\n",
- read_content: "hello",
- read_error: false,
- put_error: false,
+ name: "TestPullWorkerPullList_with_two_locators",
+ req: RequestTester{"/pull", dataManagerToken, "PUT", firstPullList},
+ responseCode: http.StatusOK,
+ responseBody: "Received 2 pull requests\n",
+ readContent: "hello",
+ readError: false,
+ putError: false,
}
performTest(testData, c)
}
-func (s *PullWorkerTestSuite) TestPullWorker_pull_list_with_one_locator(c *C) {
+func (s *PullWorkerTestSuite) TestPullWorkerPullList_with_one_locator(c *C) {
defer teardown()
- data_manager_token = "DATA MANAGER TOKEN"
+ dataManagerToken = "DATA MANAGER TOKEN"
testData := PullWorkerTestData{
- name: "TestPullWorker_pull_list_with_one_locator",
- req: RequestTester{"/pull", data_manager_token, "PUT", second_pull_list},
- response_code: http.StatusOK,
- response_body: "Received 1 pull requests\n",
- read_content: "hola",
- read_error: false,
- put_error: false,
+ name: "TestPullWorkerPullList_with_one_locator",
+ req: RequestTester{"/pull", dataManagerToken, "PUT", secondPullList},
+ responseCode: http.StatusOK,
+ responseBody: "Received 1 pull requests\n",
+ readContent: "hola",
+ readError: false,
+ putError: false,
}
performTest(testData, c)
func (s *PullWorkerTestSuite) TestPullWorker_error_on_get_one_locator(c *C) {
defer teardown()
- data_manager_token = "DATA MANAGER TOKEN"
+ dataManagerToken = "DATA MANAGER TOKEN"
testData := PullWorkerTestData{
- name: "TestPullWorker_error_on_get_one_locator",
- req: RequestTester{"/pull", data_manager_token, "PUT", second_pull_list},
- response_code: http.StatusOK,
- response_body: "Received 1 pull requests\n",
- read_content: "unused",
- read_error: true,
- put_error: false,
+ name: "TestPullWorker_error_on_get_one_locator",
+ req: RequestTester{"/pull", dataManagerToken, "PUT", secondPullList},
+ responseCode: http.StatusOK,
+ responseBody: "Received 1 pull requests\n",
+ readContent: "unused",
+ readError: true,
+ putError: false,
}
performTest(testData, c)
func (s *PullWorkerTestSuite) TestPullWorker_error_on_get_two_locators(c *C) {
defer teardown()
- data_manager_token = "DATA MANAGER TOKEN"
+ dataManagerToken = "DATA MANAGER TOKEN"
testData := PullWorkerTestData{
- name: "TestPullWorker_error_on_get_two_locators",
- req: RequestTester{"/pull", data_manager_token, "PUT", first_pull_list},
- response_code: http.StatusOK,
- response_body: "Received 2 pull requests\n",
- read_content: "unused",
- read_error: true,
- put_error: false,
+ name: "TestPullWorker_error_on_get_two_locators",
+ req: RequestTester{"/pull", dataManagerToken, "PUT", firstPullList},
+ responseCode: http.StatusOK,
+ responseBody: "Received 2 pull requests\n",
+ readContent: "unused",
+ readError: true,
+ putError: false,
}
performTest(testData, c)
func (s *PullWorkerTestSuite) TestPullWorker_error_on_put_one_locator(c *C) {
defer teardown()
- data_manager_token = "DATA MANAGER TOKEN"
+ dataManagerToken = "DATA MANAGER TOKEN"
testData := PullWorkerTestData{
- name: "TestPullWorker_error_on_put_one_locator",
- req: RequestTester{"/pull", data_manager_token, "PUT", second_pull_list},
- response_code: http.StatusOK,
- response_body: "Received 1 pull requests\n",
- read_content: "hello hello",
- read_error: false,
- put_error: true,
+ name: "TestPullWorker_error_on_put_one_locator",
+ req: RequestTester{"/pull", dataManagerToken, "PUT", secondPullList},
+ responseCode: http.StatusOK,
+ responseBody: "Received 1 pull requests\n",
+ readContent: "hello hello",
+ readError: false,
+ putError: true,
}
performTest(testData, c)
func (s *PullWorkerTestSuite) TestPullWorker_error_on_put_two_locators(c *C) {
defer teardown()
- data_manager_token = "DATA MANAGER TOKEN"
+ dataManagerToken = "DATA MANAGER TOKEN"
testData := PullWorkerTestData{
- name: "TestPullWorker_error_on_put_two_locators",
- req: RequestTester{"/pull", data_manager_token, "PUT", first_pull_list},
- response_code: http.StatusOK,
- response_body: "Received 2 pull requests\n",
- read_content: "hello again",
- read_error: false,
- put_error: true,
+ name: "TestPullWorker_error_on_put_two_locators",
+ req: RequestTester{"/pull", dataManagerToken, "PUT", firstPullList},
+ responseCode: http.StatusOK,
+ responseBody: "Received 2 pull requests\n",
+ readContent: "hello again",
+ readError: false,
+ putError: true,
}
performTest(testData, c)
// is used to check that behavior by first putting an item on the queue,
// and then performing the test. Thus the "testPullLists" has two entries;
// however, processedPullLists will see only the newest item in the list.
-func (s *PullWorkerTestSuite) TestPullWorker_pull_list_with_two_items_latest_replacing_old(c *C) {
+func (s *PullWorkerTestSuite) TestPullWorkerPullList_with_two_items_latest_replacing_old(c *C) {
defer teardown()
var firstInput = []int{1}
pullq.ReplaceQueue(makeTestWorkList(firstInput))
testPullLists["Added_before_actual_test_item"] = string(1)
- data_manager_token = "DATA MANAGER TOKEN"
+ dataManagerToken = "DATA MANAGER TOKEN"
testData := PullWorkerTestData{
- name: "TestPullWorker_pull_list_with_two_items_latest_replacing_old",
- req: RequestTester{"/pull", data_manager_token, "PUT", second_pull_list},
- response_code: http.StatusOK,
- response_body: "Received 1 pull requests\n",
- read_content: "hola de nuevo",
- read_error: false,
- put_error: false,
+ name: "TestPullWorkerPullList_with_two_items_latest_replacing_old",
+ req: RequestTester{"/pull", dataManagerToken, "PUT", secondPullList},
+ responseCode: http.StatusOK,
+ responseBody: "Received 1 pull requests\n",
+ readContent: "hola de nuevo",
+ readError: false,
+ putError: false,
}
performTest(testData, c)
}
// In this case, the item will not be placed on pullq
-func (s *PullWorkerTestSuite) TestPullWorker_invalid_data_manager_token(c *C) {
+func (s *PullWorkerTestSuite) TestPullWorker_invalid_dataManagerToken(c *C) {
defer teardown()
- data_manager_token = "DATA MANAGER TOKEN"
+ dataManagerToken = "DATA MANAGER TOKEN"
testData := PullWorkerTestData{
- name: "TestPullWorker_pull_list_with_two_locators",
- req: RequestTester{"/pull", "invalid_data_manager_token", "PUT", first_pull_list},
- response_code: http.StatusUnauthorized,
- response_body: "Unauthorized\n",
- read_content: "hello",
- read_error: false,
- put_error: false,
+ name: "TestPullWorkerPullList_with_two_locators",
+ req: RequestTester{"/pull", "invalid_dataManagerToken", "PUT", firstPullList},
+ responseCode: http.StatusUnauthorized,
+ responseBody: "Unauthorized\n",
+ readContent: "hello",
+ readError: false,
+ putError: false,
}
performTest(testData, c)
defer pullq.Close()
currentTestData = testData
- testPullLists[testData.name] = testData.response_body
+ testPullLists[testData.name] = testData.responseBody
processedPullLists := make(map[string]string)
}(GetContent)
GetContent = func(signedLocator string, keepClient *keepclient.KeepClient) (reader io.ReadCloser, contentLength int64, url string, err error) {
c.Assert(getStatusItem("PullQueue", "InProgress"), Equals, float64(1))
- processedPullLists[testData.name] = testData.response_body
- if testData.read_error {
+ processedPullLists[testData.name] = testData.responseBody
+ if testData.readError {
err = errors.New("Error getting data")
readError = err
return nil, 0, "", err
- } else {
- readContent = testData.read_content
- cb := &ClosingBuffer{bytes.NewBufferString(testData.read_content)}
- var rc io.ReadCloser
- rc = cb
- return rc, int64(len(testData.read_content)), "", nil
}
+ readContent = testData.readContent
+ cb := &ClosingBuffer{bytes.NewBufferString(testData.readContent)}
+ var rc io.ReadCloser
+ rc = cb
+ return rc, int64(len(testData.readContent)), "", nil
}
// Override PutContent to mock PutBlock functionality
defer func(orig func([]byte, string) error) { PutContent = orig }(PutContent)
PutContent = func(content []byte, locator string) (err error) {
- if testData.put_error {
+ if testData.putError {
err = errors.New("Error putting data")
putError = err
return err
- } else {
- putContent = content
- return nil
}
+ putContent = content
+ return nil
}
c.Assert(getStatusItem("PullQueue", "InProgress"), Equals, float64(0))
c.Assert(getStatusItem("PullQueue", "Queued"), Equals, float64(0))
response := IssueRequest(&testData.req)
- c.Assert(response.Code, Equals, testData.response_code)
- c.Assert(response.Body.String(), Equals, testData.response_body)
+ c.Assert(response.Code, Equals, testData.responseCode)
+ c.Assert(response.Body.String(), Equals, testData.responseBody)
expectEqualWithin(c, time.Second, 0, func() interface{} {
st := pullq.Status()
return st.InProgress + st.Queued
})
- if testData.name == "TestPullWorker_pull_list_with_two_items_latest_replacing_old" {
+ if testData.name == "TestPullWorkerPullList_with_two_items_latest_replacing_old" {
c.Assert(len(testPullLists), Equals, 2)
c.Assert(len(processedPullLists), Equals, 1)
c.Assert(testPullLists["Added_before_actual_test_item"], NotNil)
- c.Assert(testPullLists["TestPullWorker_pull_list_with_two_items_latest_replacing_old"], NotNil)
- c.Assert(processedPullLists["TestPullWorker_pull_list_with_two_items_latest_replacing_old"], NotNil)
+ c.Assert(testPullLists["TestPullWorkerPullList_with_two_items_latest_replacing_old"], NotNil)
+ c.Assert(processedPullLists["TestPullWorkerPullList_with_two_items_latest_replacing_old"], NotNil)
} else {
- if testData.response_code == http.StatusOK {
+ if testData.responseCode == http.StatusOK {
c.Assert(len(testPullLists), Equals, 1)
c.Assert(len(processedPullLists), Equals, 1)
c.Assert(testPullLists[testData.name], NotNil)
}
}
- if testData.read_error {
+ if testData.readError {
c.Assert(readError, NotNil)
- } else if testData.response_code == http.StatusOK {
+ } else if testData.responseCode == http.StatusOK {
c.Assert(readError, IsNil)
- c.Assert(readContent, Equals, testData.read_content)
- if testData.put_error {
+ c.Assert(readContent, Equals, testData.readContent)
+ if testData.putError {
c.Assert(putError, NotNil)
} else {
c.Assert(putError, IsNil)
- c.Assert(string(putContent), Equals, testData.read_content)
+ c.Assert(string(putContent), Equals, testData.readContent)
}
}
"time"
)
-/*
- Keepstore initiates trash worker channel goroutine.
- The channel will process trash list.
- For each (next) trash request:
- Delete the block indicated by the trash request Locator
- Repeat
-*/
-
+// RunTrashWorker is used by Keepstore to initiate trash worker channel goroutine.
+// The channel will process trash list.
+// For each (next) trash request:
+// Delete the block indicated by the trash request Locator
+// Repeat
+//
func RunTrashWorker(trashq *WorkQueue) {
for item := range trashq.NextItem {
trashRequest := item.(TrashRequest)
// TrashItem deletes the indicated block from every writable volume.
func TrashItem(trashRequest TrashRequest) {
reqMtime := time.Unix(trashRequest.BlockMtime, 0)
- if time.Since(reqMtime) < blob_signature_ttl {
- log.Printf("WARNING: data manager asked to delete a %v old block %v (BlockMtime %d = %v), but my blob_signature_ttl is %v! Skipping.",
+ if time.Since(reqMtime) < blobSignatureTTL {
+ log.Printf("WARNING: data manager asked to delete a %v old block %v (BlockMtime %d = %v), but my blobSignatureTTL is %v! Skipping.",
time.Since(reqMtime),
trashRequest.Locator,
trashRequest.BlockMtime,
reqMtime,
- blob_signature_ttl)
+ blobSignatureTTL)
return
}
continue
}
- if never_delete {
- err = errors.New("did not delete block because never_delete is true")
+ if neverDelete {
+ err = errors.New("did not delete block because neverDelete is true")
} else {
err = volume.Delete(trashRequest.Locator)
}
Expect no errors.
*/
func TestTrashWorkerIntegration_GetNonExistingLocator(t *testing.T) {
- never_delete = false
+ neverDelete = false
testData := TrashWorkerTestData{
Locator1: "5d41402abc4b2a76b9719d911017c592",
Block1: []byte("hello"),
Expect the second locator in volume 2 to be unaffected.
*/
func TestTrashWorkerIntegration_LocatorInVolume1(t *testing.T) {
- never_delete = false
+ neverDelete = false
testData := TrashWorkerTestData{
- Locator1: TEST_HASH,
- Block1: TEST_BLOCK,
+ Locator1: TestHash,
+ Block1: TestBlock,
- Locator2: TEST_HASH_2,
- Block2: TEST_BLOCK_2,
+ Locator2: TestHash2,
+ Block2: TestBlock2,
CreateData: true,
- DeleteLocator: TEST_HASH, // first locator
+ DeleteLocator: TestHash, // first locator
ExpectLocator1: false,
ExpectLocator2: true,
Expect the first locator in volume 1 to be unaffected.
*/
func TestTrashWorkerIntegration_LocatorInVolume2(t *testing.T) {
- never_delete = false
+ neverDelete = false
testData := TrashWorkerTestData{
- Locator1: TEST_HASH,
- Block1: TEST_BLOCK,
+ Locator1: TestHash,
+ Block1: TestBlock,
- Locator2: TEST_HASH_2,
- Block2: TEST_BLOCK_2,
+ Locator2: TestHash2,
+ Block2: TestBlock2,
CreateData: true,
- DeleteLocator: TEST_HASH_2, // locator 2
+ DeleteLocator: TestHash2, // locator 2
ExpectLocator1: true,
ExpectLocator2: false,
Expect locator to be deleted from both volumes.
*/
func TestTrashWorkerIntegration_LocatorInBothVolumes(t *testing.T) {
- never_delete = false
+ neverDelete = false
testData := TrashWorkerTestData{
- Locator1: TEST_HASH,
- Block1: TEST_BLOCK,
+ Locator1: TestHash,
+ Block1: TestBlock,
- Locator2: TEST_HASH,
- Block2: TEST_BLOCK,
+ Locator2: TestHash,
+ Block2: TestBlock,
CreateData: true,
- DeleteLocator: TEST_HASH,
+ DeleteLocator: TestHash,
ExpectLocator1: false,
ExpectLocator2: false,
Delete the second and expect the first to be still around.
*/
func TestTrashWorkerIntegration_MtimeMatchesForLocator1ButNotForLocator2(t *testing.T) {
- never_delete = false
+ neverDelete = false
testData := TrashWorkerTestData{
- Locator1: TEST_HASH,
- Block1: TEST_BLOCK,
+ Locator1: TestHash,
+ Block1: TestBlock,
- Locator2: TEST_HASH,
- Block2: TEST_BLOCK,
+ Locator2: TestHash,
+ Block2: TestBlock,
CreateData: true,
DifferentMtimes: true,
- DeleteLocator: TEST_HASH,
+ DeleteLocator: TestHash,
ExpectLocator1: true,
ExpectLocator2: false,
Expect the other unaffected.
*/
func TestTrashWorkerIntegration_TwoDifferentLocatorsInVolume1(t *testing.T) {
- never_delete = false
+ neverDelete = false
testData := TrashWorkerTestData{
- Locator1: TEST_HASH,
- Block1: TEST_BLOCK,
+ Locator1: TestHash,
+ Block1: TestBlock,
- Locator2: TEST_HASH_2,
- Block2: TEST_BLOCK_2,
+ Locator2: TestHash2,
+ Block2: TestBlock2,
CreateData: true,
CreateInVolume1: true,
- DeleteLocator: TEST_HASH, // locator 1
+ DeleteLocator: TestHash, // locator 1
ExpectLocator1: false,
ExpectLocator2: true,
will not be deleted becuase its Mtime is within the trash life time.
*/
func TestTrashWorkerIntegration_SameLocatorInTwoVolumesWithDefaultTrashLifeTime(t *testing.T) {
- never_delete = false
+ neverDelete = false
testData := TrashWorkerTestData{
- Locator1: TEST_HASH,
- Block1: TEST_BLOCK,
+ Locator1: TestHash,
+ Block1: TestBlock,
- Locator2: TEST_HASH_2,
- Block2: TEST_BLOCK_2,
+ Locator2: TestHash2,
+ Block2: TestBlock2,
CreateData: true,
CreateInVolume1: true,
UseTrashLifeTime: true,
- DeleteLocator: TEST_HASH, // locator 1
+ DeleteLocator: TestHash, // locator 1
// Since trash life time is in effect, block won't be deleted.
ExpectLocator1: true,
performTrashWorkerTest(testData, t)
}
-/* Delete a block with matching mtime for locator in both volumes, but never_delete is true,
+/* Delete a block with matching mtime for locator in both volumes, but neverDelete is true,
so block won't be deleted.
*/
func TestTrashWorkerIntegration_NeverDelete(t *testing.T) {
- never_delete = true
+ neverDelete = true
testData := TrashWorkerTestData{
- Locator1: TEST_HASH,
- Block1: TEST_BLOCK,
+ Locator1: TestHash,
+ Block1: TestBlock,
- Locator2: TEST_HASH,
- Block2: TEST_BLOCK,
+ Locator2: TestHash,
+ Block2: TestBlock,
CreateData: true,
- DeleteLocator: TEST_HASH,
+ DeleteLocator: TestHash,
ExpectLocator1: true,
ExpectLocator2: true,
}
}
- oldBlockTime := time.Now().Add(-blob_signature_ttl - time.Minute)
+ oldBlockTime := time.Now().Add(-blobSignatureTTL - time.Minute)
// Create TrashRequest for the test
trashRequest := TrashRequest{
-// A Volume is an interface representing a Keep back-end storage unit:
-// for example, a single mounted disk, a RAID array, an Amazon S3 volume,
-// etc.
-
package main
import (
"time"
)
+// A Volume is an interface representing a Keep back-end storage unit:
+// for example, a single mounted disk, a RAID array, an Amazon S3 volume,
+// etc.
type Volume interface {
// Get a block. IFF the returned error is nil, the caller must
// put the returned slice back into the buffer pool when it's
// access log if the block is not found on any other volumes
// either).
//
- // If the data in the backing store is bigger than BLOCKSIZE,
+ // If the data in the backing store is bigger than BlockSize,
// Get is permitted to return an error without reading any of
// the data.
Get(loc string) ([]byte, error)
//
// loc is as described in Get.
//
- // len(block) is guaranteed to be between 0 and BLOCKSIZE.
+ // len(block) is guaranteed to be between 0 and BlockSize.
//
// If a block is already stored under the same name (loc) with
// different content, Put must either overwrite the existing
- // data with the new data or return a non-nil error.
+ // data with the new data or return a non-nil error. When
+ // overwriting existing data, it must never leave the storage
+ // device in an inconsistent state: a subsequent call to Get
+ // must return either the entire old block, the entire new
+	// block, or an error. (An implementation that cannot perform
+ // atomic updates must leave the old data alone and return an
+ // error.)
//
// Put also sets the timestamp for the given locator to the
// current time.
//
// - size is the number of bytes of content, given as a
// decimal number with one or more digits
- //
+ //
// - timestamp is the timestamp stored for the locator,
// given as a decimal number of seconds after January 1,
// 1970 UTC.
// loc is as described in Get.
//
// If the timestamp for the given locator is newer than
- // blob_signature_ttl, Delete must not delete the data.
+ // blobSignatureTTL, Delete must not delete the data.
//
// If a Delete operation overlaps with any Touch or Put
// operations on the same locator, the implementation must
// reliably or fail outright.
//
// Corollary: A successful Touch or Put guarantees a block
- // will not be deleted for at least blob_signature_ttl
+ // will not be deleted for at least blobSignatureTTL
// seconds.
Delete(loc string) error
// will fail because it is full, but Mtime or Delete can
// succeed -- then Writable should return false.
Writable() bool
+
+ // Replication returns the storage redundancy of the
+ // underlying device. It will be passed on to clients in
+ // responses to PUT requests.
+ Replication() int
}
// A VolumeManager tells callers which volumes can read, which volumes
counter uint32
}
+// MakeRRVolumeManager initializes RRVolumeManager
func MakeRRVolumeManager(volumes []Volume) *RRVolumeManager {
vm := &RRVolumeManager{}
for _, v := range volumes {
return vm
}
+// AllReadable returns an array of all readable volumes
func (vm *RRVolumeManager) AllReadable() []Volume {
return vm.readables
}
+// AllWritable returns an array of all writable volumes
func (vm *RRVolumeManager) AllWritable() []Volume {
return vm.writables
}
+// NextWritable returns the next writable
func (vm *RRVolumeManager) NextWritable() Volume {
if len(vm.writables) == 0 {
return nil
return vm.writables[i%uint32(len(vm.writables))]
}
+// Close the RRVolumeManager
func (vm *RRVolumeManager) Close() {
}
+
+// VolumeStatus provides status information of the volume consisting of:
+// * mount_point
+// * device_num (an integer identifying the underlying storage system)
+// * bytes_free
+// * bytes_used
+type VolumeStatus struct {
+ MountPoint string `json:"mount_point"`
+ DeviceNum uint64 `json:"device_num"`
+ BytesFree uint64 `json:"bytes_free"`
+ BytesUsed uint64 `json:"bytes_used"`
+}
import (
"bytes"
+ "crypto/md5"
+ "fmt"
"os"
+ "regexp"
+ "sort"
+ "strings"
"testing"
"time"
)
// A TestableVolumeFactory returns a new TestableVolume. The factory
-// function, and the TestableVolume it returns, can use t to write
+// function, and the TestableVolume it returns, can use "t" to write
// logs, fail the current test, etc.
type TestableVolumeFactory func(t *testing.T) TestableVolume
// DoGenericVolumeTests runs a set of tests that every TestableVolume
-// is expected to pass. It calls factory to create a new
-// TestableVolume for each test case, to avoid leaking state between
-// tests.
+// is expected to pass. It calls factory to create a new TestableVolume
+// for each test case, to avoid leaking state between tests.
func DoGenericVolumeTests(t *testing.T, factory TestableVolumeFactory) {
+ testGet(t, factory)
+ testGetNoSuchBlock(t, factory)
+
+ testCompareNonexistent(t, factory)
+ testCompareSameContent(t, factory, TestHash, TestBlock)
+ testCompareSameContent(t, factory, EmptyHash, EmptyBlock)
+ testCompareWithCollision(t, factory, TestHash, TestBlock, []byte("baddata"))
+ testCompareWithCollision(t, factory, TestHash, TestBlock, EmptyBlock)
+ testCompareWithCollision(t, factory, EmptyHash, EmptyBlock, TestBlock)
+ testCompareWithCorruptStoredData(t, factory, TestHash, TestBlock, []byte("baddata"))
+ testCompareWithCorruptStoredData(t, factory, TestHash, TestBlock, EmptyBlock)
+ testCompareWithCorruptStoredData(t, factory, EmptyHash, EmptyBlock, []byte("baddata"))
+
+ testPutBlockWithSameContent(t, factory, TestHash, TestBlock)
+ testPutBlockWithSameContent(t, factory, EmptyHash, EmptyBlock)
+ testPutBlockWithDifferentContent(t, factory, TestHash, TestBlock, TestBlock2)
+ testPutBlockWithDifferentContent(t, factory, TestHash, EmptyBlock, TestBlock)
+ testPutBlockWithDifferentContent(t, factory, TestHash, TestBlock, EmptyBlock)
+ testPutBlockWithDifferentContent(t, factory, EmptyHash, EmptyBlock, TestBlock)
+ testPutMultipleBlocks(t, factory)
+
+ testPutAndTouch(t, factory)
+ testTouchNoSuchBlock(t, factory)
+
+ testMtimeNoSuchBlock(t, factory)
+
+ testIndexTo(t, factory)
+
testDeleteNewBlock(t, factory)
testDeleteOldBlock(t, factory)
+ testDeleteNoSuchBlock(t, factory)
+
+ testStatus(t, factory)
+
+ testString(t, factory)
+
+ testUpdateReadOnly(t, factory)
+
+ testGetConcurrent(t, factory)
+ testPutConcurrent(t, factory)
+
+ testPutFullBlock(t, factory)
+}
+
+// Put a test block, get it and verify content
+// Test should pass for both writable and read-only volumes
+func testGet(t *testing.T, factory TestableVolumeFactory) {
+ v := factory(t)
+ defer v.Teardown()
+
+ v.PutRaw(TestHash, TestBlock)
+
+ buf, err := v.Get(TestHash)
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ bufs.Put(buf)
+
+ if bytes.Compare(buf, TestBlock) != 0 {
+ t.Errorf("expected %s, got %s", string(TestBlock), string(buf))
+ }
+}
+
+// Invoke get on a block that does not exist in volume; should result in error
+// Test should pass for both writable and read-only volumes
+func testGetNoSuchBlock(t *testing.T, factory TestableVolumeFactory) {
+ v := factory(t)
+ defer v.Teardown()
+
+ if _, err := v.Get(TestHash2); err == nil {
+ t.Errorf("Expected error while getting non-existing block %v", TestHash2)
+ }
+}
+
+// Compare() should return os.ErrNotExist if the block does not exist.
+// Otherwise, writing new data causes CompareAndTouch() to generate
+// error logs even though everything is working fine.
+func testCompareNonexistent(t *testing.T, factory TestableVolumeFactory) {
+ v := factory(t)
+ defer v.Teardown()
+
+ err := v.Compare(TestHash, TestBlock)
+ if err != os.ErrNotExist {
+ t.Errorf("Got err %T %q, expected os.ErrNotExist", err, err)
+ }
+}
+
+// Put a test block and compare the locator with same content
+// Test should pass for both writable and read-only volumes
+func testCompareSameContent(t *testing.T, factory TestableVolumeFactory, testHash string, testData []byte) {
+ v := factory(t)
+ defer v.Teardown()
+
+ v.PutRaw(testHash, testData)
+
+ // Compare the block locator with same content
+ err := v.Compare(testHash, testData)
+ if err != nil {
+ t.Errorf("Got err %q, expected nil", err)
+ }
+}
+
+// Test behavior of Compare() when stored data matches expected
+// checksum but differs from new data we need to store. Requires
+// testHash = md5(testDataA).
+//
+// Test should pass for both writable and read-only volumes
+func testCompareWithCollision(t *testing.T, factory TestableVolumeFactory, testHash string, testDataA, testDataB []byte) {
+ v := factory(t)
+ defer v.Teardown()
+
+ v.PutRaw(testHash, testDataA)
+
+ // Compare the block locator with different content; collision
+ err := v.Compare(TestHash, testDataB)
+ if err == nil {
+ t.Errorf("Got err nil, expected error due to collision")
+ }
+}
+
+// Test behavior of Compare() when stored data has become
+// corrupted. Requires testHash = md5(testDataA) != md5(testDataB).
+//
+// Test should pass for both writable and read-only volumes
+func testCompareWithCorruptStoredData(t *testing.T, factory TestableVolumeFactory, testHash string, testDataA, testDataB []byte) {
+ v := factory(t)
+ defer v.Teardown()
+
+ v.PutRaw(TestHash, testDataB)
+
+ err := v.Compare(testHash, testDataA)
+ if err == nil || err == CollisionError {
+ t.Errorf("Got err %+v, expected non-collision error", err)
+ }
+}
+
+// Put a block and put again with same content
+// Test is intended for only writable volumes
+func testPutBlockWithSameContent(t *testing.T, factory TestableVolumeFactory, testHash string, testData []byte) {
+ v := factory(t)
+ defer v.Teardown()
+
+ if v.Writable() == false {
+ return
+ }
+
+ err := v.Put(testHash, testData)
+ if err != nil {
+ t.Errorf("Got err putting block %q: %q, expected nil", TestBlock, err)
+ }
+
+ err = v.Put(testHash, testData)
+ if err != nil {
+ t.Errorf("Got err putting block second time %q: %q, expected nil", TestBlock, err)
+ }
+}
+
+// Put a block and put again with different content
+// Test is intended for only writable volumes
+func testPutBlockWithDifferentContent(t *testing.T, factory TestableVolumeFactory, testHash string, testDataA, testDataB []byte) {
+ v := factory(t)
+ defer v.Teardown()
+
+ if v.Writable() == false {
+ return
+ }
+
+ err := v.Put(testHash, testDataA)
+ if err != nil {
+ t.Errorf("Got err putting block %q: %q, expected nil", testDataA, err)
+ }
+
+ putErr := v.Put(testHash, testDataB)
+ buf, getErr := v.Get(testHash)
+ if putErr == nil {
+ // Put must not return a nil error unless it has
+ // overwritten the existing data.
+ if bytes.Compare(buf, testDataB) != 0 {
+ t.Errorf("Put succeeded but Get returned %+q, expected %+q", buf, testDataB)
+ }
+ } else {
+ // It is permissible for Put to fail, but it must
+ // leave us with either the original data, the new
+ // data, or nothing at all.
+ if getErr == nil && bytes.Compare(buf, testDataA) != 0 && bytes.Compare(buf, testDataB) != 0 {
+ t.Errorf("Put failed but Get returned %+q, which is neither %+q nor %+q", buf, testDataA, testDataB)
+ }
+ }
+ if getErr == nil {
+ bufs.Put(buf)
+ }
+}
+
+// Put and get multiple blocks
+// Test is intended for only writable volumes
+func testPutMultipleBlocks(t *testing.T, factory TestableVolumeFactory) {
+ v := factory(t)
+ defer v.Teardown()
+
+ if v.Writable() == false {
+ return
+ }
+
+ err := v.Put(TestHash, TestBlock)
+ if err != nil {
+ t.Errorf("Got err putting block %q: %q, expected nil", TestBlock, err)
+ }
+
+ err = v.Put(TestHash2, TestBlock2)
+ if err != nil {
+ t.Errorf("Got err putting block %q: %q, expected nil", TestBlock2, err)
+ }
+
+ err = v.Put(TestHash3, TestBlock3)
+ if err != nil {
+ t.Errorf("Got err putting block %q: %q, expected nil", TestBlock3, err)
+ }
+
+ data, err := v.Get(TestHash)
+ if err != nil {
+ t.Error(err)
+ } else {
+ if bytes.Compare(data, TestBlock) != 0 {
+ t.Errorf("Block present, but got %+q, expected %+q", data, TestBlock)
+ }
+ bufs.Put(data)
+ }
+
+ data, err = v.Get(TestHash2)
+ if err != nil {
+ t.Error(err)
+ } else {
+ if bytes.Compare(data, TestBlock2) != 0 {
+ t.Errorf("Block present, but got %+q, expected %+q", data, TestBlock2)
+ }
+ bufs.Put(data)
+ }
+
+ data, err = v.Get(TestHash3)
+ if err != nil {
+ t.Error(err)
+ } else {
+ if bytes.Compare(data, TestBlock3) != 0 {
+			t.Errorf("Block present, but got %+q, expected %+q", data, TestBlock3)
+ }
+ bufs.Put(data)
+ }
+}
+
+// testPutAndTouch
+// Test that when applying PUT to a block that already exists,
+// the block's modification time is updated.
+// Test is intended for only writable volumes
+func testPutAndTouch(t *testing.T, factory TestableVolumeFactory) {
+ v := factory(t)
+ defer v.Teardown()
+
+ if v.Writable() == false {
+ return
+ }
+
+ if err := v.Put(TestHash, TestBlock); err != nil {
+ t.Error(err)
+ }
+
+ // We'll verify { t0 < threshold < t1 }, where t0 is the
+ // existing block's timestamp on disk before Put() and t1 is
+ // its timestamp after Put().
+ threshold := time.Now().Add(-time.Second)
+
+ // Set the stored block's mtime far enough in the past that we
+ // can see the difference between "timestamp didn't change"
+ // and "timestamp granularity is too low".
+ v.TouchWithDate(TestHash, time.Now().Add(-20*time.Second))
+
+ // Make sure v.Mtime() agrees the above Utime really worked.
+ if t0, err := v.Mtime(TestHash); err != nil || t0.IsZero() || !t0.Before(threshold) {
+ t.Errorf("Setting mtime failed: %v, %v", t0, err)
+ }
+
+ // Write the same block again.
+ if err := v.Put(TestHash, TestBlock); err != nil {
+ t.Error(err)
+ }
+
+ // Verify threshold < t1
+ if t1, err := v.Mtime(TestHash); err != nil {
+ t.Error(err)
+ } else if t1.Before(threshold) {
+ t.Errorf("t1 %v should be >= threshold %v after v.Put ", t1, threshold)
+ }
+}
+
+// Touching a non-existing block should result in error.
+// Test should pass for both writable and read-only volumes
+func testTouchNoSuchBlock(t *testing.T, factory TestableVolumeFactory) {
+ v := factory(t)
+ defer v.Teardown()
+
+ if err := v.Touch(TestHash); err == nil {
+ t.Error("Expected error when attempted to touch a non-existing block")
+ }
}
-// Calling Delete() for a block immediately after writing it should
-// neither delete the data nor return an error.
+// Invoking Mtime on a non-existing block should result in error.
+// Test should pass for both writable and read-only volumes
+func testMtimeNoSuchBlock(t *testing.T, factory TestableVolumeFactory) {
+ v := factory(t)
+ defer v.Teardown()
+
+ if _, err := v.Mtime("12345678901234567890123456789012"); err == nil {
+ t.Error("Expected error when updating Mtime on a non-existing block")
+ }
+}
+
+// Put a few blocks and invoke IndexTo with:
+// * no prefix
+// * with a prefix
+// * with no such prefix
+// Test should pass for both writable and read-only volumes
+func testIndexTo(t *testing.T, factory TestableVolumeFactory) {
+ v := factory(t)
+ defer v.Teardown()
+
+ v.PutRaw(TestHash, TestBlock)
+ v.PutRaw(TestHash2, TestBlock2)
+ v.PutRaw(TestHash3, TestBlock3)
+
+ // Blocks whose names aren't Keep hashes should be omitted from
+ // index
+ v.PutRaw("fffffffffnotreallyahashfffffffff", nil)
+ v.PutRaw("FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF", nil)
+ v.PutRaw("f0000000000000000000000000000000f", nil)
+ v.PutRaw("f00", nil)
+
+ buf := new(bytes.Buffer)
+ v.IndexTo("", buf)
+ indexRows := strings.Split(string(buf.Bytes()), "\n")
+ sort.Strings(indexRows)
+ sortedIndex := strings.Join(indexRows, "\n")
+ m, err := regexp.MatchString(
+ `^\n`+TestHash+`\+\d+ \d+\n`+
+ TestHash3+`\+\d+ \d+\n`+
+ TestHash2+`\+\d+ \d+$`,
+ sortedIndex)
+ if err != nil {
+ t.Error(err)
+ } else if !m {
+ t.Errorf("Got index %q for empty prefix", sortedIndex)
+ }
+
+ for _, prefix := range []string{"f", "f15", "f15ac"} {
+ buf = new(bytes.Buffer)
+ v.IndexTo(prefix, buf)
+
+ m, err := regexp.MatchString(`^`+TestHash2+`\+\d+ \d+\n$`, string(buf.Bytes()))
+ if err != nil {
+ t.Error(err)
+ } else if !m {
+ t.Errorf("Got index %q for prefix %s", string(buf.Bytes()), prefix)
+ }
+ }
+
+ for _, prefix := range []string{"zero", "zip", "zilch"} {
+ buf = new(bytes.Buffer)
+ v.IndexTo(prefix, buf)
+ if err != nil {
+ t.Errorf("Got error on IndexTo with no such prefix %v", err.Error())
+ } else if buf.Len() != 0 {
+ t.Errorf("Expected empty list for IndexTo with no such prefix %s", prefix)
+ }
+ }
+}
+
+// Calling Delete() for a block immediately after writing it (not old enough)
+// should neither delete the data nor return an error.
+// Test is intended for only writable volumes
func testDeleteNewBlock(t *testing.T, factory TestableVolumeFactory) {
v := factory(t)
defer v.Teardown()
- v.Put(TEST_HASH, TEST_BLOCK)
+ blobSignatureTTL = 300 * time.Second
+
+ if v.Writable() == false {
+ return
+ }
- if err := v.Delete(TEST_HASH); err != nil {
+ v.Put(TestHash, TestBlock)
+
+ if err := v.Delete(TestHash); err != nil {
t.Error(err)
}
- if data, err := v.Get(TEST_HASH); err != nil {
+ data, err := v.Get(TestHash)
+ if err != nil {
t.Error(err)
- } else if bytes.Compare(data, TEST_BLOCK) != 0 {
- t.Error("Block still present, but content is incorrect: %+v != %+v", data, TEST_BLOCK)
+ } else {
+ if bytes.Compare(data, TestBlock) != 0 {
+ t.Errorf("Got data %+q, expected %+q", data, TestBlock)
+ }
+ bufs.Put(data)
}
}
// Calling Delete() for a block with a timestamp older than
-// blob_signature_ttl seconds in the past should delete the data.
+// blobSignatureTTL seconds in the past should delete the data.
+// Test is intended for only writable volumes
func testDeleteOldBlock(t *testing.T, factory TestableVolumeFactory) {
v := factory(t)
defer v.Teardown()
- v.Put(TEST_HASH, TEST_BLOCK)
- v.TouchWithDate(TEST_HASH, time.Now().Add(-2*blob_signature_ttl*time.Second))
+ blobSignatureTTL = 300 * time.Second
+
+ if v.Writable() == false {
+ return
+ }
+
+ v.Put(TestHash, TestBlock)
+ v.TouchWithDate(TestHash, time.Now().Add(-2*blobSignatureTTL))
+
+ if err := v.Delete(TestHash); err != nil {
+ t.Error(err)
+ }
+ if _, err := v.Get(TestHash); err == nil || !os.IsNotExist(err) {
+ t.Errorf("os.IsNotExist(%v) should have been true", err)
+ }
+}
+
+// Calling Delete() for a block that does not exist should result in error.
+// Test should pass for both writable and read-only volumes
+func testDeleteNoSuchBlock(t *testing.T, factory TestableVolumeFactory) {
+ v := factory(t)
+ defer v.Teardown()
+
+ if err := v.Delete(TestHash2); err == nil {
+ t.Errorf("Expected error when attempting to delete a non-existing block")
+ }
+}
+
+// Invoke Status and verify that VolumeStatus is returned
+// Test should pass for both writable and read-only volumes
+func testStatus(t *testing.T, factory TestableVolumeFactory) {
+ v := factory(t)
+ defer v.Teardown()
+
+ // Get node status and make a basic sanity check.
+ status := v.Status()
+ if status.DeviceNum == 0 {
+ t.Errorf("uninitialized device_num in %v", status)
+ }
+
+ if status.BytesFree == 0 {
+ t.Errorf("uninitialized bytes_free in %v", status)
+ }
+
+ if status.BytesUsed == 0 {
+ t.Errorf("uninitialized bytes_used in %v", status)
+ }
+}
+
+// Invoke String for the volume; expect non-empty result
+// Test should pass for both writable and read-only volumes
+func testString(t *testing.T, factory TestableVolumeFactory) {
+ v := factory(t)
+ defer v.Teardown()
- if err := v.Delete(TEST_HASH); err != nil {
+ if id := v.String(); len(id) == 0 {
+ t.Error("Got empty string for v.String()")
+ }
+}
+
+// Putting, updating, touching, and deleting blocks from a read-only volume result in error.
+// Test is intended for only read-only volumes
+func testUpdateReadOnly(t *testing.T, factory TestableVolumeFactory) {
+ v := factory(t)
+ defer v.Teardown()
+
+ if v.Writable() == true {
+ return
+ }
+
+ v.PutRaw(TestHash, TestBlock)
+
+ // Get from read-only volume should succeed
+ _, err := v.Get(TestHash)
+ if err != nil {
+ t.Errorf("got err %v, expected nil", err)
+ }
+
+ // Put a new block to read-only volume should result in error
+ err = v.Put(TestHash2, TestBlock2)
+ if err == nil {
+ t.Errorf("Expected error when putting block in a read-only volume")
+ }
+ _, err = v.Get(TestHash2)
+ if err == nil {
+ t.Errorf("Expected error when getting block whose put in read-only volume failed")
+ }
+
+ // Touch a block in read-only volume should result in error
+ err = v.Touch(TestHash)
+ if err == nil {
+ t.Errorf("Expected error when touching block in a read-only volume")
+ }
+
+ // Delete a block from a read-only volume should result in error
+ err = v.Delete(TestHash)
+ if err == nil {
+ t.Errorf("Expected error when deleting block from a read-only volume")
+ }
+
+ // Overwriting an existing block in read-only volume should result in error
+ err = v.Put(TestHash, TestBlock)
+ if err == nil {
+ t.Errorf("Expected error when putting block in a read-only volume")
+ }
+}
+
+// Launch concurrent Gets
+// Test should pass for both writable and read-only volumes
+func testGetConcurrent(t *testing.T, factory TestableVolumeFactory) {
+ v := factory(t)
+ defer v.Teardown()
+
+ v.PutRaw(TestHash, TestBlock)
+ v.PutRaw(TestHash2, TestBlock2)
+ v.PutRaw(TestHash3, TestBlock3)
+
+ sem := make(chan int)
+ go func(sem chan int) {
+ buf, err := v.Get(TestHash)
+ if err != nil {
+ t.Errorf("err1: %v", err)
+ }
+ bufs.Put(buf)
+ if bytes.Compare(buf, TestBlock) != 0 {
+ t.Errorf("buf should be %s, is %s", string(TestBlock), string(buf))
+ }
+ sem <- 1
+ }(sem)
+
+ go func(sem chan int) {
+ buf, err := v.Get(TestHash2)
+ if err != nil {
+ t.Errorf("err2: %v", err)
+ }
+ bufs.Put(buf)
+ if bytes.Compare(buf, TestBlock2) != 0 {
+ t.Errorf("buf should be %s, is %s", string(TestBlock2), string(buf))
+ }
+ sem <- 1
+ }(sem)
+
+ go func(sem chan int) {
+ buf, err := v.Get(TestHash3)
+ if err != nil {
+ t.Errorf("err3: %v", err)
+ }
+ bufs.Put(buf)
+ if bytes.Compare(buf, TestBlock3) != 0 {
+ t.Errorf("buf should be %s, is %s", string(TestBlock3), string(buf))
+ }
+ sem <- 1
+ }(sem)
+
+ // Wait for all goroutines to finish
+ for done := 0; done < 3; {
+ done += <-sem
+ }
+}
+
+// Launch concurrent Puts
+// Test is intended for only writable volumes
+func testPutConcurrent(t *testing.T, factory TestableVolumeFactory) {
+ v := factory(t)
+ defer v.Teardown()
+
+ if v.Writable() == false {
+ return
+ }
+
+ sem := make(chan int)
+ go func(sem chan int) {
+ err := v.Put(TestHash, TestBlock)
+ if err != nil {
+ t.Errorf("err1: %v", err)
+ }
+ sem <- 1
+ }(sem)
+
+ go func(sem chan int) {
+ err := v.Put(TestHash2, TestBlock2)
+ if err != nil {
+ t.Errorf("err2: %v", err)
+ }
+ sem <- 1
+ }(sem)
+
+ go func(sem chan int) {
+ err := v.Put(TestHash3, TestBlock3)
+ if err != nil {
+ t.Errorf("err3: %v", err)
+ }
+ sem <- 1
+ }(sem)
+
+ // Wait for all goroutines to finish
+ for done := 0; done < 3; {
+ done += <-sem
+ }
+
+ // Double check that we actually wrote the blocks we expected to write.
+ buf, err := v.Get(TestHash)
+ if err != nil {
+ t.Errorf("Get #1: %v", err)
+ }
+ bufs.Put(buf)
+ if bytes.Compare(buf, TestBlock) != 0 {
+ t.Errorf("Get #1: expected %s, got %s", string(TestBlock), string(buf))
+ }
+
+ buf, err = v.Get(TestHash2)
+ if err != nil {
+ t.Errorf("Get #2: %v", err)
+ }
+ bufs.Put(buf)
+ if bytes.Compare(buf, TestBlock2) != 0 {
+ t.Errorf("Get #2: expected %s, got %s", string(TestBlock2), string(buf))
+ }
+
+ buf, err = v.Get(TestHash3)
+ if err != nil {
+ t.Errorf("Get #3: %v", err)
+ }
+ bufs.Put(buf)
+ if bytes.Compare(buf, TestBlock3) != 0 {
+ t.Errorf("Get #3: expected %s, got %s", string(TestBlock3), string(buf))
+ }
+}
+
+// Write and read back a full size block
+func testPutFullBlock(t *testing.T, factory TestableVolumeFactory) {
+ v := factory(t)
+ defer v.Teardown()
+
+ if !v.Writable() {
+ return
+ }
+
+ wdata := make([]byte, BlockSize)
+ wdata[0] = 'a'
+ wdata[BlockSize-1] = 'z'
+ hash := fmt.Sprintf("%x", md5.Sum(wdata))
+ err := v.Put(hash, wdata)
+ if err != nil {
+ t.Fatal(err)
+ }
+ rdata, err := v.Get(hash)
+ if err != nil {
t.Error(err)
+ } else {
+ defer bufs.Put(rdata)
}
- if _, err := v.Get(TEST_HASH); err == nil || !os.IsNotExist(err) {
- t.Errorf("os.IsNotExist(%v) should have been true", err.Error())
+ if bytes.Compare(rdata, wdata) != 0 {
+ t.Error("rdata != wdata")
}
}
// channel unblocks all operations. By default, Gate is a
// closed channel, so all operations proceed without
// blocking. See trash_worker_test.go for an example.
- Gate chan struct{}
+ Gate chan struct{}
called map[string]int
mutex sync.Mutex
func (v *MockVolume) CallCount(method string) int {
v.mutex.Lock()
defer v.mutex.Unlock()
- if c, ok := v.called[method]; !ok {
+ c, ok := v.called[method]
+ if !ok {
return 0
- } else {
- return c
}
+ return c
}
func (v *MockVolume) gotCall(method string) {
return MethodDisabledError
}
if _, ok := v.Store[loc]; ok {
- if time.Since(v.Timestamps[loc]) < blob_signature_ttl {
+ if time.Since(v.Timestamps[loc]) < blobSignatureTTL {
return nil
}
delete(v.Store, loc)
func (v *MockVolume) Writable() bool {
return !v.Readonly
}
+
+func (v *MockVolume) Replication() int {
+ return 1
+}
package main
import (
- "bytes"
+ "bufio"
+ "errors"
+ "flag"
"fmt"
"io"
"io/ioutil"
"time"
)
+type unixVolumeAdder struct {
+ *volumeSet
+}
+
+func (vs *unixVolumeAdder) Set(value string) error {
+ if dirs := strings.Split(value, ","); len(dirs) > 1 {
+ log.Print("DEPRECATED: using comma-separated volume list.")
+ for _, dir := range dirs {
+ if err := vs.Set(dir); err != nil {
+ return err
+ }
+ }
+ return nil
+ }
+ if len(value) == 0 || value[0] != '/' {
+ return errors.New("Invalid volume: must begin with '/'.")
+ }
+ if _, err := os.Stat(value); err != nil {
+ return err
+ }
+ var locker sync.Locker
+ if flagSerializeIO {
+ locker = &sync.Mutex{}
+ }
+ *vs.volumeSet = append(*vs.volumeSet, &UnixVolume{
+ root: value,
+ locker: locker,
+ readonly: flagReadonly,
+ })
+ return nil
+}
+
+func init() {
+ flag.Var(
+ &unixVolumeAdder{&volumes},
+ "volumes",
+ "Deprecated synonym for -volume.")
+ flag.Var(
+ &unixVolumeAdder{&volumes},
+ "volume",
+ "Local storage directory. Can be given more than once to add multiple directories. If none are supplied, the default is to use all directories named \"keep\" that exist in the top level directory of a mount point at startup time. Can be a comma-separated list, but this is deprecated: use multiple -volume arguments instead.")
+}
+
+// Discover adds a UnixVolume for every directory named "keep" that is
+// located at the top level of a device- or tmpfs-backed mount point
+// other than "/". It returns the number of volumes added.
+func (vs *unixVolumeAdder) Discover() int {
+ added := 0
+ f, err := os.Open(ProcMounts)
+ if err != nil {
+ log.Fatalf("opening %s: %s", ProcMounts, err)
+ }
+ scanner := bufio.NewScanner(f)
+ for scanner.Scan() {
+ args := strings.Fields(scanner.Text())
+ if err := scanner.Err(); err != nil {
+ log.Fatalf("reading %s: %s", ProcMounts, err)
+ }
+ dev, mount := args[0], args[1]
+ if mount == "/" {
+ continue
+ }
+ if dev != "tmpfs" && !strings.HasPrefix(dev, "/dev/") {
+ continue
+ }
+ keepdir := mount + "/keep"
+ if st, err := os.Stat(keepdir); err != nil || !st.IsDir() {
+ continue
+ }
+ // Set the -readonly flag (but only for this volume)
+ // if the filesystem is mounted readonly.
+ flagReadonlyWas := flagReadonly
+ for _, fsopt := range strings.Split(args[3], ",") {
+ if fsopt == "ro" {
+ flagReadonly = true
+ break
+ }
+ if fsopt == "rw" {
+ break
+ }
+ }
+ if err := vs.Set(keepdir); err != nil {
+ log.Printf("adding %q: %s", keepdir, err)
+ } else {
+ added++
+ }
+ flagReadonly = flagReadonlyWas
+ }
+ return added
+}
+
// A UnixVolume stores and retrieves blocks in a local directory.
type UnixVolume struct {
// path to the volume's root directory
readonly bool
}
+// Touch sets the timestamp for the given locator to the current time
func (v *UnixVolume) Touch(loc string) error {
if v.readonly {
return MethodDisabledError
return syscall.Utime(p, &utime)
}
+// Mtime returns the stored timestamp for the given locator.
func (v *UnixVolume) Mtime(loc string) (time.Time, error) {
p := v.blockPath(loc)
- if fi, err := os.Stat(p); err != nil {
+ fi, err := os.Stat(p)
+ if err != nil {
return time.Time{}, err
- } else {
- return fi.ModTime(), nil
}
+ return fi.ModTime(), nil
}
// Lock the locker (if one is in use), open the file for reading, and
if err == nil {
if stat.Size() < 0 {
err = os.ErrInvalid
- } else if stat.Size() > BLOCKSIZE {
+ } else if stat.Size() > BlockSize {
err = TooLongError
}
}
path := v.blockPath(loc)
stat, err := v.stat(path)
if err != nil {
- return nil, err
+ return nil, v.translateError(err)
}
buf := bufs.Get(int(stat.Size()))
err = v.getFunc(path, func(rdr io.Reader) error {
// bytes.Compare(), but uses less memory.
func (v *UnixVolume) Compare(loc string, expect []byte) error {
path := v.blockPath(loc)
- stat, err := v.stat(path)
- if err != nil {
- return err
- }
- bufLen := 1 << 20
- if int64(bufLen) > stat.Size() {
- bufLen = int(stat.Size())
+ if _, err := v.stat(path); err != nil {
+ return v.translateError(err)
}
- cmp := expect
- buf := make([]byte, bufLen)
return v.getFunc(path, func(rdr io.Reader) error {
- // Loop invariants: all data read so far matched what
- // we expected, and the first N bytes of cmp are
- // expected to equal the next N bytes read from
- // reader.
- for {
- n, err := rdr.Read(buf)
- if n > len(cmp) || bytes.Compare(cmp[:n], buf[:n]) != 0 {
- return collisionOrCorrupt(loc[:32], expect[:len(expect)-len(cmp)], buf[:n], rdr)
- }
- cmp = cmp[n:]
- if err == io.EOF {
- if len(cmp) != 0 {
- return collisionOrCorrupt(loc[:32], expect[:len(expect)-len(cmp)], nil, nil)
- }
- return nil
- } else if err != nil {
- return err
- }
- }
+ return compareReaderWithBuf(rdr, expect, loc[:32])
})
}
}
var blockDirRe = regexp.MustCompile(`^[0-9a-f]+$`)
+var blockFileRe = regexp.MustCompile(`^[0-9a-f]{32}$`)
// IndexTo writes (to the given Writer) a list of blocks found on this
// volume which begin with the specified prefix. If the prefix is an
if !strings.HasPrefix(name, prefix) {
continue
}
+ if !blockFileRe.MatchString(name) {
+ continue
+ }
_, err = fmt.Fprint(w,
name,
"+", fileInfo[0].Size(),
}
}
+// Delete deletes the block data from the unix storage
func (v *UnixVolume) Delete(loc string) error {
// Touch() must be called before calling Write() on a block. Touch()
// also uses lockfile(). This avoids a race condition between Write()
}
defer unlockfile(f)
- // If the block has been PUT in the last blob_signature_ttl
+ // If the block has been PUT in the last blobSignatureTTL
// seconds, return success without removing the block. This
// protects data from garbage collection until it is no longer
// possible for clients to retrieve the unreferenced blocks
if fi, err := os.Stat(p); err != nil {
return err
} else {
- if time.Since(fi.ModTime()) < blob_signature_ttl {
+ if time.Since(fi.ModTime()) < blobSignatureTTL {
return nil
}
}
}
// IsFull returns true if the free space on the volume is less than
-// MIN_FREE_KILOBYTES.
+// MinFreeKilobytes.
//
func (v *UnixVolume) IsFull() (isFull bool) {
fullSymlink := v.root + "/full"
}
if avail, err := v.FreeDiskSpace(); err == nil {
- isFull = avail < MIN_FREE_KILOBYTES
+ isFull = avail < MinFreeKilobytes
} else {
log.Printf("%s: FreeDiskSpace: %s\n", v, err)
isFull = false
return fmt.Sprintf("[UnixVolume %s]", v.root)
}
+// Writable returns false if all future Put, Mtime, and Delete calls are expected to fail.
func (v *UnixVolume) Writable() bool {
return !v.readonly
}
+func (v *UnixVolume) Replication() int {
+ return 1
+}
+
// lockfile and unlockfile use flock(2) to manage kernel file locks.
func lockfile(f *os.File) error {
return syscall.Flock(int(f.Fd()), syscall.LOCK_EX)
func unlockfile(f *os.File) error {
return syscall.Flock(int(f.Fd()), syscall.LOCK_UN)
}
+
+// Where appropriate, translate a more specific filesystem error to an
+// error recognized by handlers, like os.ErrNotExist.
+func (v *UnixVolume) translateError(err error) error {
+ switch err.(type) {
+ case *os.PathError:
+ // stat() returns a PathError if the parent directory
+ // (not just the file itself) is missing
+ return os.ErrNotExist
+ default:
+ return err
+ }
+}
"io"
"io/ioutil"
"os"
- "regexp"
- "sort"
"strings"
"sync"
"syscall"
}
}
+// serialize = false; readonly = false
func TestUnixVolumeWithGenericTests(t *testing.T) {
DoGenericVolumeTests(t, func(t *testing.T) TestableVolume {
return NewTestableUnixVolume(t, false, false)
})
}
-func TestGet(t *testing.T) {
- v := NewTestableUnixVolume(t, false, false)
- defer v.Teardown()
- v.Put(TEST_HASH, TEST_BLOCK)
+// serialize = false; readonly = true
+func TestUnixVolumeWithGenericTestsReadOnly(t *testing.T) {
+ DoGenericVolumeTests(t, func(t *testing.T) TestableVolume {
+ return NewTestableUnixVolume(t, false, true)
+ })
+}
- buf, err := v.Get(TEST_HASH)
- if err != nil {
- t.Error(err)
- }
- if bytes.Compare(buf, TEST_BLOCK) != 0 {
- t.Errorf("expected %s, got %s", string(TEST_BLOCK), string(buf))
- }
+// serialize = true; readonly = false
+func TestUnixVolumeWithGenericTestsSerialized(t *testing.T) {
+ DoGenericVolumeTests(t, func(t *testing.T) TestableVolume {
+ return NewTestableUnixVolume(t, true, false)
+ })
+}
+
+// serialize = false; readonly = false
+func TestUnixVolumeHandlersWithGenericVolumeTests(t *testing.T) {
+ DoHandlersWithGenericVolumeTests(t, func(t *testing.T) (*RRVolumeManager, []TestableVolume) {
+ vols := make([]Volume, 2)
+ testableUnixVols := make([]TestableVolume, 2)
+
+ for i := range vols {
+ v := NewTestableUnixVolume(t, false, false)
+ vols[i] = v
+ testableUnixVols[i] = v
+ }
+
+ return MakeRRVolumeManager(vols), testableUnixVols
+ })
}
func TestGetNotFound(t *testing.T) {
v := NewTestableUnixVolume(t, false, false)
defer v.Teardown()
- v.Put(TEST_HASH, TEST_BLOCK)
+ v.Put(TestHash, TestBlock)
- buf, err := v.Get(TEST_HASH_2)
+ buf, err := v.Get(TestHash2)
switch {
case os.IsNotExist(err):
break
}
}
-func TestIndexTo(t *testing.T) {
- v := NewTestableUnixVolume(t, false, false)
- defer v.Teardown()
-
- v.Put(TEST_HASH, TEST_BLOCK)
- v.Put(TEST_HASH_2, TEST_BLOCK_2)
- v.Put(TEST_HASH_3, TEST_BLOCK_3)
-
- buf := new(bytes.Buffer)
- v.IndexTo("", buf)
- index_rows := strings.Split(string(buf.Bytes()), "\n")
- sort.Strings(index_rows)
- sorted_index := strings.Join(index_rows, "\n")
- m, err := regexp.MatchString(
- `^\n`+TEST_HASH+`\+\d+ \d+\n`+
- TEST_HASH_3+`\+\d+ \d+\n`+
- TEST_HASH_2+`\+\d+ \d+$`,
- sorted_index)
- if err != nil {
- t.Error(err)
- } else if !m {
- t.Errorf("Got index %q for empty prefix", sorted_index)
- }
-
- for _, prefix := range []string{"f", "f15", "f15ac"} {
- buf = new(bytes.Buffer)
- v.IndexTo(prefix, buf)
- m, err := regexp.MatchString(`^`+TEST_HASH_2+`\+\d+ \d+\n$`, string(buf.Bytes()))
- if err != nil {
- t.Error(err)
- } else if !m {
- t.Errorf("Got index %q for prefix %q", string(buf.Bytes()), prefix)
- }
- }
-}
-
func TestPut(t *testing.T) {
v := NewTestableUnixVolume(t, false, false)
defer v.Teardown()
- err := v.Put(TEST_HASH, TEST_BLOCK)
+ err := v.Put(TestHash, TestBlock)
if err != nil {
t.Error(err)
}
- p := fmt.Sprintf("%s/%s/%s", v.root, TEST_HASH[:3], TEST_HASH)
+ p := fmt.Sprintf("%s/%s/%s", v.root, TestHash[:3], TestHash)
if buf, err := ioutil.ReadFile(p); err != nil {
t.Error(err)
- } else if bytes.Compare(buf, TEST_BLOCK) != 0 {
+ } else if bytes.Compare(buf, TestBlock) != 0 {
t.Errorf("Write should have stored %s, did store %s",
- string(TEST_BLOCK), string(buf))
+ string(TestBlock), string(buf))
}
}
defer v.Teardown()
os.Chmod(v.root, 000)
- err := v.Put(TEST_HASH, TEST_BLOCK)
+ err := v.Put(TestHash, TestBlock)
if err == nil {
t.Error("Write should have failed")
}
v := NewTestableUnixVolume(t, false, true)
defer v.Teardown()
- v.PutRaw(TEST_HASH, TEST_BLOCK)
+ v.PutRaw(TestHash, TestBlock)
- _, err := v.Get(TEST_HASH)
+ _, err := v.Get(TestHash)
if err != nil {
t.Errorf("got err %v, expected nil", err)
}
- err = v.Put(TEST_HASH, TEST_BLOCK)
+ err = v.Put(TestHash, TestBlock)
if err != MethodDisabledError {
t.Errorf("got err %v, expected MethodDisabledError", err)
}
- err = v.Touch(TEST_HASH)
+ err = v.Touch(TestHash)
if err != MethodDisabledError {
t.Errorf("got err %v, expected MethodDisabledError", err)
}
- err = v.Delete(TEST_HASH)
+ err = v.Delete(TestHash)
if err != MethodDisabledError {
t.Errorf("got err %v, expected MethodDisabledError", err)
}
}
-// TestPutTouch
-// Test that when applying PUT to a block that already exists,
-// the block's modification time is updated.
-func TestPutTouch(t *testing.T) {
- v := NewTestableUnixVolume(t, false, false)
- defer v.Teardown()
-
- if err := v.Put(TEST_HASH, TEST_BLOCK); err != nil {
- t.Error(err)
- }
-
- // We'll verify { t0 < threshold < t1 }, where t0 is the
- // existing block's timestamp on disk before Put() and t1 is
- // its timestamp after Put().
- threshold := time.Now().Add(-time.Second)
-
- // Set the stored block's mtime far enough in the past that we
- // can see the difference between "timestamp didn't change"
- // and "timestamp granularity is too low".
- v.TouchWithDate(TEST_HASH, time.Now().Add(-20*time.Second))
-
- // Make sure v.Mtime() agrees the above Utime really worked.
- if t0, err := v.Mtime(TEST_HASH); err != nil || t0.IsZero() || !t0.Before(threshold) {
- t.Errorf("Setting mtime failed: %v, %v", t0, err)
- }
-
- // Write the same block again.
- if err := v.Put(TEST_HASH, TEST_BLOCK); err != nil {
- t.Error(err)
- }
-
- // Verify threshold < t1
- if t1, err := v.Mtime(TEST_HASH); err != nil {
- t.Error(err)
- } else if t1.Before(threshold) {
- t.Errorf("t1 %v should be >= threshold %v after v.Put ", t1, threshold)
- }
-}
-
-// Serialization tests: launch a bunch of concurrent
-//
-// TODO(twp): show that the underlying Read/Write operations executed
-// serially and not concurrently. The easiest way to do this is
-// probably to activate verbose or debug logging, capture log output
-// and examine it to confirm that Reads and Writes did not overlap.
-//
-// TODO(twp): a proper test of I/O serialization requires that a
-// second request start while the first one is still underway.
-// Guaranteeing that the test behaves this way requires some tricky
-// synchronization and mocking. For now we'll just launch a bunch of
-// requests simultaenously in goroutines and demonstrate that they
-// return accurate results.
-//
-func TestGetSerialized(t *testing.T) {
- // Create a volume with I/O serialization enabled.
- v := NewTestableUnixVolume(t, true, false)
- defer v.Teardown()
-
- v.Put(TEST_HASH, TEST_BLOCK)
- v.Put(TEST_HASH_2, TEST_BLOCK_2)
- v.Put(TEST_HASH_3, TEST_BLOCK_3)
-
- sem := make(chan int)
- go func(sem chan int) {
- buf, err := v.Get(TEST_HASH)
- if err != nil {
- t.Errorf("err1: %v", err)
- }
- if bytes.Compare(buf, TEST_BLOCK) != 0 {
- t.Errorf("buf should be %s, is %s", string(TEST_BLOCK), string(buf))
- }
- sem <- 1
- }(sem)
-
- go func(sem chan int) {
- buf, err := v.Get(TEST_HASH_2)
- if err != nil {
- t.Errorf("err2: %v", err)
- }
- if bytes.Compare(buf, TEST_BLOCK_2) != 0 {
- t.Errorf("buf should be %s, is %s", string(TEST_BLOCK_2), string(buf))
- }
- sem <- 1
- }(sem)
-
- go func(sem chan int) {
- buf, err := v.Get(TEST_HASH_3)
- if err != nil {
- t.Errorf("err3: %v", err)
- }
- if bytes.Compare(buf, TEST_BLOCK_3) != 0 {
- t.Errorf("buf should be %s, is %s", string(TEST_BLOCK_3), string(buf))
- }
- sem <- 1
- }(sem)
-
- // Wait for all goroutines to finish
- for done := 0; done < 3; {
- done += <-sem
- }
-}
-
-func TestPutSerialized(t *testing.T) {
- // Create a volume with I/O serialization enabled.
- v := NewTestableUnixVolume(t, true, false)
- defer v.Teardown()
-
- sem := make(chan int)
- go func(sem chan int) {
- err := v.Put(TEST_HASH, TEST_BLOCK)
- if err != nil {
- t.Errorf("err1: %v", err)
- }
- sem <- 1
- }(sem)
-
- go func(sem chan int) {
- err := v.Put(TEST_HASH_2, TEST_BLOCK_2)
- if err != nil {
- t.Errorf("err2: %v", err)
- }
- sem <- 1
- }(sem)
-
- go func(sem chan int) {
- err := v.Put(TEST_HASH_3, TEST_BLOCK_3)
- if err != nil {
- t.Errorf("err3: %v", err)
- }
- sem <- 1
- }(sem)
-
- // Wait for all goroutines to finish
- for done := 0; done < 3; {
- done += <-sem
- }
-
- // Double check that we actually wrote the blocks we expected to write.
- buf, err := v.Get(TEST_HASH)
- if err != nil {
- t.Errorf("Get #1: %v", err)
- }
- if bytes.Compare(buf, TEST_BLOCK) != 0 {
- t.Errorf("Get #1: expected %s, got %s", string(TEST_BLOCK), string(buf))
- }
-
- buf, err = v.Get(TEST_HASH_2)
- if err != nil {
- t.Errorf("Get #2: %v", err)
- }
- if bytes.Compare(buf, TEST_BLOCK_2) != 0 {
- t.Errorf("Get #2: expected %s, got %s", string(TEST_BLOCK_2), string(buf))
- }
-
- buf, err = v.Get(TEST_HASH_3)
- if err != nil {
- t.Errorf("Get #3: %v", err)
- }
- if bytes.Compare(buf, TEST_BLOCK_3) != 0 {
- t.Errorf("Get #3: expected %s, got %s", string(TEST_BLOCK_3), string(buf))
- }
-}
-
func TestIsFull(t *testing.T) {
v := NewTestableUnixVolume(t, false, false)
defer v.Teardown()
- full_path := v.root + "/full"
+ fullPath := v.root + "/full"
now := fmt.Sprintf("%d", time.Now().Unix())
- os.Symlink(now, full_path)
+ os.Symlink(now, fullPath)
if !v.IsFull() {
t.Errorf("%s: claims not to be full", v)
}
- os.Remove(full_path)
+ os.Remove(fullPath)
// Test with an expired /full link.
expired := fmt.Sprintf("%d", time.Now().Unix()-3605)
- os.Symlink(expired, full_path)
+ os.Symlink(expired, fullPath)
if v.IsFull() {
t.Errorf("%s: should no longer be full", v)
}
v := NewTestableUnixVolume(t, false, false)
defer v.Teardown()
- v.Put(TEST_HASH, TEST_BLOCK)
+ v.Put(TestHash, TestBlock)
mockErr := errors.New("Mock error")
- err := v.getFunc(v.blockPath(TEST_HASH), func(rdr io.Reader) error {
+ err := v.getFunc(v.blockPath(TestHash), func(rdr io.Reader) error {
return mockErr
})
if err != mockErr {
defer v.Teardown()
funcCalled := false
- err := v.getFunc(v.blockPath(TEST_HASH), func(rdr io.Reader) error {
+ err := v.getFunc(v.blockPath(TestHash), func(rdr io.Reader) error {
funcCalled = true
return nil
})
v := NewTestableUnixVolume(t, false, false)
defer v.Teardown()
- v.Put(TEST_HASH, TEST_BLOCK)
+ v.Put(TestHash, TestBlock)
mtx := NewMockMutex()
v.locker = mtx
funcCalled := make(chan struct{})
- go v.getFunc(v.blockPath(TEST_HASH), func(rdr io.Reader) error {
+ go v.getFunc(v.blockPath(TestHash), func(rdr io.Reader) error {
funcCalled <- struct{}{}
return nil
})
v := NewTestableUnixVolume(t, false, false)
defer v.Teardown()
- v.Put(TEST_HASH, TEST_BLOCK)
- err := v.Compare(TEST_HASH, TEST_BLOCK)
+ v.Put(TestHash, TestBlock)
+ err := v.Compare(TestHash, TestBlock)
if err != nil {
t.Errorf("Got err %q, expected nil", err)
}
- err = v.Compare(TEST_HASH, []byte("baddata"))
+ err = v.Compare(TestHash, []byte("baddata"))
if err != CollisionError {
t.Errorf("Got err %q, expected %q", err, CollisionError)
}
- v.Put(TEST_HASH, []byte("baddata"))
- err = v.Compare(TEST_HASH, TEST_BLOCK)
+ v.Put(TestHash, []byte("baddata"))
+ err = v.Compare(TestHash, TestBlock)
if err != DiskHashError {
t.Errorf("Got err %q, expected %q", err, DiskHashError)
}
- p := fmt.Sprintf("%s/%s/%s", v.root, TEST_HASH[:3], TEST_HASH)
+ p := fmt.Sprintf("%s/%s/%s", v.root, TestHash[:3], TestHash)
os.Chmod(p, 000)
- err = v.Compare(TEST_HASH, TEST_BLOCK)
+ err = v.Compare(TestHash, TestBlock)
if err == nil || strings.Index(err.Error(), "permission denied") < 0 {
t.Errorf("Got err %q, expected %q", err, "permission denied")
}
}
+
+// TODO(twp): show that the underlying Read/Write operations executed
+// serially and not concurrently. The easiest way to do this is
+// probably to activate verbose or debug logging, capture log output
+// and examine it to confirm that Reads and Writes did not overlap.
+//
+// TODO(twp): a proper test of I/O serialization requires that a
+// second request start while the first one is still underway.
+// Guaranteeing that the test behaves this way requires some tricky
+// synchronization and mocking. For now we'll just launch a bunch of
+// requests simultaneously in goroutines and demonstrate that they
+// return accurate results.
import "container/list"
+// WorkQueue definition
type WorkQueue struct {
getStatus chan WorkQueueStatus
newlist chan *list.List
DoneItem chan<- struct{}
}
+// WorkQueueStatus reflects the queue status.
type WorkQueueStatus struct {
InProgress int
Queued int
FileUtils.chown_R(l[:username], l[:username], userdotssh)
File.chmod(0700, userdotssh)
File.chmod(0750, @homedir)
+ File.chmod(0600, userauthkeys)
end
devnull.close
def timestamp_fresh(timestamp, fresh_time):
return (time.time() - timestamp) < fresh_time
+def arvados_node_missing(arvados_node, fresh_time):
+ """Indicate if cloud node corresponding to the arvados
+ node is "missing".
+
+ If True, this means the node has not pinged the API server within the timeout
+ period. If False, the ping is up to date. If the node has never pinged,
+ returns None.
+ """
+ if arvados_node["last_ping_at"] is None:
+ return None
+ else:
+ return not timestamp_fresh(arvados_timestamp(arvados_node["last_ping_at"]), fresh_time)
+
class ShutdownTimer(object):
"""Keep track of a cloud node's shutdown windows.
import pykka
from .. import \
- arvados_node_fqdn, arvados_node_mtime, arvados_timestamp, timestamp_fresh
+ arvados_node_fqdn, arvados_node_mtime, arvados_timestamp, timestamp_fresh, arvados_node_missing
from ...clientactor import _notify_subscribers
from ... import config
for shutdown.
"""
def __init__(self, cloud_node, cloud_node_start_time, shutdown_timer,
- cloud_fqdn_func, timer_actor, update_actor, arvados_node=None,
- poll_stale_after=600, node_stale_after=3600):
+ cloud_fqdn_func, timer_actor, update_actor, cloud_client,
+ arvados_node=None, poll_stale_after=600, node_stale_after=3600,
+ boot_fail_after=1800
+ ):
super(ComputeNodeMonitorActor, self).__init__()
self._later = self.actor_ref.proxy()
self._logger = logging.getLogger('arvnodeman.computenode')
self._cloud_node_fqdn = cloud_fqdn_func
self._timer = timer_actor
self._update = update_actor
+ self._cloud = cloud_client
self.cloud_node = cloud_node
self.cloud_node_start_time = cloud_node_start_time
self.poll_stale_after = poll_stale_after
self.node_stale_after = node_stale_after
+ self.boot_fail_after = boot_fail_after
self.subscribers = set()
self.arvados_node = None
self._later.update_arvados_node(arvados_node)
def shutdown_eligible(self):
if not self._shutdowns.window_open():
return False
- elif self.arvados_node is None:
- # If this is a new, unpaired node, it's eligible for
- # shutdown--we figure there was an error during bootstrap.
- return timestamp_fresh(self.cloud_node_start_time,
- self.node_stale_after)
- else:
- return self.in_state('idle')
+ if self.arvados_node is None:
+ # Node is unpaired.
+ # If it hasn't pinged Arvados after boot_fail seconds, shut it down
+ return not timestamp_fresh(self.cloud_node_start_time, self.boot_fail_after)
+ missing = arvados_node_missing(self.arvados_node, self.node_stale_after)
+ if missing and self._cloud.broken(self.cloud_node):
+ # Node is paired, but Arvados says it is missing and the cloud says the node
+ # is in an error state, so shut it down.
+ return True
+ if missing is None and self._cloud.broken(self.cloud_node):
+ self._logger.warning(
+ "cloud reports broken node, but paired node %s never pinged "
+ "(bug?) -- skipped check for node_stale_after",
+ self.arvados_node['uuid'])
+ return self.in_state('idle')
def consider_shutdown(self):
next_opening = self._shutdowns.next_opening()
from . import ComputeNodeShutdownActor as ShutdownActorBase
class ComputeNodeShutdownActor(ShutdownActorBase):
- SLURM_END_STATES = frozenset(['down\n', 'down*\n', 'drain\n', 'fail\n'])
+ SLURM_END_STATES = frozenset(['down\n', 'down*\n',
+ 'drain\n', 'drain*\n',
+ 'fail\n', 'fail*\n'])
+ SLURM_DRAIN_STATES = frozenset(['drain\n', 'drng\n'])
def on_start(self):
arv_node = self._arvados_node()
if arv_node is None:
+ self._nodename = None
return super(ComputeNodeShutdownActor, self).on_start()
else:
self._nodename = arv_node['hostname']
cmd.extend(args)
subprocess.check_output(cmd)
- @ShutdownActorBase._retry((subprocess.CalledProcessError,))
+ def _get_slurm_state(self):
+ return subprocess.check_output(['sinfo', '--noheader', '-o', '%t', '-n', self._nodename])
+
+ # The following methods retry on OSError. This is intended to mitigate bug
+ # #6321 where fork() of node manager raises "OSError: [Errno 12] Cannot
+ # allocate memory" resulting in the untimely death of the shutdown actor
+ # and tends to result in node manager getting into a wedged state where it
+ # won't allocate new nodes or shut down gracefully. The underlying causes
+ # of the excessive memory usage that result in the "Cannot allocate memory"
+ # error are still being investigated.
+
+ @ShutdownActorBase._retry((subprocess.CalledProcessError, OSError))
def cancel_shutdown(self):
- self._set_node_state('RESUME')
+ if self._nodename:
+ if self._get_slurm_state() in self.SLURM_DRAIN_STATES:
+ # Resume from "drng" or "drain"
+ self._set_node_state('RESUME')
+ else:
+ # Node is in a state such as 'idle' or 'alloc' so don't
+ # try to resume it because that will just raise an error.
+ pass
return super(ComputeNodeShutdownActor, self).cancel_shutdown()
+ @ShutdownActorBase._retry((subprocess.CalledProcessError, OSError))
@ShutdownActorBase._stop_if_window_closed
- @ShutdownActorBase._retry((subprocess.CalledProcessError,))
def issue_slurm_drain(self):
self._set_node_state('DRAIN', 'Reason=Node Manager shutdown')
self._logger.info("Waiting for SLURM node %s to drain", self._nodename)
self._later.await_slurm_drain()
+ @ShutdownActorBase._retry((subprocess.CalledProcessError, OSError))
@ShutdownActorBase._stop_if_window_closed
- @ShutdownActorBase._retry((subprocess.CalledProcessError,))
def await_slurm_drain(self):
- output = subprocess.check_output(
- ['sinfo', '--noheader', '-o', '%t', '-n', self._nodename])
+ output = self._get_slurm_state()
if output in self.SLURM_END_STATES:
self._later.shutdown_node()
else:
"""
raise NotImplementedError("BaseComputeNodeDriver.arvados_create_kwargs")
+ def broken(self, cloud_node):
+ """Return true if libcloud has indicated the node is in a "broken" state."""
+ return False
+
def _make_ping_url(self, arvados_node):
return 'https://{}/arvados/v1/nodes/{}/ping?ping_secret={}'.format(
self.ping_host, arvados_node['uuid'],
DEFAULT_DRIVER = cloud_provider.get_driver(cloud_types.Provider.AZURE_ARM)
SEARCH_CACHE = {}
+ CLOUD_ERRORS = BaseComputeNodeDriver.CLOUD_ERRORS + (BaseHTTPError,)
def __init__(self, auth_kwargs, list_kwargs, create_kwargs,
driver_class=DEFAULT_DRIVER):
super(ComputeNodeDriver, self).list_nodes()
if node.extra["tags"].get("arvados-class") == self.tags["arvados-class"]]
+ def broken(self, cloud_node):
+ """Return true if libcloud has indicated the node is in a "broken" state."""
+ # UNKNOWN means the node state is unrecognized, which in practice means some combination
+ # of failure that the Azure libcloud driver doesn't know how to interpret.
+ return (cloud_node.state in (cloud_types.NodeState.ERROR, cloud_types.NodeState.UNKNOWN))
+
@classmethod
def node_fqdn(cls, node):
return node.extra["tags"].get("hostname")
timer_actor=self._timer,
arvados_node=None,
poll_stale_after=self.poll_stale_after,
- node_stale_after=self.node_stale_after).proxy()
+ node_stale_after=self.node_stale_after,
+ cloud_client=self._cloud_driver,
+ boot_fail_after=self.boot_fail_after).proxy()
actor.subscribe(self._later.node_can_shutdown)
self._cloud_nodes_actor.subscribe_to(cloud_node.id,
actor.update_cloud_node)
self._pair_nodes(record, arv_rec.arvados_node)
break
for key, record in self.cloud_nodes.orphans.iteritems():
+ if key in self.shutdowns:
+ try:
+ self.shutdowns[key].stop().get()
+ except pykka.ActorDeadError:
+ pass
+ del self.shutdowns[key]
record.actor.stop()
record.cloud_node = None
- self.shutdowns.pop(key, None)
def update_arvados_nodes(self, nodelist):
self._update_poll_time('arvados_nodes')
self.cloud_nodes.nodes.itervalues())
if busy)
+ def _nodes_missing(self):
+ return sum(1 for arv_node in
+ pykka.get_all(rec.actor.arvados_node for rec in
+ self.cloud_nodes.nodes.itervalues()
+ if rec.actor.cloud_node.get().id not in self.shutdowns)
+ if arv_node and cnode.arvados_node_missing(arv_node, self.node_stale_after))
+
def _nodes_wanted(self):
up_count = self._nodes_up()
under_min = self.min_nodes - up_count
elif under_min > 0:
return under_min
else:
- up_count -= len(self.shutdowns) + self._nodes_busy()
+ up_count -= len(self.shutdowns) + self._nodes_busy() + self._nodes_missing()
return len(self.last_wishlist) - up_count
def _nodes_excess(self):
'python-daemon',
],
dependency_links = [
- "https://github.com/curoverse/libcloud/archive/apache-libcloud-0.18.1.dev3.zip"
+ "https://github.com/curoverse/libcloud/archive/apache-libcloud-0.18.1.dev4.zip"
],
scripts=['bin/arvados-node-manager'],
test_suite='tests',
- tests_require=['mock>=1.0', "apache-libcloud==0.18.1.dev3"],
+ tests_require=['pbr<1.7.0', 'mock>=1.0', "apache-libcloud==0.18.1.dev4"],
zip_safe=False,
cmdclass={'egg_info': tagger},
)
self.cloud_node = cloud_node
self.arvados_node = arvados_node
- def make_actor(self, cancellable=True):
+ def make_actor(self, cancellable=True, start_time=None):
if not hasattr(self, 'timer'):
self.make_mocks()
+ if start_time is None:
+ start_time = time.time()
monitor_actor = dispatch.ComputeNodeMonitorActor.start(
- self.cloud_node, time.time(), self.shutdowns,
- testutil.cloud_node_fqdn, self.timer, self.updates,
+ self.cloud_node, start_time, self.shutdowns,
+ testutil.cloud_node_fqdn, self.timer, self.updates, self.cloud_client,
self.arvados_node)
self.shutdown_actor = self.ACTOR_CLASS.start(
self.timer, self.cloud_client, self.arvados_client, monitor_actor,
ACTOR_CLASS = dispatch.ComputeNodeShutdownActor
def test_easy_shutdown(self):
- self.make_actor()
+ self.make_actor(start_time=0)
self.check_success_flag(True)
self.assertTrue(self.cloud_client.destroy_node.called)
def test_shutdown_retries_when_cloud_fails(self):
self.make_mocks()
self.cloud_client.destroy_node.return_value = False
- self.make_actor()
+ self.make_actor(start_time=0)
self.assertIsNone(self.shutdown_actor.success.get(self.TIMEOUT))
self.cloud_client.destroy_node.return_value = True
self.check_success_flag(True)
self.updates = mock.MagicMock(name='update_mock')
self.cloud_mock = testutil.cloud_node_mock(node_num)
self.subscriber = mock.Mock(name='subscriber_mock')
+ self.cloud_client = mock.MagicMock(name='cloud_client')
+ self.cloud_client.broken.return_value = False
def make_actor(self, node_num=1, arv_node=None, start_time=None):
if not hasattr(self, 'cloud_mock'):
start_time = time.time()
self.node_actor = dispatch.ComputeNodeMonitorActor.start(
self.cloud_mock, start_time, self.shutdowns,
- testutil.cloud_node_fqdn, self.timer, self.updates,
- arv_node).proxy()
+ testutil.cloud_node_fqdn, self.timer, self.updates, self.cloud_client,
+ arv_node, boot_fail_after=300).proxy()
self.node_actor.subscribe(self.subscriber).get(self.TIMEOUT)
def node_state(self, *states):
self.assertFalse(self.subscriber.called)
def test_shutdown_subscription(self):
- self.make_actor()
+ self.make_actor(start_time=0)
self.shutdowns._set_state(True, 600)
self.node_actor.consider_shutdown().get(self.TIMEOUT)
self.assertTrue(self.subscriber.called)
self.assertEqual(self.node_actor.actor_ref.actor_urn,
self.subscriber.call_args[0][0].actor_ref.actor_urn)
- def test_shutdown_without_arvados_node(self):
+ def test_no_shutdown_booting(self):
self.make_actor()
self.shutdowns._set_state(True, 600)
- self.assertTrue(self.node_actor.shutdown_eligible().get(self.TIMEOUT))
+ self.assertFalse(self.node_actor.shutdown_eligible().get(self.TIMEOUT))
- def test_no_shutdown_without_arvados_node_and_old_cloud_node(self):
+ def test_shutdown_without_arvados_node(self):
self.make_actor(start_time=0)
self.shutdowns._set_state(True, 600)
+ self.assertTrue(self.node_actor.shutdown_eligible().get(self.TIMEOUT))
+
+ def test_no_shutdown_missing(self):
+ arv_node = testutil.arvados_node_mock(10, job_uuid=None,
+ crunch_worker_state="down",
+ last_ping_at='1970-01-01T01:02:03.04050607Z')
+ self.make_actor(10, arv_node)
+ self.shutdowns._set_state(True, 600)
self.assertFalse(self.node_actor.shutdown_eligible().get(self.TIMEOUT))
+ def test_no_shutdown_running_broken(self):
+ arv_node = testutil.arvados_node_mock(12, job_uuid=None,
+ crunch_worker_state="down")
+ self.make_actor(12, arv_node)
+ self.shutdowns._set_state(True, 600)
+ self.cloud_client.broken.return_value = True
+ self.assertFalse(self.node_actor.shutdown_eligible().get(self.TIMEOUT))
+
+ def test_shutdown_missing_broken(self):
+ arv_node = testutil.arvados_node_mock(11, job_uuid=None,
+ crunch_worker_state="down",
+ last_ping_at='1970-01-01T01:02:03.04050607Z')
+ self.make_actor(11, arv_node)
+ self.shutdowns._set_state(True, 600)
+ self.cloud_client.broken.return_value = True
+ self.assertTrue(self.node_actor.shutdown_eligible().get(self.TIMEOUT))
+
def test_no_shutdown_when_window_closed(self):
self.make_actor(3, testutil.arvados_node_mock(3, job_uuid=None))
self.assertFalse(self.node_actor.shutdown_eligible().get(self.TIMEOUT))
def test_slurm_bypassed_when_no_arvados_node(self, proc_mock):
# Test we correctly handle a node that failed to bootstrap.
proc_mock.return_value = 'idle\n'
- self.make_actor()
+ self.make_actor(start_time=0)
self.check_success_flag(True)
self.assertFalse(proc_mock.called)
def test_node_undrained_when_shutdown_window_closes(self, proc_mock):
- proc_mock.return_value = 'alloc\n'
+ proc_mock.side_effect = iter(['drng\n', 'idle\n'])
+ self.make_mocks(arvados_node=testutil.arvados_node_mock(job_uuid=True))
+ self.make_actor()
+ self.check_success_flag(False, 2)
+ self.check_slurm_got_args(proc_mock, 'NodeName=compute99', 'State=RESUME')
+
+ def test_alloc_node_undrained_when_shutdown_window_closes(self, proc_mock):
+ proc_mock.side_effect = iter(['alloc\n'])
self.make_mocks(arvados_node=testutil.arvados_node_mock(job_uuid=True))
self.make_actor()
self.check_success_flag(False, 2)
- self.check_slurm_got_args(proc_mock, 'NodeName=compute99',
- 'State=RESUME')
+ self.check_slurm_got_args(proc_mock, 'sinfo', '--noheader', '-o', '%t', '-n', 'compute99')
+
+ def test_cancel_shutdown_retry(self, proc_mock):
+ proc_mock.side_effect = iter([OSError, 'drain\n', OSError, 'idle\n'])
+ self.make_mocks(arvados_node=testutil.arvados_node_mock(job_uuid=True))
+ self.make_actor()
+ self.check_success_flag(False, 2)
+
+ def test_issue_slurm_drain_retry(self, proc_mock):
+ proc_mock.side_effect = iter([OSError, '', OSError, 'drng\n'])
+ self.check_success_after_reset(proc_mock)
def test_arvados_node_cleaned_after_shutdown(self, proc_mock):
proc_mock.return_value = 'drain\n'
self.make_daemon([testutil.cloud_node_mock()],
want_sizes=[testutil.MockSize(1)])
self.stop_proxy(self.daemon)
- self.assertFalse(self.node_setup.called)
+ self.assertFalse(self.node_setup.start.called)
+
+ def test_dont_count_missing_as_busy(self):
+ size = testutil.MockSize(1)
+ self.make_daemon(cloud_nodes=[testutil.cloud_node_mock(1),
+ testutil.cloud_node_mock(2)],
+ arvados_nodes=[testutil.arvados_node_mock(1),
+ testutil.arvados_node_mock(2, last_ping_at='1970-01-01T01:02:03.04050607Z')],
+ want_sizes=[size, size])
+ self.stop_proxy(self.daemon)
+ self.assertTrue(self.node_setup.start.called)
+
+ def test_missing_counts_towards_max(self):
+ size = testutil.MockSize(1)
+ self.make_daemon(cloud_nodes=[testutil.cloud_node_mock(1),
+ testutil.cloud_node_mock(2)],
+ arvados_nodes=[testutil.arvados_node_mock(1),
+ testutil.arvados_node_mock(2, last_ping_at='1970-01-01T01:02:03.04050607Z')],
+ want_sizes=[size, size],
+ max_nodes=2)
+ self.stop_proxy(self.daemon)
+ self.assertFalse(self.node_setup.start.called)
+
+ def test_excess_counts_missing(self):
+ size = testutil.MockSize(1)
+ cloud_nodes = [testutil.cloud_node_mock(1), testutil.cloud_node_mock(2)]
+ self.make_daemon(cloud_nodes=cloud_nodes,
+ arvados_nodes=[testutil.arvados_node_mock(1),
+ testutil.arvados_node_mock(2, last_ping_at='1970-01-01T01:02:03.04050607Z')],
+ want_sizes=[size])
+ self.assertEqual(2, self.alive_monitor_count())
+ for mon_ref in self.monitor_list():
+ self.daemon.node_can_shutdown(mon_ref.proxy()).get(self.TIMEOUT)
+ self.assertEqual(1, self.node_shutdown.start.call_count)
+
+ def test_missing_shutdown_not_excess(self):
+ size = testutil.MockSize(1)
+ cloud_nodes = [testutil.cloud_node_mock(1), testutil.cloud_node_mock(2)]
+ self.make_daemon(cloud_nodes=cloud_nodes,
+ arvados_nodes=[testutil.arvados_node_mock(1),
+ testutil.arvados_node_mock(2, last_ping_at='1970-01-01T01:02:03.04050607Z')],
+ want_sizes=[size])
+ self.daemon.shutdowns.get()[cloud_nodes[1].id] = True
+ self.assertEqual(2, self.alive_monitor_count())
+ for mon_ref in self.monitor_list():
+ self.daemon.node_can_shutdown(mon_ref.proxy()).get(self.TIMEOUT)
+ self.assertEqual(0, self.node_shutdown.start.call_count)
def test_booting_nodes_counted(self):
cloud_node = testutil.cloud_node_mock(1)
self.timer.deliver()
self.stop_proxy(self.daemon)
self.assertEqual(1, self.node_setup.start.call_count)
+
+ def test_shutdown_actor_stopped_when_cloud_node_delisted(self):
+ self.make_daemon(cloud_nodes=[testutil.cloud_node_mock()])
+ self.assertEqual(1, self.alive_monitor_count())
+ monitor = self.monitor_list()[0].proxy()
+ self.daemon.node_can_shutdown(monitor).get(self.TIMEOUT)
+ self.daemon.update_cloud_nodes([]).get(self.TIMEOUT)
+ self.stop_proxy(self.daemon)
+ self.assertEqual(
+ 1, self.node_shutdown.start().proxy().stop().get.call_count)
+
+ def test_shutdown_actor_cleanup_copes_with_dead_actors(self):
+ self.make_daemon(cloud_nodes=[testutil.cloud_node_mock()])
+ self.assertEqual(1, self.alive_monitor_count())
+ monitor = self.monitor_list()[0].proxy()
+ self.daemon.node_can_shutdown(monitor).get(self.TIMEOUT)
+ # We're mainly testing that update_cloud_nodes catches and handles
+ # the ActorDeadError.
+ stop_method = self.node_shutdown.start().proxy().stop().get
+ stop_method.side_effect = pykka.ActorDeadError
+ self.daemon.update_cloud_nodes([]).get(self.TIMEOUT)
+ self.stop_proxy(self.daemon)
+ self.assertEqual(1, stop_method.call_count)
--- /dev/null
+keep-exercise
--- /dev/null
+// Testing tool for Keep services.
+//
+// keep-exercise helps measure throughput and test reliability under
+// various usage patterns.
+//
+// By default, it reads and writes blocks containing 2^26 NUL
+// bytes. This generates network traffic without consuming much disk
+// space.
+//
+// For a more realistic test, enable -vary-request. Warning: this will
+// fill your storage volumes with random data if you leave it running,
+// which can cost you money or leave you with too little room for
+// useful data.
+//
+package main
+
+import (
+ "crypto/rand"
+ "encoding/binary"
+ "flag"
+ "io"
+ "io/ioutil"
+ "log"
+ "time"
+
+ "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
+ "git.curoverse.com/arvados.git/sdk/go/keepclient"
+)
+
+// Command line config knobs
+var (
+ BlockSize = flag.Int("block-size", keepclient.BLOCKSIZE, "bytes per read/write op")
+ ReadThreads = flag.Int("rthreads", 1, "number of concurrent readers")
+ WriteThreads = flag.Int("wthreads", 1, "number of concurrent writers")
+ VaryRequest = flag.Bool("vary-request", false, "vary the data for each request: consumes disk space, exercises write behavior")
+ VaryThread = flag.Bool("vary-thread", false, "use -wthreads different data blocks")
+ Replicas = flag.Int("replicas", 1, "replication level for writing")
+ StatsInterval = flag.Duration("stats-interval", time.Second, "time interval between IO stats reports, or 0 to disable")
+)
+
+func main() {
+ flag.Parse()
+
+ arv, err := arvadosclient.MakeArvadosClient()
+ if err != nil {
+ log.Fatal(err)
+ }
+ kc, err := keepclient.MakeKeepClient(&arv)
+ if err != nil {
+ log.Fatal(err)
+ }
+ kc.Want_replicas = *Replicas
+ kc.Client.Timeout = 10 * time.Minute
+
+ nextBuf := make(chan []byte, *WriteThreads)
+ nextLocator := make(chan string, *ReadThreads+*WriteThreads)
+
+ go countBeans(nextLocator)
+ for i := 0; i < *WriteThreads; i++ {
+ go makeBufs(nextBuf, i)
+ go doWrites(kc, nextBuf, nextLocator)
+ }
+ for i := 0; i < *ReadThreads; i++ {
+ go doReads(kc, nextLocator)
+ }
+ <-make(chan struct{})
+}
+
+// Send 1234 to bytesInChan when we receive 1234 bytes from keepstore.
+var bytesInChan = make(chan uint64)
+var bytesOutChan = make(chan uint64)
+
+// Send struct{}{} to errorsChan when an error happens.
+var errorsChan = make(chan struct{})
+
+func countBeans(nextLocator chan string) {
+ t0 := time.Now()
+ var tickChan <-chan time.Time
+ if *StatsInterval > 0 {
+ tickChan = time.NewTicker(*StatsInterval).C
+ }
+ var bytesIn uint64
+ var bytesOut uint64
+ var errors uint64
+ for {
+ select {
+ case <-tickChan:
+ elapsed := time.Since(t0)
+ log.Printf("%v elapsed: read %v bytes (%.1f MiB/s), wrote %v bytes (%.1f MiB/s), errors %d",
+ elapsed,
+ bytesIn, (float64(bytesIn) / elapsed.Seconds() / 1048576),
+ bytesOut, (float64(bytesOut) / elapsed.Seconds() / 1048576),
+ errors,
+ )
+ case i := <-bytesInChan:
+ bytesIn += i
+ case o := <-bytesOutChan:
+ bytesOut += o
+ case <-errorsChan:
+ errors++
+ }
+ }
+}
+
+func makeBufs(nextBuf chan []byte, threadID int) {
+ buf := make([]byte, *BlockSize)
+ if *VaryThread {
+ binary.PutVarint(buf, int64(threadID))
+ }
+ for {
+ if *VaryRequest {
+ if _, err := io.ReadFull(rand.Reader, buf); err != nil {
+ log.Fatal(err)
+ }
+ }
+ nextBuf <- buf
+ }
+}
+
+func doWrites(kc *keepclient.KeepClient, nextBuf chan []byte, nextLocator chan string) {
+ for buf := range nextBuf {
+ locator, _, err := kc.PutB(buf)
+ if err != nil {
+ log.Print(err)
+ errorsChan <- struct{}{}
+ continue
+ }
+ bytesOutChan <- uint64(len(buf))
+ for cap(nextLocator) > len(nextLocator)+*WriteThreads {
+ // Give the readers something to do, unless
+ // they have lots queued up already.
+ nextLocator <- locator
+ }
+ }
+}
+
+func doReads(kc *keepclient.KeepClient, nextLocator chan string) {
+ for locator := range nextLocator {
+ rdr, size, url, err := kc.Get(locator)
+ if err != nil {
+ log.Print(err)
+ errorsChan <- struct{}{}
+ continue
+ }
+ n, err := io.Copy(ioutil.Discard, rdr)
+ rdr.Close()
+ if n != size || err != nil {
+ log.Printf("Got %d bytes (expected %d) from %s: %v", n, size, url, err)
+ errorsChan <- struct{}{}
+ continue
+ // Note we don't count the bytes received in
+ // partial/corrupt responses: we are measuring
+ // throughput, not resource consumption.
+ }
+ bytesInChan <- uint64(n)
+ }
+}
--- /dev/null
+keep-rsync
--- /dev/null
+package main
+
+import (
+ "bufio"
+ "crypto/tls"
+ "errors"
+ "flag"
+ "fmt"
+ "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
+ "git.curoverse.com/arvados.git/sdk/go/keepclient"
+ "io/ioutil"
+ "log"
+ "net/http"
+ "os"
+ "regexp"
+ "strings"
+ "time"
+)
+
+func main() {
+ err := doMain()
+ if err != nil {
+ log.Fatalf("%v", err)
+ }
+}
+
+func doMain() error {
+ flags := flag.NewFlagSet("keep-rsync", flag.ExitOnError)
+
+ srcConfigFile := flags.String(
+ "src",
+ "",
+ "Source configuration filename. May be either a pathname to a config file, or (for example) 'foo' as shorthand for $HOME/.config/arvados/foo.conf")
+
+ dstConfigFile := flags.String(
+ "dst",
+ "",
+ "Destination configuration filename. May be either a pathname to a config file, or (for example) 'foo' as shorthand for $HOME/.config/arvados/foo.conf")
+
+ srcKeepServicesJSON := flags.String(
+ "src-keep-services-json",
+ "",
+ "An optional list of available source keepservices. "+
+ "If not provided, this list is obtained from api server configured in src-config-file.")
+
+ dstKeepServicesJSON := flags.String(
+ "dst-keep-services-json",
+ "",
+ "An optional list of available destination keepservices. "+
+ "If not provided, this list is obtained from api server configured in dst-config-file.")
+
+ replications := flags.Int(
+ "replications",
+ 0,
+ "Number of replications to write to the destination. If replications not specified, "+
+ "default replication level configured on destination server will be used.")
+
+ prefix := flags.String(
+ "prefix",
+ "",
+ "Index prefix")
+
+ // Parse args; omit the first arg which is the command name
+ flags.Parse(os.Args[1:])
+
+ srcConfig, srcBlobSigningKey, err := loadConfig(*srcConfigFile)
+ if err != nil {
+ return fmt.Errorf("Error loading src configuration from file: %s", err.Error())
+ }
+
+ dstConfig, _, err := loadConfig(*dstConfigFile)
+ if err != nil {
+ return fmt.Errorf("Error loading dst configuration from file: %s", err.Error())
+ }
+
+ // setup src and dst keepclients
+ kcSrc, err := setupKeepClient(srcConfig, *srcKeepServicesJSON, false, 0)
+ if err != nil {
+ return fmt.Errorf("Error configuring src keepclient: %s", err.Error())
+ }
+
+ kcDst, err := setupKeepClient(dstConfig, *dstKeepServicesJSON, true, *replications)
+ if err != nil {
+ return fmt.Errorf("Error configuring dst keepclient: %s", err.Error())
+ }
+
+ // Copy blocks not found in dst from src
+ err = performKeepRsync(kcSrc, kcDst, srcBlobSigningKey, *prefix)
+ if err != nil {
+ return fmt.Errorf("Error while syncing data: %s", err.Error())
+ }
+
+ return nil
+}
+
+type apiConfig struct {
+ APIToken string
+ APIHost string
+ APIHostInsecure bool
+ ExternalClient bool
+}
+
+// Load API config (and blob signing key, if present) from the given file
+func loadConfig(configFile string) (config apiConfig, blobSigningKey string, err error) {
+ if configFile == "" {
+ return config, blobSigningKey, errors.New("config file not specified")
+ }
+
+ config, blobSigningKey, err = readConfigFromFile(configFile)
+ if err != nil {
+ return config, blobSigningKey, fmt.Errorf("Error reading config file: %v", err)
+ }
+
+ return
+}
+
+var matchTrue = regexp.MustCompile("^(?i:1|yes|true)$")
+
+// Read config from file
+func readConfigFromFile(filename string) (config apiConfig, blobSigningKey string, err error) {
+ if !strings.Contains(filename, "/") {
+ filename = os.Getenv("HOME") + "/.config/arvados/" + filename + ".conf"
+ }
+
+ content, err := ioutil.ReadFile(filename)
+
+ if err != nil {
+ return config, "", err
+ }
+
+ lines := strings.Split(string(content), "\n")
+ for _, line := range lines {
+ if line == "" {
+ continue
+ }
+
+ kv := strings.SplitN(line, "=", 2)
+ key := strings.TrimSpace(kv[0])
+ value := strings.TrimSpace(kv[1])
+
+ switch key {
+ case "ARVADOS_API_TOKEN":
+ config.APIToken = value
+ case "ARVADOS_API_HOST":
+ config.APIHost = value
+ case "ARVADOS_API_HOST_INSECURE":
+ config.APIHostInsecure = matchTrue.MatchString(value)
+ case "ARVADOS_EXTERNAL_CLIENT":
+ config.ExternalClient = matchTrue.MatchString(value)
+ case "ARVADOS_BLOB_SIGNING_KEY":
+ blobSigningKey = value
+ }
+ }
+ return
+}
+
+// setup keepclient using the config provided
+func setupKeepClient(config apiConfig, keepServicesJSON string, isDst bool, replications int) (kc *keepclient.KeepClient, err error) {
+ arv := arvadosclient.ArvadosClient{
+ ApiToken: config.APIToken,
+ ApiServer: config.APIHost,
+ ApiInsecure: config.APIHostInsecure,
+ Client: &http.Client{Transport: &http.Transport{
+ TLSClientConfig: &tls.Config{InsecureSkipVerify: config.APIHostInsecure}}},
+ External: config.ExternalClient,
+ }
+
+ // if keepServicesJSON is provided, use it to load services; else, use DiscoverKeepServers
+ if keepServicesJSON == "" {
+ kc, err = keepclient.MakeKeepClient(&arv)
+ if err != nil {
+ return nil, err
+ }
+ } else {
+ kc = keepclient.New(&arv)
+ err = kc.LoadKeepServicesFromJSON(keepServicesJSON)
+ if err != nil {
+ return kc, err
+ }
+ }
+
+ if isDst {
+ // Get default replications value from destination, if it is not already provided
+ if replications == 0 {
+ value, err := arv.Discovery("defaultCollectionReplication")
+ if err == nil {
+ replications = int(value.(float64))
+ } else {
+ return nil, err
+ }
+ }
+
+ kc.Want_replicas = replications
+ }
+
+ return kc, nil
+}
+
+// Get unique block locators from src and dst
+// Copy any blocks missing in dst
+func performKeepRsync(kcSrc, kcDst *keepclient.KeepClient, blobSigningKey, prefix string) error {
+ // Get unique locators from src
+ srcIndex, err := getUniqueLocators(kcSrc, prefix)
+ if err != nil {
+ return err
+ }
+
+ // Get unique locators from dst
+ dstIndex, err := getUniqueLocators(kcDst, prefix)
+ if err != nil {
+ return err
+ }
+
+ // Get list of locators found in src, but missing in dst
+ toBeCopied := getMissingLocators(srcIndex, dstIndex)
+
+ // Copy each missing block to dst
+ log.Printf("Before keep-rsync, there are %d blocks in src and %d blocks in dst. Start copying %d blocks from src not found in dst.",
+ len(srcIndex), len(dstIndex), len(toBeCopied))
+
+ err = copyBlocksToDst(toBeCopied, kcSrc, kcDst, blobSigningKey)
+
+ return err
+}
+
+// Get list of unique locators from the specified cluster
+func getUniqueLocators(kc *keepclient.KeepClient, prefix string) (map[string]bool, error) {
+ uniqueLocators := map[string]bool{}
+
+ // Get index and dedup
+ for uuid := range kc.LocalRoots() {
+ reader, err := kc.GetIndex(uuid, prefix)
+ if err != nil {
+ return uniqueLocators, err
+ }
+ scanner := bufio.NewScanner(reader)
+ for scanner.Scan() {
+ uniqueLocators[strings.Split(scanner.Text(), " ")[0]] = true
+ }
+ }
+
+ return uniqueLocators, nil
+}
+
+// Get list of locators that are in src but not in dst
+func getMissingLocators(srcLocators, dstLocators map[string]bool) []string {
+ var missingLocators []string
+ for locator := range srcLocators {
+ if _, ok := dstLocators[locator]; !ok {
+ missingLocators = append(missingLocators, locator)
+ }
+ }
+ return missingLocators
+}
+
+// Copy blocks from src to dst; only those that are missing in dst are copied
+func copyBlocksToDst(toBeCopied []string, kcSrc, kcDst *keepclient.KeepClient, blobSigningKey string) error {
+ total := len(toBeCopied)
+
+ startedAt := time.Now()
+ for done, locator := range toBeCopied {
+ if done == 0 {
+ log.Printf("Copying data block %d of %d (%.2f%% done): %v", done+1, total,
+ float64(done)/float64(total)*100, locator)
+ } else {
+ timePerBlock := time.Since(startedAt) / time.Duration(done)
+ log.Printf("Copying data block %d of %d (%.2f%% done, %v est. time remaining): %v", done+1, total,
+ float64(done)/float64(total)*100, timePerBlock*time.Duration(total-done), locator)
+ }
+
+ getLocator := locator
+ expiresAt := time.Now().AddDate(0, 0, 1)
+ if blobSigningKey != "" {
+ getLocator = keepclient.SignLocator(getLocator, kcSrc.Arvados.ApiToken, expiresAt, []byte(blobSigningKey))
+ }
+
+ reader, len, _, err := kcSrc.Get(getLocator)
+ if err != nil {
+ return fmt.Errorf("Error getting block: %v %v", locator, err)
+ }
+
+ _, _, err = kcDst.PutHR(getLocator[:32], reader, len)
+ if err != nil {
+ return fmt.Errorf("Error copying data block: %v %v", locator, err)
+ }
+ }
+
+ log.Printf("Successfully copied to destination %d blocks.", total)
+ return nil
+}
--- /dev/null
+package main
+
+import (
+ "crypto/md5"
+ "fmt"
+ "io/ioutil"
+ "os"
+ "strings"
+ "testing"
+ "time"
+
+ "git.curoverse.com/arvados.git/sdk/go/arvadostest"
+ "git.curoverse.com/arvados.git/sdk/go/keepclient"
+
+ . "gopkg.in/check.v1"
+)
+
+// Gocheck boilerplate
+func Test(t *testing.T) {
+ TestingT(t)
+}
+
+// Gocheck boilerplate
+var _ = Suite(&ServerRequiredSuite{})
+var _ = Suite(&ServerNotRequiredSuite{})
+var _ = Suite(&DoMainTestSuite{})
+
+// Tests that require the Keep server running
+type ServerRequiredSuite struct{}
+type ServerNotRequiredSuite struct{}
+type DoMainTestSuite struct{}
+
+func (s *ServerRequiredSuite) SetUpSuite(c *C) {
+ // Start API server
+ arvadostest.StartAPI()
+}
+
+func (s *ServerRequiredSuite) TearDownSuite(c *C) {
+ arvadostest.StopAPI()
+ arvadostest.ResetEnv()
+}
+
+var initialArgs []string
+
+func (s *DoMainTestSuite) SetUpSuite(c *C) {
+ initialArgs = os.Args
+}
+
+var kcSrc, kcDst *keepclient.KeepClient
+var srcKeepServicesJSON, dstKeepServicesJSON, blobSigningKey string
+
+func (s *ServerRequiredSuite) SetUpTest(c *C) {
+ // reset all variables between tests
+ blobSigningKey = ""
+ srcKeepServicesJSON = ""
+ dstKeepServicesJSON = ""
+ kcSrc = &keepclient.KeepClient{}
+ kcDst = &keepclient.KeepClient{}
+}
+
+func (s *ServerRequiredSuite) TearDownTest(c *C) {
+ arvadostest.StopKeep(3)
+}
+
+func (s *DoMainTestSuite) SetUpTest(c *C) {
+ args := []string{"keep-rsync"}
+ os.Args = args
+}
+
+func (s *DoMainTestSuite) TearDownTest(c *C) {
+ os.Args = initialArgs
+}
+
+var testKeepServicesJSON = "{ \"kind\":\"arvados#keepServiceList\", \"etag\":\"\", \"self_link\":\"\", \"offset\":null, \"limit\":null, \"items\":[ { \"href\":\"/keep_services/zzzzz-bi6l4-123456789012340\", \"kind\":\"arvados#keepService\", \"etag\":\"641234567890enhj7hzx432e5\", \"uuid\":\"zzzzz-bi6l4-123456789012340\", \"owner_uuid\":\"zzzzz-tpzed-123456789012345\", \"service_host\":\"keep0.zzzzz.arvadosapi.com\", \"service_port\":25107, \"service_ssl_flag\":false, \"service_type\":\"disk\", \"read_only\":false }, { \"href\":\"/keep_services/zzzzz-bi6l4-123456789012341\", \"kind\":\"arvados#keepService\", \"etag\":\"641234567890enhj7hzx432e5\", \"uuid\":\"zzzzz-bi6l4-123456789012341\", \"owner_uuid\":\"zzzzz-tpzed-123456789012345\", \"service_host\":\"keep0.zzzzz.arvadosapi.com\", \"service_port\":25108, \"service_ssl_flag\":false, \"service_type\":\"disk\", \"read_only\":false } ], \"items_available\":2 }"
+
+// Testing keep-rsync needs two sets of keep services: src and dst.
+// The test setup hence creates 3 servers instead of the default 2,
+// and uses the first 2 as src and the 3rd as dst keep servers.
+func setupRsync(c *C, enforcePermissions bool, replications int) {
+ // srcConfig
+ var srcConfig apiConfig
+ srcConfig.APIHost = os.Getenv("ARVADOS_API_HOST")
+ srcConfig.APIToken = os.Getenv("ARVADOS_API_TOKEN")
+ srcConfig.APIHostInsecure = matchTrue.MatchString(os.Getenv("ARVADOS_API_HOST_INSECURE"))
+
+ // dstConfig
+ var dstConfig apiConfig
+ dstConfig.APIHost = os.Getenv("ARVADOS_API_HOST")
+ dstConfig.APIToken = os.Getenv("ARVADOS_API_TOKEN")
+ dstConfig.APIHostInsecure = matchTrue.MatchString(os.Getenv("ARVADOS_API_HOST_INSECURE"))
+
+ if enforcePermissions {
+ blobSigningKey = "zfhgfenhffzltr9dixws36j1yhksjoll2grmku38mi7yxd66h5j4q9w4jzanezacp8s6q0ro3hxakfye02152hncy6zml2ed0uc"
+ }
+
+ // Start Keep servers
+ arvadostest.StartKeep(3, enforcePermissions)
+
+ // setup keepclients
+ var err error
+ kcSrc, err = setupKeepClient(srcConfig, srcKeepServicesJSON, false, 0)
+ c.Check(err, IsNil)
+
+ kcDst, err = setupKeepClient(dstConfig, dstKeepServicesJSON, true, replications)
+ c.Check(err, IsNil)
+
+ for uuid := range kcSrc.LocalRoots() {
+ if strings.HasSuffix(uuid, "02") {
+ delete(kcSrc.LocalRoots(), uuid)
+ }
+ }
+ for uuid := range kcSrc.GatewayRoots() {
+ if strings.HasSuffix(uuid, "02") {
+ delete(kcSrc.GatewayRoots(), uuid)
+ }
+ }
+ for uuid := range kcSrc.WritableLocalRoots() {
+ if strings.HasSuffix(uuid, "02") {
+ delete(kcSrc.WritableLocalRoots(), uuid)
+ }
+ }
+
+ for uuid := range kcDst.LocalRoots() {
+ if strings.HasSuffix(uuid, "00") || strings.HasSuffix(uuid, "01") {
+ delete(kcDst.LocalRoots(), uuid)
+ }
+ }
+ for uuid := range kcDst.GatewayRoots() {
+ if strings.HasSuffix(uuid, "00") || strings.HasSuffix(uuid, "01") {
+ delete(kcDst.GatewayRoots(), uuid)
+ }
+ }
+ for uuid := range kcDst.WritableLocalRoots() {
+ if strings.HasSuffix(uuid, "00") || strings.HasSuffix(uuid, "01") {
+ delete(kcDst.WritableLocalRoots(), uuid)
+ }
+ }
+
+ if replications == 0 {
+ // Must have got default replications value of 2 from dst discovery document
+ c.Assert(kcDst.Want_replicas, Equals, 2)
+ } else {
+ // Since replications value is provided, it is used
+ c.Assert(kcDst.Want_replicas, Equals, replications)
+ }
+}
+
+func (s *ServerRequiredSuite) TestRsyncPutInOne_GetFromOtherShouldFail(c *C) {
+ setupRsync(c, false, 1)
+
+ // Put a block in src and verify that it is not found in dst
+ testNoCrosstalk(c, "test-data-1", kcSrc, kcDst)
+
+ // Put a block in dst and verify that it is not found in src
+ testNoCrosstalk(c, "test-data-2", kcDst, kcSrc)
+}
+
+func (s *ServerRequiredSuite) TestRsyncWithBlobSigning_PutInOne_GetFromOtherShouldFail(c *C) {
+ setupRsync(c, true, 1)
+
+ // Put a block in src and verify that it is not found in dst
+ testNoCrosstalk(c, "test-data-1", kcSrc, kcDst)
+
+ // Put a block in dst and verify that it is not found in src
+ testNoCrosstalk(c, "test-data-2", kcDst, kcSrc)
+}
+
+// Do a Put in the first and Get from the second,
+// which should raise block not found error.
+func testNoCrosstalk(c *C, testData string, kc1, kc2 *keepclient.KeepClient) {
+ // Put a block using kc1
+ locator, _, err := kc1.PutB([]byte(testData))
+ c.Assert(err, Equals, nil)
+
+ locator = strings.Split(locator, "+")[0]
+ _, _, _, err = kc2.Get(keepclient.SignLocator(locator, kc2.Arvados.ApiToken, time.Now().AddDate(0, 0, 1), []byte(blobSigningKey)))
+ c.Assert(err, NotNil)
+ c.Check(err.Error(), Equals, "Block not found")
+}
+
+// Test keep-rsync initialization, with srcKeepServicesJSON
+func (s *ServerRequiredSuite) TestRsyncInitializeWithKeepServicesJSON(c *C) {
+ srcKeepServicesJSON = testKeepServicesJSON
+
+ setupRsync(c, false, 1)
+
+ localRoots := kcSrc.LocalRoots()
+ c.Check(localRoots, NotNil)
+
+ foundIt := false
+ for k := range localRoots {
+ if k == "zzzzz-bi6l4-123456789012340" {
+ foundIt = true
+ }
+ }
+ c.Check(foundIt, Equals, true)
+
+ foundIt = false
+ for k := range localRoots {
+ if k == "zzzzz-bi6l4-123456789012341" {
+ foundIt = true
+ }
+ }
+ c.Check(foundIt, Equals, true)
+}
+
+// Test keep-rsync initialization with default replications count
+func (s *ServerRequiredSuite) TestInitializeRsyncDefaultReplicationsCount(c *C) {
+ setupRsync(c, false, 0)
+}
+
+// Test keep-rsync initialization with replications count argument
+func (s *ServerRequiredSuite) TestInitializeRsyncReplicationsCount(c *C) {
+ setupRsync(c, false, 3)
+}
+
+// Put some blocks in Src and some more in Dst
+// And copy missing blocks from Src to Dst
+func (s *ServerRequiredSuite) TestKeepRsync(c *C) {
+ testKeepRsync(c, false, "")
+}
+
+// Put some blocks in Src and some more in Dst with blob signing enabled.
+// And copy missing blocks from Src to Dst
+func (s *ServerRequiredSuite) TestKeepRsync_WithBlobSigning(c *C) {
+ testKeepRsync(c, true, "")
+}
+
+// Put some blocks in Src and some more in Dst
+// Use prefix while doing rsync
+// And copy missing blocks from Src to Dst
+func (s *ServerRequiredSuite) TestKeepRsync_WithPrefix(c *C) {
+ data := []byte("test-data-4")
+ hash := fmt.Sprintf("%x", md5.Sum(data))
+
+ testKeepRsync(c, false, hash[0:3])
+ c.Check(len(dstIndex) > len(dstLocators), Equals, true)
+}
+
+// Put some blocks in Src and some more in Dst
+// Use prefix not in src while doing rsync
+// And copy missing blocks from Src to Dst
+func (s *ServerRequiredSuite) TestKeepRsync_WithNoSuchPrefixInSrc(c *C) {
+ testKeepRsync(c, false, "999")
+ c.Check(len(dstIndex), Equals, len(dstLocators))
+}
+
+// Put 5 blocks in src. Put 2 of those blocks in dst
+// Hence there are 3 additional blocks in src
+// Also, put 2 extra blocks in dst; they are hence only in dst
+// Run rsync and verify that those 7 blocks are now available in dst
+func testKeepRsync(c *C, enforcePermissions bool, prefix string) {
+ setupRsync(c, enforcePermissions, 1)
+
+ // setupTestData
+ setupTestData(c, prefix)
+
+ err := performKeepRsync(kcSrc, kcDst, blobSigningKey, prefix)
+ c.Check(err, IsNil)
+
+ // Now GetIndex from dst and verify that all 5 from src and the 2 extra blocks are found
+ dstIndex, err = getUniqueLocators(kcDst, "")
+ c.Check(err, IsNil)
+
+ for _, locator := range srcLocatorsMatchingPrefix {
+ _, ok := dstIndex[locator]
+ c.Assert(ok, Equals, true)
+ }
+
+ for _, locator := range extraDstLocators {
+ _, ok := dstIndex[locator]
+ c.Assert(ok, Equals, true)
+ }
+
+ if prefix == "" {
+ // all blocks from src and the two extra blocks
+ c.Assert(len(dstIndex), Equals, len(srcLocators)+len(extraDstLocators))
+ } else {
+ // 1 matching prefix and copied over, 2 that were initially copied into dst along with src, and the 2 extra blocks
+ c.Assert(len(dstIndex), Equals, len(srcLocatorsMatchingPrefix)+len(extraDstLocators)+2)
+ }
+}
+
+// Setup test data in src and dst.
+var srcLocators, srcLocatorsMatchingPrefix, dstLocators, extraDstLocators []string
+var dstIndex map[string]bool
+
+func setupTestData(c *C, indexPrefix string) {
+ srcLocators = []string{}
+ srcLocatorsMatchingPrefix = []string{}
+ dstLocators = []string{}
+ extraDstLocators = []string{}
+ dstIndex = make(map[string]bool)
+
+ // Put a few blocks in src using kcSrc
+ for i := 0; i < 5; i++ {
+ hash, _, err := kcSrc.PutB([]byte(fmt.Sprintf("test-data-%d", i)))
+ c.Check(err, IsNil)
+
+ srcLocators = append(srcLocators, strings.Split(hash, "+A")[0])
+ if strings.HasPrefix(hash, indexPrefix) {
+ srcLocatorsMatchingPrefix = append(srcLocatorsMatchingPrefix, strings.Split(hash, "+A")[0])
+ }
+ }
+
+ // Put first two of those src blocks in dst using kcDst
+ for i := 0; i < 2; i++ {
+ hash, _, err := kcDst.PutB([]byte(fmt.Sprintf("test-data-%d", i)))
+ c.Check(err, IsNil)
+ dstLocators = append(dstLocators, strings.Split(hash, "+A")[0])
+ }
+
+ // Put two more blocks in dst; they are not in src at all
+ for i := 0; i < 2; i++ {
+ hash, _, err := kcDst.PutB([]byte(fmt.Sprintf("other-data-%d", i)))
+ c.Check(err, IsNil)
+ dstLocators = append(dstLocators, strings.Split(hash, "+A")[0])
+ extraDstLocators = append(extraDstLocators, strings.Split(hash, "+A")[0])
+ }
+}
+
+// Setup rsync using srcKeepServicesJSON with fake keepservers.
+// Expect error during performKeepRsync due to unreachable src keepservers.
+func (s *ServerRequiredSuite) TestErrorDuringRsync_FakeSrcKeepservers(c *C) {
+ srcKeepServicesJSON = testKeepServicesJSON
+
+ setupRsync(c, false, 1)
+
+ err := performKeepRsync(kcSrc, kcDst, "", "")
+ c.Check(strings.HasSuffix(err.Error(), "no such host"), Equals, true)
+}
+
+// Setup rsync using dstKeepServicesJSON with fake keepservers.
+// Expect error during performKeepRsync due to unreachable dst keepservers.
+func (s *ServerRequiredSuite) TestErrorDuringRsync_FakeDstKeepservers(c *C) {
+ dstKeepServicesJSON = testKeepServicesJSON
+
+ setupRsync(c, false, 1)
+
+ err := performKeepRsync(kcSrc, kcDst, "", "")
+ c.Check(strings.HasSuffix(err.Error(), "no such host"), Equals, true)
+}
+
+// Test rsync with signature error during Get from src.
+func (s *ServerRequiredSuite) TestErrorDuringRsync_ErrorGettingBlockFromSrc(c *C) {
+ setupRsync(c, true, 1)
+
+ // put some blocks in src and dst
+ setupTestData(c, "")
+
+ // Change blob signing key to a fake key, so that Get from src fails
+ blobSigningKey = "thisisfakeblobsigningkey"
+
+ err := performKeepRsync(kcSrc, kcDst, blobSigningKey, "")
+ c.Check(strings.HasSuffix(err.Error(), "Block not found"), Equals, true)
+}
+
+// Test rsync with error during Put to dst.
+func (s *ServerRequiredSuite) TestErrorDuringRsync_ErrorPuttingBlockInDst(c *C) {
+ setupRsync(c, false, 1)
+
+ // put some blocks in src and dst
+ setupTestData(c, "")
+
+ // Increase Want_replicas on dst to result in insufficient replicas error during Put
+ kcDst.Want_replicas = 2
+
+ err := performKeepRsync(kcSrc, kcDst, blobSigningKey, "")
+ c.Check(strings.HasSuffix(err.Error(), "Could not write sufficient replicas"), Equals, true)
+}
+
+// Test loadConfig func
+func (s *ServerNotRequiredSuite) TestLoadConfig(c *C) {
+ // Setup a src config file
+ srcFile := setupConfigFile(c, "src-config")
+ defer os.Remove(srcFile.Name())
+ srcConfigFile := srcFile.Name()
+
+ // Setup a dst config file
+ dstFile := setupConfigFile(c, "dst-config")
+ defer os.Remove(dstFile.Name())
+ dstConfigFile := dstFile.Name()
+
+ // load configuration from those files
+ srcConfig, srcBlobSigningKey, err := loadConfig(srcConfigFile)
+ c.Check(err, IsNil)
+
+ c.Assert(srcConfig.APIHost, Equals, os.Getenv("ARVADOS_API_HOST"))
+ c.Assert(srcConfig.APIToken, Equals, os.Getenv("ARVADOS_API_TOKEN"))
+ c.Assert(srcConfig.APIHostInsecure, Equals, matchTrue.MatchString(os.Getenv("ARVADOS_API_HOST_INSECURE")))
+ c.Assert(srcConfig.ExternalClient, Equals, false)
+
+ dstConfig, _, err := loadConfig(dstConfigFile)
+ c.Check(err, IsNil)
+
+ c.Assert(dstConfig.APIHost, Equals, os.Getenv("ARVADOS_API_HOST"))
+ c.Assert(dstConfig.APIToken, Equals, os.Getenv("ARVADOS_API_TOKEN"))
+ c.Assert(dstConfig.APIHostInsecure, Equals, matchTrue.MatchString(os.Getenv("ARVADOS_API_HOST_INSECURE")))
+ c.Assert(dstConfig.ExternalClient, Equals, false)
+
+ c.Assert(srcBlobSigningKey, Equals, "abcdefg")
+}
+
+// Test loadConfig func without setting up the config files
+func (s *ServerNotRequiredSuite) TestLoadConfig_MissingSrcConfig(c *C) {
+ _, _, err := loadConfig("")
+ c.Assert(err.Error(), Equals, "config file not specified")
+}
+
+// TestLoadConfig_ErrorLoadingSrcConfig: a nonexistent config-file path must
+// surface the underlying OS error; only the suffix is checked because
+// loadConfig presumably wraps it with its own prefix — TODO confirm the
+// exact wrapping in loadConfig.
+func (s *ServerNotRequiredSuite) TestLoadConfig_ErrorLoadingSrcConfig(c *C) {
+	_, _, err := loadConfig("no-such-config-file")
+	c.Assert(strings.HasSuffix(err.Error(), "no such file or directory"), Equals, true)
+}
+
+// setupConfigFile creates a temp file (prefix `name`) under os.TempDir()
+// holding ARVADOS_* settings in KEY=VALUE lines, taking host/token/insecure
+// from the current environment and hard-coding ExternalClient=false and a
+// blob signing key of "abcdefg". Callers are responsible for os.Remove()ing
+// the returned file.
+// NOTE(review): the returned *os.File is never closed here, and callers in
+// this diff only remove it by name — consider closing after the write.
+// Presumably loadConfig re-opens the file by path, so this works; verify.
+func setupConfigFile(c *C, name string) *os.File {
+	// Setup a config file
+	file, err := ioutil.TempFile(os.TempDir(), name)
+	c.Check(err, IsNil)
+
+	fileContent := "ARVADOS_API_HOST=" + os.Getenv("ARVADOS_API_HOST") + "\n"
+	fileContent += "ARVADOS_API_TOKEN=" + os.Getenv("ARVADOS_API_TOKEN") + "\n"
+	fileContent += "ARVADOS_API_HOST_INSECURE=" + os.Getenv("ARVADOS_API_HOST_INSECURE") + "\n"
+	fileContent += "ARVADOS_EXTERNAL_CLIENT=false\n"
+	// No trailing newline on the last entry; loadConfig is expected to
+	// tolerate that — TODO confirm.
+	fileContent += "ARVADOS_BLOB_SIGNING_KEY=abcdefg"
+
+	_, err = file.Write([]byte(fileContent))
+	c.Check(err, IsNil)
+
+	return file
+}
+
+// Test_doMain_NoSrcConfig: with no -src flag at all, doMain must fail while
+// loading the src configuration and report the missing config file.
+func (s *DoMainTestSuite) Test_doMain_NoSrcConfig(c *C) {
+	err := doMain()
+	c.Check(err, NotNil)
+	c.Assert(err.Error(), Equals, "Error loading src configuration from file: config file not specified")
+}
+
+// Test_doMain_SrcButNoDstConfig: with -src given but no -dst, doMain must
+// fail while loading the dst configuration.
+// NOTE(review): this appends to the global os.Args and never restores it,
+// so flags would accumulate across tests in this suite — presumably the
+// suite's SetUpTest resets os.Args between tests; verify, otherwise the
+// tests are order-dependent. Also the temp config file is never removed
+// here (no defer os.Remove as in TestLoadConfig).
+func (s *DoMainTestSuite) Test_doMain_SrcButNoDstConfig(c *C) {
+	srcConfig := setupConfigFile(c, "src")
+	args := []string{"-replications", "3", "-src", srcConfig.Name()}
+	os.Args = append(os.Args, args...)
+	err := doMain()
+	c.Check(err, NotNil)
+	c.Assert(err.Error(), Equals, "Error loading dst configuration from file: config file not specified")
+}
+
+// Test_doMain_BadSrcConfig: a -src path that does not exist must make
+// doMain fail while reading the src config; only the error prefix is
+// pinned so the OS-specific suffix can vary.
+func (s *DoMainTestSuite) Test_doMain_BadSrcConfig(c *C) {
+	args := []string{"-src", "abcd"}
+	os.Args = append(os.Args, args...)
+	err := doMain()
+	c.Check(err, NotNil)
+	c.Assert(strings.HasPrefix(err.Error(), "Error loading src configuration from file: Error reading config file"), Equals, true)
+}
+
+// Test_doMain_WithReplicationsButNoSrcConfig: -replications alone is not
+// enough; doMain must still fail on the missing src config file.
+func (s *DoMainTestSuite) Test_doMain_WithReplicationsButNoSrcConfig(c *C) {
+	args := []string{"-replications", "3"}
+	os.Args = append(os.Args, args...)
+	err := doMain()
+	c.Check(err, NotNil)
+	c.Assert(err.Error(), Equals, "Error loading src configuration from file: config file not specified")
+}
+
+// Test_doMainWithSrcAndDstConfig: the happy path — with both -src and -dst
+// pointing at valid config files and keepservers running, doMain must
+// succeed.
+// NOTE(review): arvadostest.StartKeep is never paired with a StopKeep here,
+// and the two temp config files are not removed — presumably suite teardown
+// handles both; verify. os.Args is also mutated without restore (see the
+// note on Test_doMain_SrcButNoDstConfig).
+func (s *DoMainTestSuite) Test_doMainWithSrcAndDstConfig(c *C) {
+	srcConfig := setupConfigFile(c, "src")
+	dstConfig := setupConfigFile(c, "dst")
+	args := []string{"-src", srcConfig.Name(), "-dst", dstConfig.Name()}
+	os.Args = append(os.Args, args...)
+
+	// Start keepservers. Since we are not doing any tweaking as in setupRsync func,
+	// kcSrc and kcDst will be the same and no actual copying to dst will happen, but that's ok.
+	arvadostest.StartKeep(2, false)
+
+	err := doMain()
+	c.Check(err, IsNil)
+}