Merge branch 'master' into 15531-logincluster-migrate
author Peter Amstutz <pamstutz@veritasgenetics.com>
Tue, 8 Oct 2019 19:21:32 +0000 (15:21 -0400)
committer Peter Amstutz <pamstutz@veritasgenetics.com>
Tue, 8 Oct 2019 19:21:32 +0000 (15:21 -0400)
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz@veritasgenetics.com>

138 files changed:
apps/workbench/Gemfile
apps/workbench/Gemfile.lock
apps/workbench/app/views/projects/_show_dashboard.html.erb
apps/workbench/test/controllers/projects_controller_test.rb
apps/workbench/test/integration/application_layout_test.rb
apps/workbench/test/integration/logins_test.rb
apps/workbench/test/integration/user_profile_test.rb
apps/workbench/test/integration/work_units_test.rb
build/run-library.sh
build/run-tests.sh
doc/_config.yml
doc/_includes/_assign_volume_uuid.liquid [new file with mode: 0644]
doc/admin/collection-versioning.html.textile.liquid
doc/admin/config-migration.html.textile.liquid
doc/admin/keep-balance.html.textile.liquid [new file with mode: 0644]
doc/admin/upgrading.html.textile.liquid
doc/api/methods/containers.html.textile.liquid
doc/api/permission-model.html.textile.liquid
doc/install/configure-azure-blob-storage.html.textile.liquid
doc/install/configure-fs-storage.html.textile.liquid
doc/install/configure-s3-object-storage.html.textile.liquid
doc/install/install-keep-balance.html.textile.liquid
doc/install/install-keepstore.html.textile.liquid
lib/config/cmd_test.go
lib/config/config.default.yml
lib/config/deprecated.go
lib/config/deprecated_keepstore.go [new file with mode: 0644]
lib/config/deprecated_keepstore_test.go [new file with mode: 0644]
lib/config/deprecated_test.go
lib/config/export.go
lib/config/generated_config.go
lib/config/load.go
lib/config/load_test.go
lib/controller/cmd.go
lib/controller/handler_test.go
lib/dispatchcloud/cmd.go
lib/dispatchcloud/dispatcher.go
lib/dispatchcloud/dispatcher_test.go
lib/service/cmd.go
lib/service/cmd_test.go
lib/service/tls.go [new file with mode: 0644]
sdk/cli/arvados-cli.gemspec
sdk/cli/test/test_arv-collection-create.rb
sdk/cli/test/test_arv-get.rb
sdk/cli/test/test_arv-keep-get.rb
sdk/cli/test/test_arv-keep-put.rb
sdk/cli/test/test_arv-tag.rb
sdk/cli/test/test_arv-ws.rb
sdk/cwl/arvados_cwl/arvworkflow.py
sdk/cwl/tests/arvados-tests.yml
sdk/cwl/tests/wf/runin-reqs-wf.cwl
sdk/cwl/tests/wf/runin-reqs-wf2.cwl
sdk/cwl/tests/wf/runin-reqs-wf3.cwl
sdk/cwl/tests/wf/runin-reqs-wf4.cwl
sdk/cwl/tests/wf/runin-reqs-wf5.cwl [new file with mode: 0644]
sdk/go/arvados/config.go
sdk/go/arvados/keep_service.go
sdk/go/arvadostest/run_servers.go
sdk/go/ctxlog/log.go
sdk/go/httpserver/httpserver.go
sdk/go/httpserver/request_limiter.go
sdk/go/httpserver/request_limiter_test.go
sdk/python/tests/run_test_server.py
sdk/python/tests/test_arv_put.py
sdk/python/tests/test_keep_client.py
sdk/ruby/arvados.gemspec
services/api/app/controllers/arvados/v1/keep_services_controller.rb
services/api/app/models/container.rb
services/api/app/models/keep_service.rb
services/api/test/functional/arvados/v1/keep_services_controller_test.rb
services/api/test/unit/container_test.rb
services/arv-git-httpd/auth_handler_test.go
services/arv-git-httpd/git_handler_test.go
services/arv-git-httpd/gitolite_test.go
services/arv-git-httpd/integration_test.go
services/crunch-run/git_mount_test.go
services/fuse/tests/integration_test.py
services/fuse/tests/test_exec.py
services/health/main.go
services/keep-balance/balance.go
services/keep-balance/balance_run_test.go
services/keep-balance/balance_test.go
services/keep-balance/collection_test.go
services/keep-balance/integration_test.go
services/keep-balance/keep-balance.service
services/keep-balance/main.go
services/keep-balance/main_test.go [deleted file]
services/keep-balance/metrics.go
services/keep-balance/server.go
services/keep-balance/usage.go [deleted file]
services/keep-web/handler_test.go
services/keep-web/server_test.go
services/keepproxy/keepproxy_test.go
services/keepstore/azure_blob_volume.go
services/keepstore/azure_blob_volume_test.go
services/keepstore/bufferpool.go
services/keepstore/bufferpool_test.go
services/keepstore/command.go [new file with mode: 0644]
services/keepstore/command_test.go [new file with mode: 0644]
services/keepstore/config.go [deleted file]
services/keepstore/config_test.go [deleted file]
services/keepstore/deprecated.go [deleted file]
services/keepstore/handler_test.go
services/keepstore/handlers.go
services/keepstore/handlers_with_generic_volume_test.go [deleted file]
services/keepstore/keepstore.go
services/keepstore/keepstore.service
services/keepstore/keepstore_test.go [deleted file]
services/keepstore/metrics.go
services/keepstore/mounts_test.go
services/keepstore/perms.go
services/keepstore/perms_test.go
services/keepstore/proxy_remote.go
services/keepstore/proxy_remote_test.go
services/keepstore/pull_worker.go
services/keepstore/pull_worker_integration_test.go
services/keepstore/pull_worker_test.go
services/keepstore/s3_volume.go
services/keepstore/s3_volume_test.go
services/keepstore/server.go [deleted file]
services/keepstore/server_test.go [deleted file]
services/keepstore/status_test.go
services/keepstore/trash_worker.go
services/keepstore/trash_worker_test.go
services/keepstore/unix_volume.go
services/keepstore/unix_volume_test.go
services/keepstore/usage.go [deleted file]
services/keepstore/volume.go
services/keepstore/volume_generic_test.go
services/keepstore/volume_test.go
services/login-sync/Gemfile.lock
services/login-sync/arvados-login-sync.gemspec
services/ws/server_test.go
tools/arvbox/lib/arvbox/docker/cluster-config.sh
tools/arvbox/lib/arvbox/docker/keep-setup.sh
tools/arvbox/lib/arvbox/docker/service/composer/run-service
tools/arvbox/lib/arvbox/docker/service/nginx/run
tools/arvbox/lib/arvbox/docker/service/workbench2/run-service

index ce2a1377d72a3f9f6bbbc3cf92243921eff137cf..bc62407bc5173fa77b63daaa7f6f58882c29be33 100644 (file)
@@ -90,9 +90,6 @@ gem 'sshkey'
 # Deploy with Capistrano
 # gem 'capistrano'
 
-# To use debugger
-#gem 'byebug'
-
 gem 'passenger', :group => :production
 gem 'andand'
 gem 'RedCloth'
index 548da1dc049bf32a2abc886e2c451f2b5b1927ab..ce328dc8954393d24b295fddaccf1b1487715a5f 100644 (file)
@@ -53,12 +53,12 @@ GEM
       i18n (>= 0.7, < 2)
       minitest (~> 5.1)
       tzinfo (~> 1.1)
-    addressable (2.6.0)
-      public_suffix (>= 2.0.2, < 4.0)
+    addressable (2.7.0)
+      public_suffix (>= 2.0.2, < 5.0)
     andand (1.3.3)
     angularjs-rails (1.3.15)
     arel (7.1.4)
-    arvados (1.3.1.20190320201707)
+    arvados (1.3.3.20190320201707)
       activesupport (>= 3)
       andand (~> 1.3, >= 1.3.3)
       arvados-google-api-client (>= 0.7, < 0.8.9)
@@ -127,7 +127,7 @@ GEM
     flamegraph (0.9.5)
     globalid (0.4.2)
       activesupport (>= 4.2.0)
-    googleauth (0.8.1)
+    googleauth (0.9.0)
       faraday (~> 0.12)
       jwt (>= 1.4, < 3.0)
       memoist (~> 0.16)
@@ -153,7 +153,7 @@ GEM
       actionpack (>= 4)
       less (~> 2.6.0)
       sprockets (>= 2)
-    libv8 (3.16.14.19)
+    libv8 (3.16.14.19-x86_64-linux)
     lograge (0.10.0)
       actionpack (>= 4)
       activesupport (>= 4)
@@ -179,7 +179,7 @@ GEM
     morrisjs-rails (0.5.1.2)
       railties (> 3.1, < 6)
     multi_json (1.13.1)
-    multipart-post (2.0.0)
+    multipart-post (2.1.1)
     net-scp (2.0.0)
       net-ssh (>= 2.6.5, < 6.0.0)
     net-sftp (2.1.2)
@@ -188,12 +188,12 @@ GEM
     net-ssh-gateway (2.0.0)
       net-ssh (>= 4.0.0)
     nio4r (2.3.1)
-    nokogiri (1.10.2)
+    nokogiri (1.10.4)
       mini_portile2 (~> 2.4.0)
     npm-rails (0.2.1)
       rails (>= 3.2)
-    oj (3.7.11)
-    os (1.0.0)
+    oj (3.7.12)
+    os (1.0.1)
     passenger (6.0.2)
       rack
       rake (>= 0.8.1)
@@ -206,7 +206,7 @@ GEM
       cliver (~> 0.3.1)
       multi_json (~> 1.0)
       websocket-driver (>= 0.2.0)
-    public_suffix (3.0.3)
+    public_suffix (4.0.1)
     rack (2.0.7)
     rack-mini-profiler (1.0.2)
       rack (>= 1.2.0)
@@ -254,7 +254,7 @@ GEM
     retriable (1.4.1)
     ruby-debug-passenger (0.2.0)
     ruby-prof (0.17.0)
-    rubyzip (1.2.2)
+    rubyzip (1.3.0)
     rvm-capistrano (1.5.6)
       capistrano (~> 2.15.4)
     safe_yaml (1.0.5)
index 71ef2454190d7625cae12cf77d44f05a1ad7ed27..6c58cd30fc723f7a662b2b6b6728cb719519136b 100644 (file)
@@ -36,7 +36,7 @@ SPDX-License-Identifier: AGPL-3.0 %>
 
 <%
   recent_procs_panel_width = 6
-  if !PipelineInstance.api_exists?(:index)
+  if !PipelineInstance.api_exists?(:create)
     recent_procs_title = 'Recent processes'
     run_proc_title = 'Choose a workflow to run:'
     show_node_status = false
index 0f79168901364f72c232c85cd981eb0540efd084..dd828952be2fb828e8f1e1b11b1cfc1616515683 100644 (file)
@@ -396,10 +396,7 @@ EOT
     end
   end
 
-  [
-    [:admin, true],
-    [:active, false],
-  ].each do |user, expect_all_nodes|
+  [:admin, :active].each do |user|
     test "in dashboard other index page links as #{user}" do
       get :index, params: {}, session: session_for(user)
 
@@ -409,14 +406,6 @@ EOT
         assert_includes @response.body, "href=\"#{path}\""
         assert_includes @response.body, "All #{target}"
       end
-
-      if expect_all_nodes
-        assert_includes @response.body, "href=\"/nodes\""
-        assert_includes @response.body, "All nodes"
-      else
-        assert_not_includes @response.body, "href=\"/nodes\""
-        assert_not_includes @response.body, "All nodes"
-      end
     end
   end
 
index dc958d3b5e23295bd2013de9c0a4af9db419b0d0..e28809e1318ba42c572d9f1a3eca94387d9a39b2 100644 (file)
@@ -214,7 +214,7 @@ class ApplicationLayoutTest < ActionDispatch::IntegrationTest
 
         first('button', text: 'x').click
       end
-      assert_text 'Recent pipelines and processes' # seeing dashboard now
+      assert_text 'Recent processes' # seeing dashboard now
     end
   end
 
@@ -285,7 +285,7 @@ class ApplicationLayoutTest < ActionDispatch::IntegrationTest
     test "visit dashboard as #{token}" do
       visit page_with_token(token)
 
-      assert_text 'Recent pipelines and processes' # seeing dashboard now
+      assert_text 'Recent processes' # seeing dashboard now
       within('.recent-processes-actions') do
         assert page.has_link?('Run a process')
         assert page.has_link?('All processes')
@@ -307,19 +307,6 @@ class ApplicationLayoutTest < ActionDispatch::IntegrationTest
           assert page.has_link? 'foo_file'
         end
       end
-
-      within('.compute-node-actions') do
-        if is_admin
-          assert page.has_link?('All nodes')
-        else
-          assert page.has_no_link?('All nodes')
-        end
-      end
-
-      within('.compute-node-summary-pane') do
-        click_link 'Details'
-        assert_text 'compute0'
-      end
     end
   end
 end
index 7f2774ce2f33beb7d596a32dc89569c984ff17f2..f079fbb8f1ce39eb4d88e33f232c3f8796dce89c 100644 (file)
@@ -11,7 +11,7 @@ class LoginsTest < ActionDispatch::IntegrationTest
 
   test "login with api_token works after redirect" do
     visit page_with_token('active_trustedclient')
-    assert page.has_text?('Recent pipelines and processes'), "Missing 'Recent pipelines and processes' from page"
+    assert page.has_text?('Recent processes'), "Missing 'Recent processes' from page"
     assert_no_match(/\bapi_token=/, current_path)
   end
 
index 547ef06a6827f013b0e958226e10d87ef57464fe..30d4943c62018473822877b475023544f73ac503 100644 (file)
@@ -24,7 +24,7 @@ class UserProfileTest < ActionDispatch::IntegrationTest
         assert_text('Save profile')
         add_profile user
       else
-        assert_text('Recent pipelines and processes')
+        assert_text('Recent processes')
         assert_no_text('Save profile')
       end
     elsif invited
@@ -126,7 +126,7 @@ class UserProfileTest < ActionDispatch::IntegrationTest
     end
 
     # profile saved and in home page now
-    assert_text('Recent pipelines and processes')
+    assert_text('Recent processes')
   end
 
   [
index fe73f2734f3d9473a82c9ddb7e11498613ad645d..9d4f5905553d96f9fbc3500009dad5d11bdd49b7 100644 (file)
@@ -145,7 +145,7 @@ class WorkUnitsTest < ActionDispatch::IntegrationTest
   ].each do |template_name, preview_txt, process_txt|
     test "run a process using template #{template_name} from dashboard" do
       visit page_with_token('admin')
-      assert_text 'Recent pipelines and processes' # seeing dashboard now
+      assert_text 'Recent processes' # seeing dashboard now
 
       within('.recent-processes-actions') do
         assert page.has_link?('All processes')
index cb5549df7183676217270adc03b98b1ddbb0cf25..95f2ff1452c4a4477d5c62be391148802899383f 100755 (executable)
@@ -164,7 +164,7 @@ package_go_binary() {
       return 1
     fi
 
-    go get -ldflags "-X main.version=${go_package_version}" "git.curoverse.com/arvados.git/$src_path"
+    go get -ldflags "-X git.curoverse.com/arvados.git/lib/cmd.version=${go_package_version} -X main.version=${go_package_version}" "git.curoverse.com/arvados.git/$src_path"
 
     local -a switches=()
     systemd_unit="$WORKSPACE/${src_path}/${prog}.service"
@@ -209,6 +209,26 @@ _build_rails_package_scripts() {
     done
 }
 
+rails_package_version() {
+    local pkgname="$1"; shift
+    if [[ -n "$ARVADOS_BUILDING_VERSION" ]]; then
+        echo "$ARVADOS_BUILDING_VERSION"
+        return
+    fi
+    local version="$(version_from_git)"
+    if [ $pkgname = "arvados-api-server" -o $pkgname = "arvados-workbench" ] ; then
+       local P="$PWD"
+       cd $WORKSPACE
+       local arvados_server_version
+       calculate_go_package_version arvados_server_version cmd/arvados-server
+       cd $P
+       if [ $arvados_server_version > $version ] ; then
+           version=$arvados_server_version
+       fi
+    fi
+    echo $version
+}
+
 test_rails_package_presence() {
   local pkgname="$1"; shift
   local srcdir="$1"; shift
@@ -221,7 +241,7 @@ test_rails_package_presence() {
 
   cd $srcdir
 
-  local version="$(version_from_git)"
+  local version="$(rails_package_version $pkgname)"
 
   cd $tmppwd
 
@@ -312,11 +332,11 @@ test_package_presence() {
         repo_subdir=${pkgname:0:1}
       fi
 
-      repo_pkg_list=$(curl -s -o - http://apt.arvados.org/pool/${D}/main/${repo_subdir}/)
-      echo ${repo_pkg_list} |grep -q ${full_pkgname}
+      repo_pkg_list=$(curl -s -o - http://apt.arvados.org/pool/${D}-dev/main/${repo_subdir}/${pkgname}/)
+      echo "${repo_pkg_list}" |grep -q ${full_pkgname}
       if [ $? -eq 0 ] ; then
         echo "Package $full_pkgname exists upstream, not rebuilding, downloading instead!"
-        curl -s -o "$WORKSPACE/packages/$TARGET/${full_pkgname}" http://apt.arvados.org/pool/${D}/main/${repo_subdir}/${full_pkgname}
+        curl -s -o "$WORKSPACE/packages/$TARGET/${full_pkgname}" http://apt.arvados.org/pool/${D}-dev/main/${repo_subdir}/${pkgname}/${full_pkgname}
         return 1
       elif test -f "$WORKSPACE/packages/$TARGET/processed/${full_pkgname}" ; then
         echo "Package $full_pkgname exists, not rebuilding!"
@@ -353,7 +373,7 @@ handle_rails_package() {
     local srcdir="$1"; shift
     cd "$srcdir"
     local license_path="$1"; shift
-    local version="$(version_from_git)"
+    local version="$(rails_package_version $pkgname)"
     echo "$version" >package-build.version
     local scripts_dir="$(mktemp --tmpdir -d "$pkgname-XXXXXXXX.scripts")" && \
     (
index 0c7909c6ae55693459c3266e218c17deb6ae3971..766ff1b82465322cc07b40b235f80087804deb38 100755 (executable)
@@ -470,6 +470,7 @@ stop_services() {
         && python sdk/python/tests/run_test_server.py stop \
         && all_services_stopped=1
     deactivate
+    unset ARVADOS_CONFIG
 }
 
 interrupt() {
@@ -633,15 +634,12 @@ install_env() {
             for d in \
                 "$GOPATH/src/git.curoverse.com/arvados.git/tmp/GOPATH" \
                     "$GOPATH/src/git.curoverse.com/arvados.git/tmp" \
+                    "$GOPATH/src/git.curoverse.com/arvados.git/arvados" \
                     "$GOPATH/src/git.curoverse.com/arvados.git"; do
+                [[ -h "$d" ]] && rm "$d"
                 [[ -d "$d" ]] && rmdir "$d"
             done
         fi
-        for d in \
-            "$GOPATH/src/git.curoverse.com/arvados.git/arvados" \
-                "$GOPATH/src/git.curoverse.com/arvados.git"; do
-            [[ -h "$d" ]] && rm "$d"
-        done
         ln -vsfT "$WORKSPACE" "$GOPATH/src/git.curoverse.com/arvados.git"
         go get -v github.com/kardianos/govendor
         cd "$GOPATH/src/git.curoverse.com/arvados.git"
@@ -712,8 +710,6 @@ retry() {
 }
 
 do_test() {
-    check_arvados_config "$1"
-
     case "${1}" in
         apps/workbench_units | apps/workbench_functionals | apps/workbench_integration)
             suite=apps/workbench
@@ -733,11 +729,14 @@ do_test() {
     case "${1}" in
         services/api)
             stop_services
+            check_arvados_config "$1"
             ;;
         gofmt | govendor | doc | lib/cli | lib/cloud/azure | lib/cloud/ec2 | lib/cloud/cloudtest | lib/cmd | lib/dispatchcloud/ssh_executor | lib/dispatchcloud/worker)
+            check_arvados_config "$1"
             # don't care whether services are running
             ;;
         *)
+            check_arvados_config "$1"
             if ! start_services; then
                 checkexit 1 "$1 tests"
                 title "test $1 -- failed to start services"
@@ -748,6 +747,11 @@ do_test() {
     retry do_test_once ${@}
 }
 
+go_ldflags() {
+    version=${ARVADOS_VERSION:-$(git log -n1 --format=%H)-dev}
+    echo "-X git.curoverse.com/arvados.git/lib/cmd.version=${version} -X main.version=${version}"
+}
+
 do_test_once() {
     unset result
 
@@ -767,7 +771,7 @@ do_test_once() {
         # before trying "go test". Otherwise, coverage-reporting
         # mode makes Go show the wrong line numbers when reporting
         # compilation errors.
-        go get -ldflags "-X git.curoverse.com/arvados.git/lib/cmd.version=${ARVADOS_VERSION:-$(git log -n1 --format=%H)-dev}" -t "git.curoverse.com/arvados.git/$1" && \
+        go get -ldflags "$(go_ldflags)" -t "git.curoverse.com/arvados.git/$1" && \
             cd "$GOPATH/src/git.curoverse.com/arvados.git/$1" && \
             if [[ -n "${testargs[$1]}" ]]
         then
@@ -831,16 +835,17 @@ check_arvados_config() {
            install_env
        fi
        . "$VENVDIR/bin/activate"
+        cd "$WORKSPACE"
        eval $(python sdk/python/tests/run_test_server.py setup_config)
        deactivate
     fi
 }
 
 do_install() {
-    check_arvados_config "$1"
     if [[ -n "${skip[install]}" || ( -n "${only_install}" && "${only_install}" != "${1}" && "${only_install}" != "${2}" ) ]]; then
         return 0
     fi
+    check_arvados_config "$1"
     retry do_install_once ${@}
 }
 
@@ -854,7 +859,7 @@ do_install_once() {
         result=1
     elif [[ "$2" == "go" ]]
     then
-        go get -ldflags "-X git.curoverse.com/arvados.git/lib/cmd.version=${ARVADOS_VERSION:-$(git log -n1 --format=%H)-dev}" -t "git.curoverse.com/arvados.git/$1"
+        go get -ldflags "$(go_ldflags)" -t "git.curoverse.com/arvados.git/$1"
     elif [[ "$2" == "pip" ]]
     then
         # $3 can name a path directory for us to use, including trailing
index 314b5b5cd3230eb89c2423827cb8a1b120c6a4a3..0ae26584e855c18b2d6ad9a058deadff63e784e3 100644 (file)
@@ -151,7 +151,6 @@ navbar:
     - Configuration:
       - admin/config.html.textile.liquid
       - admin/federation.html.textile.liquid
-      - admin/collection-managed-properties.html.textile.liquid
     - Upgrading and migrations:
       - admin/upgrading.html.textile.liquid
       - admin/config-migration.html.textile.liquid
@@ -171,6 +170,10 @@ navbar:
       - admin/cloudtest.html.textile.liquid
     - Data Management:
       - admin/collection-versioning.html.textile.liquid
+      - admin/collection-managed-properties.html.textile.liquid
+      - admin/keep-balance.html.textile.liquid
+    - Other:
+      - admin/federation.html.textile.liquid
       - admin/controlling-container-reuse.html.textile.liquid
       - admin/logs-table-management.html.textile.liquid
     - Other:
diff --git a/doc/_includes/_assign_volume_uuid.liquid b/doc/_includes/_assign_volume_uuid.liquid
new file mode 100644 (file)
index 0000000..cdd0f1a
--- /dev/null
@@ -0,0 +1,7 @@
+{% comment %}
+Copyright (C) The Arvados Authors. All rights reserved.
+
+SPDX-License-Identifier: CC-BY-SA-3.0
+{% endcomment %}
+
+Note that each volume has a UUID, like @zzzzz-nyw5e-0123456789abcde@. You assign these manually: replace @zzzzz@ with your cluster ID, and replace @0123456789abcde@ with an arbitrary string of 15 alphanumerics. Once assigned, UUIDs should not be changed.
index 6da1756b5ce94bc2f1e624a290aabe3bfb3f720e..0a4d1fa769ac14b691d05bff673a9487df9559c1 100644 (file)
@@ -16,7 +16,7 @@ h3. API Server configuration
 
 There are 2 configuration settings that control this feature, both go on the @application.yml@ file.
 
-h4. Settting: @collection_versioning@ (Boolean. Default: false)
+h4. Setting: @collection_versioning@ (Boolean. Default: false)
 
 If @true@, collection versioning is enabled, meaning that new version records can be created. Note that if you set @collection_versioning@ to @false@ after being enabled, old versions will still be accessible, but further changes will not be versioned.
 
index 4e2fd81afc8ec2cd6d7eb588939d31e3c325ad57..d40cd3bbdc5feeb548693d60d820aa75fd3fee7b 100644 (file)
@@ -58,9 +58,13 @@ h2. crunch-dispatch-slurm
 
 Currently only reads @InstanceTypes@ from centralized configuration.  Still requires component-specific configuration file.
 
-h2. keepstore
+h2(#keepstore). keepstore
 
-Currently only reads @RemoteClusters@ from centralized configuration.  Still requires component-specific configuration file.
+The legacy keepstore config (loaded from @/etc/arvados/keepstore/keepstore.yml@ or a different location specified via -legacy-keepstore-config command line argument) takes precedence over the centralized config. After you migrate everything from the legacy config to the centralized config, you should delete @/etc/arvados/keepstore/keepstore.yml@ and stop using the -legacy-keepstore-config argument.
+
+To migrate a keepstore node's configuration, first install @arvados-server@. Run @arvados-server config-diff@, review and apply the recommended changes to @/etc/arvados/config.yml@, and run @arvados-server config-diff@ again to check for additional warnings and recommendations. When you are satisfied, delete the legacy config file, restart keepstore, and check its startup logs. Copy the updated centralized config file to your next keepstore server, and repeat the process there.
+
+After migrating and removing all legacy keepstore config files, make sure the @/etc/arvados/config.yml@ file is identical across all system nodes -- API server, keepstore, etc. -- and restart all services to make sure they are using the latest configuration.
 
 h2(#keepproxy). keepproxy
 
@@ -70,6 +74,9 @@ h2(#arv-git-httpd). arv-git-httpd
 
 The legacy arv-git-httpd config (loaded from @/etc/arvados/git-httpd/git-httpd.yml@ or a different location specified via -legacy-git-httpd-config command line argument) takes precedence over the centralized config. After you migrate everything from the legacy config to the centralized config, you should delete @/etc/arvados/git-httpd/git-httpd.yml@ and stop using the -legacy-git-httpd-config argument.
 
+h2(#keepbalance). keep-balance
+
+The legacy keep-balance config (loaded from @/etc/arvados/keep-balance/keep-balance.yml@ or a different location specified via -legacy-keepbalance-config command line argument) takes precedence over the centralized config. After you migrate everything from the legacy config to the centralized config, you should delete @/etc/arvados/keep-balance/keep-balance.yml@ and stop using the -legacy-keepbalance-config argument.
 
 h2. arvados-controller
 
diff --git a/doc/admin/keep-balance.html.textile.liquid b/doc/admin/keep-balance.html.textile.liquid
new file mode 100644 (file)
index 0000000..5af0a26
--- /dev/null
@@ -0,0 +1,43 @@
+---
+layout: default
+navsection: admin
+title: Balancing Keep servers
+...
+
+{% comment %}
+Copyright (C) The Arvados Authors. All rights reserved.
+
+SPDX-License-Identifier: CC-BY-SA-3.0
+{% endcomment %}
+
+This page describes how to balance keepstore servers using keep-balance. Keep-balance creates new copies of under-replicated blocks, deletes excess copies of over-replicated and unreferenced blocks, and moves blocks to better positions (e.g. after adding new keepstore servers) so clients find them faster.
+
+See "the Keep-balance install docs":{{site.baseurl}}/install/install-keep-balance.html for installation instructions.
+
+h3. Data deletion
+
+The keep-balance service determines which blocks are candidates for deletion and instructs the keepstore to move those blocks to the trash. When a block is newly written, it is protected from deletion for the duration in @BlobSigningTTL@.  During this time, it cannot be trashed or deleted.
+
+If keep-balance instructs keepstore to trash a block which is older than @BlobSigningTTL@, and @BlobTrashLifetime@ is non-zero, the block will be moved to "trash".  A block which is in the trash is no longer accessible by read requests, but has not yet been permanently deleted.  Blocks which are in the trash may be recovered using the "untrash" API endpoint.  Blocks are permanently deleted after they have been in the trash for the duration in @BlobTrashLifetime@.
+
+Keep-balance is also responsible for balancing the distribution of blocks across keepstore servers by asking servers to pull blocks from other servers (as determined by their "storage class":{{site.baseurl}}/admin/storage-classes.html and "rendezvous hashing order":{{site.baseurl}}/api/storage.html).  Pulling a block makes a copy.  If a block is over-replicated (i.e. there are excess copies) after pulling, it will be subsequently trashed and deleted on the original server, subject to @BlobTrash@ and @BlobTrashLifetime@ settings.
+
+h3. Scanning
+
+By default, keep-balance operates periodically: it does a scan/balance operation, sleeps, and repeats.
+
+The @Collections.BalancePeriod@ value in @/etc/arvados/config.yml@ determines the interval between start times of successive scan/balance operations. If an operation takes longer than the @Collections.BalancePeriod@, the next operation will follow it immediately. If SIGUSR1 is received during an idle period between operations, the next operation will start immediately.
+
+Keep-balance can also be run with the @-once@ flag to do a single scan/balance operation and then exit. The exit code will be zero if the operation was successful.
+
+h3. Committing
+
+Keep-balance computes and reports changes but does not implement them by sending pull and trash lists to the Keep services unless the @-commit-pull@ and @-commit-trash@ flags are used.
+
+h3. Additional configuration
+
+For configuring resource usage tuning and lost block reporting, please see the @Collections.BlobMissingReport@, @Collections.BalanceCollectionBatch@, and @Collections.BalanceCollectionBuffers@ options in the "default config.yml file":{{site.baseurl}}/admin/config.html.
+
+h3. Limitations
+
+Keep-balance does not attempt to discover whether committed pull and trash requests ever get carried out -- only that they are accepted by the Keep services. If some services are full, new copies of under-replicated blocks might never get made, only repeatedly requested.
\ No newline at end of file
index ddb52fc23aa8313e8c1a38045063eac8f98f6b33..376d7abc07f528c4e87249edf21fe96c364bd796 100644 (file)
@@ -41,13 +41,38 @@ table(table table-bordered table-condensed).
 
 h3(#master). development master (as of 2019-08-12)
 
+h4. Delete "keep_services" records
+
+After all keepproxy and keepstore configurations have been migrated to the centralized configuration file (see below), all keep_services records you added manually during installation should be removed. System logs from keepstore and keepproxy at startup, as well as the output of @arvados-server config-check@, will remind you to do this.
+
+<notextile><pre><code>$ export ARVADOS_API_HOST=...
+$ export ARVADOS_API_TOKEN=...
+$ arv --format=uuid keep_service list | xargs -n1 arv keep_service delete --uuid
+</code></pre></notextile>
+
+Once these old records are removed, @arv keep_service list@ will instead return the services listed under Services/Keepstore/InternalURLs and Services/Keepproxy/ExternalURL in your centralized configuration file.
+
+h4. Keep-balance configuration migration
+
+(feature "#14714":https://dev.arvados.org/issues/14714 ) The keep-balance service can now be configured using the centralized configuration file at @/etc/arvados/config.yml@. The following command line and configuration options have changed.
+
+You can no longer specify types of keep services to balance via the @KeepServiceTypes@ config option in the legacy config at @/etc/arvados/keep-balance/keep-balance.yml@. If you are still using the legacy config and @KeepServiceTypes@ has a value other than "disk", keep-balance will produce an error.
+
+You can no longer specify individual keep services to balance via the @config.KeepServiceList@ command line option or @KeepServiceList@ legacy config option. Instead, keep-balance will operate on all keepstore servers with @service_type:disk@ as reported by the @arv keep_service list@ command. If you are still using the legacy config, @KeepServiceList@ should be removed or keep-balance will produce an error.
+
+Please see the "config migration guide":{{site.baseurl}}/admin/config-migration.html and "keep-balance install guide":{{site.baseurl}}/install/install-keep-balance.html for more details.
+
 h4. Arv-git-httpd configuration migration
 
 (feature "#14712":https://dev.arvados.org/issues/14712 ) The arv-git-httpd package can now be configured using the centralized configuration file at @/etc/arvados/config.yml@. Configuration via individual command line arguments is no longer available. Please see "arv-git-httpd's config migration guide":{{site.baseurl}}/admin/config-migration.html#arv-git-httpd for more details.
 
-h4. Keep-web dropped support on command line flags configuration
+h4. Keepstore and keep-web configuration migration
+
+keepstore and keep-web no longer support configuration via (previously deprecated) command line configuration flags and environment variables.
+
+keep-web now supports the legacy @keep-web.yml@ config format (used by Arvados 1.4) and the new cluster config file format. Please check "keep-web's install guide":{{site.baseurl}}/install/install-keep-web.html for more details.
 
-As we're migrating to a central cluster configuration file, the already deprecated way of getting configurations via environment variables and command line flags isn't valid anymore. Current keep-web supports both the now legacy @keep-web.yml@ config format (used by Arvados 1.4) and the new cluster config file format. Please check "keep-web's install guide":{{site.baseurl}}/install/install-keep-web.html for more details.
+keepstore now supports the legacy @keepstore.yml@ config format (used by Arvados 1.4) and the new cluster config file format. Please check the "keepstore config migration notes":{{site.baseurl}}/admin/config-migration.html#keepstore and "keepstore install guide":{{site.baseurl}}/install/install-keepstore.html for more details.
 
 h4. Jobs API is read-only
 
index d59c66edc3cbf5f492dbb0c4befac668e209c980..5ec95cee62ab18a7f8b69ac42382a4f42e7ac1cd 100644 (file)
@@ -110,7 +110,9 @@ table(table table-bordered table-condensed).
 
 h3. delete
 
-Delete an existing Container.
+Delete a Container.
+
+This API requires admin privileges. In normal operation, it should not be used at all. API clients like Workbench might not work correctly when a container request references a container that has been deleted.
 
 Arguments:
 
index 7ee179071aed638a04bddfc2194319c5e0cf6f6a..1f08ea419523a5178946bfcf43bd1ecd4e3a96a4 100644 (file)
@@ -70,7 +70,7 @@ A privileged user account exists for the use by internal Arvados components.  Th
 
 h2. Anonymous user and group
 
-An Arvado site may be configued to allow users to browse resources without requiring a log in.  In this case, permissions for non-logged-in users are associated with the "anonymous" user.  To make objects visible to the public, they can be shared with the "anonymous" group.  The anonymous user uuid is @{siteprefix}-tpzed-anonymouspublic@.  The anonymous group uuid is @{siteprefix}-j7d0g-anonymouspublic@.
+An Arvados site may be configured to allow users to browse resources without requiring a login.  In this case, permissions for non-logged-in users are associated with the "anonymous" user.  To make objects visible to the public, they can be shared with the "anonymous" group.  The anonymous user uuid is @{siteprefix}-tpzed-anonymouspublic@.  The anonymous group uuid is @{siteprefix}-j7d0g-anonymouspublic@.
 
 h2. Example
 
index 8a0e7bfa077743b30329cd619ef4da6d1228c172..2f68c8a4bfddb3cf779123d412d0698c8caa95e4 100644 (file)
@@ -17,9 +17,9 @@ Before starting the configuration of individual keepstore servers is good to hav
 
 Another decision is how many VMs should be running keepstore. For example, there could be 8 VMs with one core each, or one machine with 8 cores, or anything in between. Assuming the cost of cloud resources is the same, there is always the benefit of distributing the risk of faulty VMs. The recommendation is to start with 2 VMs, each with a minimum of 2 cores, and expand in pairs. The total number of VMs will be a function of the budget and the pipeline traffic, to avoid saturation during periods of high usage. The Standard D v3 family is a balanced choice, making Standard_D2_v3 the 2-core option.
 
-There are many options for storage accounts. You can read details from Azure on their documentation https://docs.microsoft.com/en-us/azure/storage/common/storage-introduction. The type of storage and access tier will be a function of the budget and desired responsiveness. A balanced option is to have General-purpose Standard Storage account and use Blob storage, hot access tiers.
+There are many options for storage accounts. You can read details from Azure on their documentation "https://docs.microsoft.com/en-us/azure/storage/common/storage-introduction":https://docs.microsoft.com/en-us/azure/storage/common/storage-introduction. The type of storage and access tier will be a function of the budget and desired responsiveness. A balanced option is to have General-purpose Standard Storage account and use Blob storage, hot access tiers.
 
-Keepstore can be configure to reflect the level of underlaying redundancy the storage will have. This is call data replication option. For example LRS (Locally Redundant Storage) saves 3 copies of the data. There desired redundancy can be chosen at the keepstore layer or at the Storage Accunt layer. The decision where the redundancy will be done and the type of Storage Account data replication (LRS, ZRS, GRS and RA-GRS) has trade-offs. Please read more on https://docs.microsoft.com/en-us/azure/storage/common/storage-redundancy and decide what is best for your needs.
+Keepstore can be configured to reflect the level of underlying redundancy the storage will have. This is called the data replication option. For example, LRS (Locally Redundant Storage) saves 3 copies of the data. The desired redundancy can be chosen at the keepstore layer or at the Storage Account layer. The decision where the redundancy will be done and the type of Storage Account data replication (LRS, ZRS, GRS and RA-GRS) has trade-offs. Please read more on "https://docs.microsoft.com/en-us/azure/storage/common/storage-redundancy":https://docs.microsoft.com/en-us/azure/storage/common/storage-redundancy and decide what is best for your needs.
 
 h2. Create a storage container
 
@@ -42,72 +42,88 @@ azure storage container create exampleContainerName</span>
 </code></pre>
 </notextile>
 
-Note that Keepstore services may be configued to use multiple Azure Storage accounts and multiple containers within a storage account.
+Note that Keepstore services may be configured to use multiple Azure Storage accounts and multiple containers within a storage account.
 
 h2. Configure keepstore
 
-Copy the primary storage account key to a file where it will be accessible to keepstore at startup time.
-
-<notextile>
-<pre><code>~$ <span class="userinput">sudo sh -c 'cat &gt;/etc/arvados/keepstore/azure_storage_account_key.txt &lt;&lt;EOF'
-zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz==
-EOF</span>
-~$ <span class="userinput">sudo chmod 0400 /etc/arvados/keepstore/azure_storage_account_key.txt</span>
-</code></pre>
-</notextile>
-
-Next, edit the @Volumes@ section of the @keepstore.yml@ config file:
-
-<pre>
-Volumes:
-- # The volume type, this indicates Azure blob storage
-  Type: Azure
-
-  # How much replication is performed by the underlying container.
-  # This is used to inform replication decisions at the Keep layer.
-  AzureReplication: 3
-
-  # The storage container to use for the backing store.
-  ContainerName: exampleContainerName
-
-  # If true, do not accept write or trash operations, only reads.
-  ReadOnly: false
-
-  # Amount of time to wait for a response before failing the request
-  RequestTimeout: 2m0s
-
-  # The storage account name, used for authentication
-  StorageAccountName: exampleStorageAccountName
-
-  # The storage account secret key, used for authentication
-  StorageAccountKeyFile: /etc/arvados/keepstore/azure_storage_account_key.txt
-
-  # The cloud environment to use.  If blank, use the default cloud
-  # environment.  See below for an example of an alternate cloud environment.
-  StorageBaseURL: ""
-
-  # Storage classes to associate with this volume.  See "Storage
-  # classes" in the "Admin" section of doc.arvados.org.
-  StorageClasses: null
-
-- # Example configuration to use Azure China.
-  #
-  # The alternate cloud environment to use.
-  # Note that cloud environments are different from regions.  A
-  # cloud environment is an entirely separate instance of Azure with
-  # separate accounts, requiring separate credentials.
-  #
-  StorageBaseURL: core.chinacloudapi.cn
-  StorageAccountKeyFile: /etc/arvados/keepstore/azure_cn_storage_account_key.txt
-  StorageAccountName: cn-account-name
-  ContainerName: exampleChinaContainerName
-
-  # The rest are the same as above
-  Type: Azure
-  AzureReplication: 3
-  ReadOnly: false
-  RequestTimeout: 10m0s
-  StorageClasses: null
-</pre>
-
-Start (or restart) keepstore, and check its log file to confirm it is using the new configuration.
+Volumes are configured in the @Volumes@ section of the cluster configuration file.
+
+{% include 'assign_volume_uuid' %}
+
+<notextile><pre><code>Clusters:
+  <span class="userinput">uuid_prefix</span>:
+    Volumes:
+      <span class="userinput">uuid_prefix</span>-nyw5e-<span class="userinput">000000000000000</span>:
+        AccessViaHosts:
+          # This section determines which keepstore servers access the
+          # volume. In this example, keep0 has read/write access, and
+          # keep1 has read-only access.
+          #
+          # If the AccessViaHosts section is empty or omitted, all
+          # keepstore servers will have read/write access to the
+          # volume.
+          "http://<span class="userinput">keep0.uuid_prefix.example.com</span>:25107/": {}
+          "http://<span class="userinput">keep1.uuid_prefix.example.com</span>:25107/": {ReadOnly: true}
+
+        Driver: Azure
+        DriverParameters:
+          # Storage account name and secret key, used for
+          # authentication.
+          StorageAccountName: exampleStorageAccountName
+          StorageAccountKey: zzzzzzzzzzzzzzzzzzzzzzzzzz
+
+          # The cloud environment to use,
+          # e.g. "core.chinacloudapi.cn". Defaults to
+          # "core.windows.net" if blank or omitted.
+          StorageBaseURL: ""
+
+          # Storage container name.
+          ContainerName: exampleContainerName
+
+          # Time to wait for an upstream response before failing the
+          # request.
+          RequestTimeout: 10m
+
+          # Time to wait before retrying a failed "list blobs" Azure
+          # API call.
+          ListBlobsRetryDelay: 10s
+
+          # Maximum attempts at a "list blobs" Azure API call before
+          # giving up.
+          ListBlobsMaxAttempts: 12
+
+          # If non-zero, use multiple concurrent requests (each
+          # requesting MaxGetBytes bytes) when retrieving data. If
+          # zero or omitted, get the entire blob with one request.
+          #
+          # Normally this is zero but if you find that 4 small
+          # requests complete faster than a single large request, for
+          # example, you might set this to 16777216 (64 MiB ÷ 4).
+          MaxGetBytes: 0
+
+          # Time to wait for an unexpectedly empty blob to become
+          # non-empty. Azure's create-and-write operation is not
+          # atomic. The default value typically allows concurrent GET
+          # and PUT requests to succeed despite the race window.
+          WriteRaceInterval: 15s
+
+          # Time to wait between GET attempts while waiting for
+          # WriteRaceInterval to expire.
+          WriteRacePollTime: 1s
+
+        # How much replication is provided by the underlying storage
+        # container.  This is used to inform replication decisions at
+        # the Keep layer.
+        Replication: 3
+
+        # If true, do not accept write or trash operations, even if
+        # AccessViaHosts.*.ReadOnly is false.
+        #
+        # If false or omitted, enable write access (subject to
+        # AccessViaHosts.*.ReadOnly, where applicable).
+        ReadOnly: false
+
+        # Storage classes to associate with this volume.  See "Storage
+        # classes" in the "Admin" section of doc.arvados.org.
+        StorageClasses: null
+</code></pre></notextile>
index ddd54c3f0c7f3e8d0c885d1f0fe99f06081bffcd..be0a48cb8ca6d5526d600bda92775860099f9548 100644 (file)
@@ -1,7 +1,7 @@
 ---
 layout: default
 navsection: installguide
-title: Filesystem storage
+title: Configure filesystem storage
 ...
 {% comment %}
 Copyright (C) The Arvados Authors. All rights reserved.
@@ -13,44 +13,77 @@ Keepstore can store data in local and network-attached POSIX filesystems.
 
 h2. Setting up filesystem mounts
 
-Volumes are configured in the @Volumes@ section of the configuration file.  You may provide multiple volumes for a single keepstore process to manage multiple disks.  Keepstore distributes blocks among volumes in round-robin fashion.
-
-<pre>
-Volumes:
-- # The volume type, indicates this is a filesystem directory.
-  Type: Directory
-
-  # The directory that will be used as the backing store.
-  Root: /mnt/local-disk
-
-  # How much replication is performed by the underlying filesystem.
-  # (for example, a network filesystem may provide its own replication).
-  # This is used to inform replication decisions at the Keep layer.
-  DirectoryReplication: 1
-
-  # If true, do not accept write or trash operations, only reads.
-  ReadOnly: false
-
-  # When true, read and write operations (for whole 64MiB blocks) on
-  # an individual volume will queued and issued serially.  When
-  # false, read and write operations will be issued concurrently.
-  #
-  # May improve throughput if you experience contention when there are
-  # multiple requests to the same volume.
-  #
-  # When using SSDs, RAID, or a parallel network filesystem, you probably
-  # don't want this.
-  Serialize: false
-
-  # Storage classes to associate with this volume.  See "Storage
-  # classes" in the "Admin" section of doc.arvados.org.
-  StorageClasses: null
-
-  # Example of a second volume section
-- DirectoryReplication: 2
-  ReadOnly: false
-  Root: /mnt/network-disk
-  Serialize: false
-  StorageClasses: null
-  Type: Directory
-</pre>
+Volumes are configured in the @Volumes@ section of the cluster configuration file.  You may provide multiple volumes for a single keepstore process to manage multiple disks.  Keepstore distributes blocks among volumes in round-robin fashion.
+
+{% include 'assign_volume_uuid' %}
+
+Note that each volume has an AccessViaHosts section indicating that (for example) keep0's /mnt/local-disk directory is volume 0, while keep1's /mnt/local-disk directory is volume 1.
+
+<notextile>
+<pre><code>Clusters:
+  <span class="userinput">uuid_prefix</span>:
+    Volumes:
+      <span class="userinput">uuid_prefix</span>-nyw5e-<span class="userinput">000000000000000</span>:
+        AccessViaHosts:
+          "http://<span class="userinput">keep0.uuid_prefix.example.com</span>:25107": {}
+        Driver: Directory
+        DriverParameters:
+          # The directory that will be used as the backing store.
+          Root: /mnt/local-disk
+
+          # When true, read and write operations (for whole 64MiB
+          # blocks) on an individual volume will be queued and issued
+          # serially.  When false, read and write operations will be
+          # issued concurrently.
+          #
+          # May improve throughput if you experience contention when
+          # there are multiple requests to the same volume.
+          #
+          # When using SSDs, RAID, or a shared network filesystem, you
+          # probably don't want this.
+          Serialize: false
+
+        # How much replication is performed by the underlying
+        # filesystem.  (for example, a network filesystem may provide
+        # its own replication).  This is used to inform replication
+        # decisions at the Keep layer.
+        Replication: 1
+
+        # If true, do not accept write or trash operations, only
+        # reads.
+        ReadOnly: false
+
+        # Storage classes to associate with this volume.  See "Storage
+        # classes" in the "Admin" section of doc.arvados.org.
+        StorageClasses: null
+
+      <span class="userinput">uuid_prefix</span>-nyw5e-<span class="userinput">000000000000001</span>:
+        AccessViaHosts:
+          "http://keep1.<span class="userinput">uuid_prefix</span>.example.com:25107": {}
+        Driver: Directory
+        DriverParameters:
+          Root: /mnt/local-disk
+</code></pre></notextile>
+
+In the case of a network-attached filesystem, the AccessViaHosts section can have multiple entries. If the filesystem is accessible by all keepstore servers, the AccessViaHosts section can be empty, or omitted entirely.
+
+<notextile>
+<pre><code>Clusters:
+  <span class="userinput">uuid_prefix</span>:
+    Volumes:
+      <span class="userinput">uuid_prefix</span>-nyw5e-<span class="userinput">000000000000002</span>:
+        AccessViaHosts:
+          # This section determines which keepstore servers access the
+          # volume. In this example, keep0 has read/write access, and
+          # keep1 has read-only access.
+          #
+          # If the AccessViaHosts section is empty or omitted, all
+          # keepstore servers will have read/write access to the
+          # volume.
+          "http://<span class="userinput">keep0.uuid_prefix.example.com</span>:25107/": {}
+          "http://<span class="userinput">keep1.uuid_prefix.example.com</span>:25107/": {ReadOnly: true}
+        Driver: Directory
+        DriverParameters:
+          Root: /mnt/network-attached-filesystem
+        Replication: 2
+</code></pre></notextile>
index 88172fa9f7c04bf67e18e3697d9742fbc6cec285..b721dba9e1600d81fc0288abc56fa710f97caff3 100644 (file)
@@ -11,102 +11,90 @@ SPDX-License-Identifier: CC-BY-SA-3.0
 
 Keepstore can store data in object storage compatible with the S3 API, such as Amazon S3, Google Cloud Storage, or Ceph RADOS.
 
-h2. Configure keepstore
-
-Copy the "access key" and "secret key" to files where they will be accessible to keepstore at startup time.
-
-<notextile>
-<pre><code>~$ <span class="userinput">sudo sh -c 'cat &gt;/etc/arvados/keepstore/aws_s3_access_key.txt &lt;&lt;EOF'
-zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz==
-EOF</span>
-~$ <span class="userinput">sudo chmod 0400 /etc/arvados/keepstore/aws_s3_access_key.txt</span>
-</code></pre>
-</notextile>
-
-Next, edit the @Volumes@ section of the @keepstore.yml@ config file.
-
-h3. Example config for Amazon S3
-
-<pre>
-Volumes:
-- # The volume type, this indicates object storage compatible with the S3 API
-  Type: S3
-
-  # Storage provider.  If blank, uses Amazon S3 by default.
-  # See below for example alternate configuration for Google cloud
-  # storage.
-  Endpoint: ""
-
-  # The bucket to use for the backing store.
-  Bucket: example-bucket-name
-
-  # The region where the bucket is located.
-  Region: us-east-1
-
-  # The credentials to use to access the bucket.
-  AccessKeyFile: /etc/arvados/keepstore/aws_s3_access_key.txt
-  SecretKeyFile: /etc/arvados/keepstore/aws_s3_secret_key.txt
-
-  # Maximum time to wait making the initial connection to the backend before
-  # failing the request.
-  ConnectTimeout: 1m0s
-
-  # Page size for s3 "list bucket contents" requests
-  IndexPageSize: 1000
-
-  # True if the region requires a LocationConstraint declaration
-  LocationConstraint: false
-
-  # Maximum eventual consistency latency
-  RaceWindow: 24h0m0s
-
-  # If true, do not accept write or trash operations, only reads.
-  ReadOnly: false
-
-  # Maximum time to wait for a complete response from the backend before
-  # failing the request.
-  ReadTimeout: 2m0s
-
-  # How much replication is performed by the underlying bucket.
-  # This is used to inform replication decisions at the Keep layer.
-  S3Replication: 2
-
-  # Storage classes to associate with this volume.  See
-  # "Storage classes" in the "Admin" section of doc.arvados.org.
-  StorageClasses: null
-
-  # Enable deletion (garbage collection) even when TrashLifetime is
-  # zero.  WARNING: eventual consistency may result in race conditions
-  # that can cause data loss.  Do not enable this unless you know what
-  # you are doing.
-  UnsafeDelete: false
-</pre>
-
-Start (or restart) keepstore, and check its log file to confirm it is using the new configuration.
-
-h3. Example config for Google cloud storage
-
-See previous section for documentation of configuration fields.
-
-<pre>
-Volumes:
-- # Example configuration using alternate storage provider
-  # Configuration for Google cloud storage
-  Endpoint: https://storage.googleapis.com
-  Region: ""
-
-  AccessKeyFile: /etc/arvados/keepstore/gce_s3_access_key.txt
-  SecretKeyFile: /etc/arvados/keepstore/gce_s3_secret_key.txt
-  Bucket: example-bucket-name
-  ConnectTimeout: 1m0s
-  IndexPageSize: 1000
-  LocationConstraint: false
-  RaceWindow: 24h0m0s
-  ReadOnly: false
-  ReadTimeout: 2m0s
-  S3Replication: 2
-  StorageClasses: null
-  UnsafeDelete: false
-</pre>
-
-Start (or restart) keepstore, and check its log file to confirm it is using the new configuration.
+Volumes are configured in the @Volumes@ section of the cluster configuration file.
+
+{% include 'assign_volume_uuid' %}
+
+<notextile><pre><code>Clusters:
+  <span class="userinput">uuid_prefix</span>:
+    Volumes:
+      <span class="userinput">uuid_prefix</span>-nyw5e-<span class="userinput">000000000000000</span>:
+        AccessViaHosts:
+          # This section determines which keepstore servers access the
+          # volume. In this example, keep0 has read/write access, and
+          # keep1 has read-only access.
+          #
+          # If the AccessViaHosts section is empty or omitted, all
+          # keepstore servers will have read/write access to the
+          # volume.
+          "http://<span class="userinput">keep0.uuid_prefix.example.com</span>:25107/": {}
+          "http://<span class="userinput">keep1.uuid_prefix.example.com</span>:25107/": {ReadOnly: true}
+
+        Driver: S3
+        DriverParameters:
+          # IAM role name to use when retrieving credentials from
+          # instance metadata. It can be omitted, in which case the
+          # role name itself will be retrieved from instance metadata
+          # -- but setting it explicitly may protect you from using
+          # the wrong credentials in the event of an
+          # installation/configuration error.
+          IAMRole: ""
+
+          # If you are not using an IAM role for authentication,
+          # specify access credentials here instead.
+          AccessKey: ""
+          SecretKey: ""
+
+          # Storage provider endpoint. For Amazon S3, use "" or
+          # omit. For Google Cloud Storage, use
+          # "https://storage.googleapis.com".
+          Endpoint: ""
+
+          # Storage provider region. For Google Cloud Storage, use ""
+          # or omit.
+          Region: us-east-1
+
+          # Change to true if the region requires a LocationConstraint
+          # declaration.
+          LocationConstraint: false
+
+          # Bucket name.
+          Bucket: example-bucket-name
+
+          # Requested page size for "list bucket contents" requests.
+          IndexPageSize: 1000
+
+          # Maximum time to wait while making the initial connection
+          # to the backend before failing the request.
+          ConnectTimeout: 1m
+
+          # Maximum time to wait for a complete response from the
+          # backend before failing the request.
+          ReadTimeout: 2m
+
+          # Maximum eventual consistency latency
+          RaceWindow: 24h
+
+          # Enable deletion (garbage collection) even when the
+          # configured BlobTrashLifetime is zero.  WARNING: eventual
+          # consistency may result in race conditions that can cause
+          # data loss.  Do not enable this unless you understand and
+          # accept the risk.
+          UnsafeDelete: false
+
+        # How much replication is provided by the underlying bucket.
+        # This is used to inform replication decisions at the Keep
+        # layer.
+        Replication: 2
+
+        # If true, do not accept write or trash operations, even if
+        # AccessViaHosts.*.ReadOnly is false.
+        #
+        # If false or omitted, enable write access (subject to
+        # AccessViaHosts.*.ReadOnly, where applicable).
+        ReadOnly: false
+
+        # Storage classes to associate with this volume.  See "Storage
+        # classes" in the "Admin" section of doc.arvados.org.
+        StorageClasses: null
+</code></pre></notextile>
index 4a35f448e2996dfe8134a674af3cd422752e5ee2..d29166459c95bd307e555eee42f6cbd82e1be323 100644 (file)
@@ -9,7 +9,7 @@ Copyright (C) The Arvados Authors. All rights reserved.
 SPDX-License-Identifier: CC-BY-SA-3.0
 {% endcomment %}
 
-Keep-balance deletes unreferenced and overreplicated blocks from Keep servers, makes additional copies of underreplicated blocks, and moves blocks into optimal locations as needed (e.g., after adding new servers).
+Keep-balance deletes unreferenced and overreplicated blocks from Keep servers, makes additional copies of underreplicated blocks, and moves blocks into optimal locations as needed (e.g., after adding new servers). See "Balancing Keep servers":{{site.baseurl}}/admin/keep-balance.html for usage details.
 
 {% include 'notebox_begin' %}
 
@@ -42,61 +42,32 @@ Verify that @keep-balance@ is functional:
 <notextile>
 <pre><code>~$ <span class="userinput">keep-balance -h</span>
 ...
-Usage: keep-balance [options]
-
-Options:
+Usage of ./keep-balance:
   -commit-pulls
-        send pull requests (make more replicas of blocks that are underreplicated or are not in optimal rendezvous probe order)
+       send pull requests (make more replicas of blocks that are underreplicated or are not in optimal rendezvous probe order)
   -commit-trash
-        send trash requests (delete unreferenced old blocks, and excess replicas of overreplicated blocks)
+       send trash requests (delete unreferenced old blocks, and excess replicas of overreplicated blocks)
 ...
 </code></pre>
 </notextile>
 
-h3. Create a keep-balance token
-
-Create an Arvados superuser token for use by keep-balance.
-
-{% include 'create_superuser_token' %}
-
-h3. Update keepstore configuration files
-
-On each node that runs keepstore, save the token you generated in the previous step in a text file like @/etc/arvados/keepstore/system-auth-token.txt@ and then create or update @/etc/arvados/keepstore/keepstore.yml@ with the following key:
-
-<notextile>
-<pre><code>SystemAuthTokenFile: /etc/arvados/keepstore/system-auth-token.txt
-</code></pre>
-</notextile>
-
-Restart all keepstore services to apply the updated configuration.
-
-h3. Create a keep-balance configuration file
+h3. Update the cluster config
 
-On the host running keep-balance, create @/etc/arvados/keep-balance/keep-balance.yml@ using the token you generated above.  Follow this YAML format:
+Edit the cluster config at @/etc/arvados/config.yml@ and set @Services.Keepbalance.InternalURLs@. Replace @uuid_prefix@ with your cluster id.
 
 <notextile>
-<pre><code>Listen: :9005
-Client:
-  APIHost: <span class="userinput">uuid_prefix.your.domain</span>:443
-  AuthToken: zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
-KeepServiceTypes:
-  - disk
-ManagementToken: <span class="userinput">xyzzy</span>
-RunPeriod: 10m
-CollectionBatchSize: 100000
-CollectionBuffers: 1000
-LostBlocksFile: /tmp/keep-balance-lost-blocks.txt    # If given, this file will be updated atomically during each successful run.
+<pre><code>Clusters:
+  <span class="userinput">uuid_prefix</span>:
+    Services:
+      Keepbalance:
+        InternalURLs:
+          "http://localhost:9005/": {}
+    TLS:
+      Insecure: false
 </code></pre>
 </notextile>
 
-If your API server's SSL certificate is not signed by a recognized CA, add the @Insecure@ option to the @Client@ section:
-
-<notextile>
-<pre><code>Client:
-  <span class="userinput">Insecure: true</span>
-  APIHost: ...
-</code></pre>
-</notextile>
+Set @TLS.Insecure: true@ if your API server’s TLS certificate is not signed by a recognized CA.
 
 h3. Start the service (option 1: systemd)
 
@@ -153,28 +124,14 @@ run: /etc/service/keep-balance: (pid 12520) 2s; run: log: (pid 12519) 2s
 </code></pre>
 </notextile>
 
-h2. Enable delete operations on keepstore volumes
+h2. Enable garbage collection
 
-Ensure your keepstore services have the "delete" operation enabled. If it is disabled (which is the default), unneeded blocks will be identified by keep-balance, but will never be deleted from the underlying storage devices.
-
-Add the @-never-delete=false@ command line flag to your keepstore run script:
+Ensure your cluster configuration has @Collections.BlobTrash: true@ (this is the default).
 
 <notextile>
-<pre><code>keepstore <span class="userinput">-never-delete=false</span> -volume=...
+<pre><code>~$ arvados-server config-dump | grep BlobTrash:
+      BlobTrash: true
 </code></pre>
 </notextile>
 
-{% comment %}
-// To replace the above section when the keepstore page recommends YAML...
-
-Use the @EnableDelete@ flag in your YAML configuration file @/etc/arvados/keepstore/keepstore.yml@:
-
-<notextile>
-<pre><code>...
-BlobSigningKeyFile: /etc/keepstore/blob-signing.key
-<span class="userinput">EnableDelete: true</span>
-Listen: :25107
-...
-</code></pre>
-</notextile>
-{% endcomment %}
+If BlobTrash is false, unneeded blocks will be counted and logged by keep-balance, but they will not be deleted.
index 5044cc0c21596c6ee577eb6011e1f901cc82c0a5..71c1cb639e5afd26edf083685a11d7f043ea15cb 100644 (file)
@@ -11,6 +11,15 @@ SPDX-License-Identifier: CC-BY-SA-3.0
 
 Keepstore provides access to underlying storage for reading and writing content-addressed blocks, with enforcement of Arvados permissions.  Keepstore supports a variety of cloud object storage and POSIX filesystems for its backing store.
 
+h3. Plan your storage layout
+
+In the steps below, you will configure a number of backend storage volumes (like local filesystems and S3 buckets) and specify which keepstore servers have read-only and read-write access to which volumes.
+
+It is possible to configure arbitrary server/volume layouts. However, in order to provide good performance and efficient use of storage resources, we strongly recommend using one of the following layouts:
+
+# Each volume is writable by exactly one server, and optionally readable by one or more other servers. The total capacity of all writable volumes is the same for each server.
+# Each volume is writable by all servers. Each volume has enough built-in redundancy to satisfy your requirements, i.e., you do not need Arvados to mirror data across multiple volumes.
+
 We recommend starting off with two Keepstore servers.  Exact server specifications will be site and workload specific, but in general keepstore will be I/O bound and should be set up to maximize aggregate bandwidth with compute nodes.  To increase capacity (either space or throughput) it is straightforward to add additional servers, or (in cloud environments) to increase the machine size of the existing servers.
 
 By convention, we use the following hostname pattern:
@@ -47,128 +56,32 @@ Verify that Keepstore is functional:
 </code></pre>
 </notextile>
 
-h3. Create config file
+h3. Create a superuser token
 
-By default, keepstore will look for its configuration file at @/etc/arvados/keepstore/keepstore.yml@
+If you haven't already done so, create a superuser token.
 
-You can override the configuration file location using the @-config@ command line option to keepstore.
-
-The following is a sample configuration file:
-
-<pre>
-# Duration for which new permission signatures (returned in PUT
-# responses) will be valid.  This should be equal to the API
-# server's blob_signature_ttl configuration entry.
-BlobSignatureTTL: 336h0m0s
-
-# Local file containing the secret blob signing key (used to generate
-# and verify blob signatures).  The contents of the key file must be
-# identical to the API server's blob_signing_key configuration entry.
-BlobSigningKeyFile: ""
-
-# Print extra debug logging
-Debug: false
-
-# Maximum number of concurrent block deletion operations (per
-# volume) when emptying trash. Default is 1.
-EmptyTrashWorkers: 1
-
-# Enable trash and delete features. If false, trash lists will be
-# accepted but blocks will not be trashed or deleted.
-# Keepstore does not delete data on its own.  The keep-balance
-# service determines which blocks are candidates for deletion
-# and instructs the keepstore to move those blocks to the trash.
-EnableDelete: true
-
-# Local port to listen on. Can be 'address:port' or ':port', where
-# 'address' is a host IP address or name and 'port' is a port number
-# or name.
-Listen: :25107
-
-# Format of request/response and error logs: "json" or "text".
-LogFormat: json
-
-# The secret key that must be provided by monitoring services when
-# using the health check and metrics endpoints (/_health, /metrics).
-ManagementToken: xyzzy
-
-# Maximum RAM to use for data buffers, given in multiples of block
-# size (64 MiB). When this limit is reached, HTTP requests requiring
-# buffers (like GET and PUT) will wait for buffer space to be
-# released.
-#
-# It should be set such that MaxBuffers * 64MiB + 10% fits
-# comfortably in memory. On a host dedicated to running keepstore,
-# divide total memory by 88MiB to suggest a suitable value. For example,
-# if grep MemTotal /proc/meminfo reports MemTotal: 7125440 kB,
-# compute 7125440 / (88 * 1024)=79 and configure MaxBuffers: 79
-MaxBuffers: 128
-
-# Maximum concurrent requests. When this limit is reached, new
-# requests will receive 503 responses. Note: this limit does not
-# include idle connections from clients using HTTP keepalive, so it
-# does not strictly limit the number of concurrent connections. If
-# omitted or zero, the default is 2 * MaxBuffers.
-MaxRequests: 0
-
-# Path to write PID file during startup. This file is kept open and
-# locked with LOCK_EX until keepstore exits, so "fuser -k pidfile" is
-# one way to shut down. Exit immediately if there is an error
-# opening, locking, or writing the PID file.
-PIDFile: ""
-
-# Maximum number of concurrent pull operations. Default is 1, i.e.,
-# pull lists are processed serially.  A pull operation copies a block
-# from another keepstore server.
-PullWorkers: 1
-
-# Honor read requests only if a valid signature is provided.  This
-# should be true, except for development use and when migrating from
-# a very old version.
-RequireSignatures: true
-
-# Local file containing the Arvados API token used by keep-balance
-# or data manager.  Delete, trash, and index requests are honored
-# only for this token.
-SystemAuthTokenFile: ""
-
-# Path to server certificate file in X509 format. Enables TLS mode.
-#
-# Example: /var/lib/acme/live/keep0.example.com/fullchain
-TLSCertificateFile: ""
-
-# Path to server key file in X509 format. Enables TLS mode.
-#
-# The key pair is read from disk during startup, and whenever SIGHUP
-# is received.
-#
-# Example: /var/lib/acme/live/keep0.example.com/privkey
-TLSKeyFile: ""
-
-# How often to check for (and delete) trashed blocks whose
-# TrashLifetime has expired.
-TrashCheckInterval: 24h0m0s
-
-# Time duration after a block is trashed during which it can be
-# recovered using an /untrash request.
-TrashLifetime: 336h0m0s
-
-# Maximum number of concurrent trash operations (moving a block to the
-# trash, or permanently deleting it) . Default is 1, i.e., trash lists
-# are processed serially.  If individual trash operations have high
-# latency (eg some cloud platforms) you should increase this.
-TrashWorkers: 1
-</pre>
+{% include 'create_superuser_token' %}
 
-h3. Notes on storage management
+h3. Update cluster config file
 
-On its own, a keepstore server never deletes data.  The "keep-balance":install-keep-balance.html service determines which blocks are candidates for deletion and instructs the keepstore to move those blocks to the trash.
+Add or update the following sections of @/etc/arvados/config.yml@ as needed. Refer to the examples and comments in the "default config.yml file":{{site.baseurl}}/admin/config.html for more information.
 
-When a block is newly written, it is protected from deletion for the duration in @BlobSignatureTTL@.  During this time, it cannot be trashed.
+<notextile>
+<pre><code>Clusters:
+  <span class="userinput">uuid_prefix</span>:
+    SystemRootToken: zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
+    Services:
+      Keepstore:
+        InternalURLs:
+          "http://<span class="userinput">keep0.uuid_prefix.example.com</span>:25107/": {}
+    API:
+      MaxKeepBlobBuffers: 128
+</code></pre>
+</notextile>
 
-If keep-balance instructs keepstore to trash a block which is older than @BlobSignatureTTL@, and @EnableDelete@ is true, the block will be moved to "trash".  A block which is in the trash is no longer accessible by read requests, but has not yet been permanently deleted.  Blocks which are in the trash may be recovered using the "untrash" API endpoint.  Blocks are permanently deleted after they have been in the trash for the duration in @TrashLifetime@.
+h3. Note on storage management
 
-Keep-balance is also responsible for balancing the distribution of blocks across keepstore servers by asking servers to pull blocks from other servers (as determined by their "storage class":{{site.baseurl}}/admin/storage-classes.html and "rendezvous hashing order":{{site.baseurl}}/api/storage.html).  Pulling a block makes a copy.  If a block is overreplicated (i.e. there are excess copies) after pulling, it will be subsequently trashed on the original server.
+On its own, a keepstore server never deletes data. Instead, the keep-balance service determines which blocks are candidates for deletion and instructs the keepstore to move those blocks to the trash. Please see "Balancing Keep servers":{{site.baseurl}}/admin/keep-balance.html for more details.
 
 h3. Configure storage volumes
 
@@ -178,7 +91,30 @@ Available storage volume types include POSIX filesystems and cloud object storag
 * If you are using S3-compatible object storage (including Amazon S3, Google Cloud Storage, and Ceph RADOS), follow the setup instructions on "S3 Object Storage":configure-s3-object-storage.html
 * If you are using Azure Blob Storage, follow the setup instructions on "Azure Blob Storage":configure-azure-blob-storage.html
 
-h3. Run keepstore as a supervised service
+h2. Run keepstore as a supervised service
+
+h3. Start the service (option 1: systemd)
+
+If your system does not use systemd, skip this section and follow the "runit instructions":#runit instead.
+
+If your system uses systemd, the keepstore service should already be set up. Restart it to read the updated configuration, and check its status:
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo systemctl restart keepstore</span>
+~$ <span class="userinput">sudo systemctl status keepstore</span>
+&#x25cf; keepstore.service - Arvados Keep Storage Daemon
+   Loaded: loaded (/etc/systemd/system/keepstore.service; enabled; vendor preset: enabled)
+   Active: active (running) since Tue 2019-09-10 14:16:29 UTC; 1s ago
+     Docs: https://doc.arvados.org/
+ Main PID: 25465 (keepstore)
+    Tasks: 9 (limit: 4915)
+   CGroup: /system.slice/keepstore.service
+           └─25465 /usr/bin/keepstore
+[...]
+</code></pre>
+</notextile>
+
+h3(#runit). Start the service (option 2: runit)
 
 Install runit to supervise the keepstore daemon.  {% include 'install_runit' %}
 
@@ -188,45 +124,23 @@ Install this script as the run script @/etc/sv/keepstore/run@ for the keepstore
 <pre><code>#!/bin/sh
 
 exec 2>&1
-GOGC=10 exec keepstore -config /etc/arvados/keepstore/keepstore.yml
+GOGC=10 exec keepstore
 </code></pre>
 </notextile>
 
-h3. Set up additional servers
+h2. Set up additional servers
 
 Repeat the above sections to prepare volumes and bring up supervised services on each Keepstore server you are setting up.
 
-h3. Tell the API server about the Keepstore servers
+h2. Restart the API server and controller
 
-The API server needs to be informed about the presence of your Keepstore servers.
-
-First, if you don't already have an admin token, create a superuser token.
-
-{% include 'create_superuser_token' %}
-
-Configure your environment to run @arv@ using the output of create_superuser_token.rb:
+After adding all of your keepstore servers to the Services section, make sure the cluster config file is up to date on the API server host, and restart the API server and controller processes to ensure the changes are applied.
 
 <pre>
-export ARVADOS_API_HOST=zzzzz.example.com
-export ARVADOS_API_TOKEN=zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
+sudo systemctl restart nginx arvados-controller
 </pre>
 
-Use this command to register each keepstore server you have installed.  Make sure to update the @service_host@ value.
-
-<notextile>
-<pre><code>~$ <span class="userinput">uuid_prefix=`arv --format=uuid user current | cut -d- -f1`</span>
-~$ <span class="userinput">echo "Site prefix is '$uuid_prefix'"</span>
-~$ <span class="userinput">read -rd $'\000' keepservice &lt;&lt;EOF; arv keep_service create --keep-service "$keepservice"</span>
-<span class="userinput">{
- "service_host":"<strong>keep0.$uuid_prefix.your.domain</strong>",
- "service_port":25107,
- "service_ssl_flag":false,
- "service_type":"disk"
-}
-EOF</span>
-</code></pre></notextile>
-
-h3(#testing). Testing keep
+h2(#testing). Testing keep
 
 Install the "Python SDK":{{site.baseurl}}/sdk/python/sdk-python.html
 
index af7c571203b3dcf48f8a2198ded9e6d90648e8be..fb1cba38b4857d4b253933158f3f8a64a002cb48 100644 (file)
@@ -24,6 +24,12 @@ var (
 
 type CommandSuite struct{}
 
+func (s *CommandSuite) SetUpSuite(c *check.C) {
+       os.Unsetenv("ARVADOS_API_HOST")
+       os.Unsetenv("ARVADOS_API_HOST_INSECURE")
+       os.Unsetenv("ARVADOS_API_TOKEN")
+}
+
 func (s *CommandSuite) TestBadArg(c *check.C) {
        var stderr bytes.Buffer
        code := DumpCommand.RunCommand("arvados config-dump", []string{"-badarg"}, bytes.NewBuffer(nil), bytes.NewBuffer(nil), &stderr)
@@ -85,7 +91,7 @@ func (s *CommandSuite) TestCheckOldKeepstoreConfigFile(c *check.C) {
        c.Assert(err, check.IsNil)
        defer os.Remove(f.Name())
 
-       io.WriteString(f, "Debug: true\n")
+       io.WriteString(f, "Listen: :12345\nDebug: true\n")
 
        var stdout, stderr bytes.Buffer
        in := `
@@ -97,7 +103,7 @@ Clusters:
        code := CheckCommand.RunCommand("arvados config-check", []string{"-config", "-", "-legacy-keepstore-config", f.Name()}, bytes.NewBufferString(in), &stdout, &stderr)
        c.Check(code, check.Equals, 1)
        c.Check(stdout.String(), check.Matches, `(?ms).*\n\- +.*LogLevel: info\n\+ +LogLevel: debug\n.*`)
-       c.Check(stderr.String(), check.Matches, `.*you should remove the legacy keepstore config file.*\n`)
+       c.Check(stderr.String(), check.Matches, `(?ms).*you should remove the legacy keepstore config file.*\n`)
 }
 
 func (s *CommandSuite) TestCheckUnknownKey(c *check.C) {
index c7a038bec6c5da05fbeef993acae2f31ff7197b9..4e3bf6d6c937d89ccdc8cdd1781e5f3d76425965 100644 (file)
@@ -25,8 +25,15 @@ Clusters:
       # listening, and reachable from other hosts in the cluster.
       SAMPLE:
         InternalURLs:
-          "http://example.host:12345": {}
-          SAMPLE: {}
+          "http://host1.example:12345": {}
+          "http://host2.example:12345":
+            # Rendezvous is normally empty/omitted. When changing the
+            # URL of a Keepstore service, Rendezvous should be set to
+            # the old URL (with trailing slash omitted) to preserve
+            # rendezvous ordering.
+            Rendezvous: ""
+          SAMPLE:
+            Rendezvous: ""
         ExternalURL: "-"
 
       RailsAPI:
@@ -176,6 +183,15 @@ Clusters:
       # parameter higher than this value, this value is used instead.
       MaxItemsPerResponse: 1000
 
+      # Maximum number of concurrent requests to accept in a single
+      # service process, or 0 for no limit. Currently supported only
+      # by keepstore.
+      MaxConcurrentRequests: 0
+
+      # Maximum number of 64MiB memory buffers per keepstore server
+      # process, or 0 for no limit.
+      MaxKeepBlobBuffers: 128
+
       # API methods to disable. Disabled methods are not listed in the
       # discovery document, and respond 404 to all requests.
       # Example: {"jobs.create":{}, "pipeline_instances.create": {}}
@@ -303,43 +319,75 @@ Clusters:
       MaxRequestLogParamsSize: 2000
 
     Collections:
-      # Allow clients to create collections by providing a manifest with
-      # unsigned data blob locators. IMPORTANT: This effectively disables
-      # access controls for data stored in Keep: a client who knows a hash
-      # can write a manifest that references the hash, pass it to
-      # collections.create (which will create a permission link), use
-      # collections.get to obtain a signature for that data locator, and
-      # use that signed locator to retrieve the data from Keep. Therefore,
-      # do not turn this on if your users expect to keep data private from
-      # one another!
+
+      # Enable access controls for data stored in Keep. This should
+      # always be set to true on a production cluster.
       BlobSigning: true
 
       # BlobSigningKey is a string of alphanumeric characters used to
       # generate permission signatures for Keep locators. It must be
-      # identical to the permission key given to Keep. IMPORTANT: This is
-      # a site secret. It should be at least 50 characters.
+      # identical to the permission key given to Keep. IMPORTANT: This
+      # is a site secret. It should be at least 50 characters.
       #
       # Modifying BlobSigningKey will invalidate all existing
       # signatures, which can cause programs to fail (e.g., arv-put,
-      # arv-get, and Crunch jobs).  To avoid errors, rotate keys only when
-      # no such processes are running.
+      # arv-get, and Crunch jobs).  To avoid errors, rotate keys only
+      # when no such processes are running.
       BlobSigningKey: ""
 
+      # Enable garbage collection of unreferenced blobs in Keep.
+      BlobTrash: true
+
+      # Time to leave unreferenced blobs in "trashed" state before
+      # deleting them, or 0 to skip the "trashed" state entirely and
+      # delete unreferenced blobs.
+      #
+      # If you use any Amazon S3 buckets as storage volumes, this
+      # must be at least 24h to avoid occasional data loss.
+      BlobTrashLifetime: 336h
+
+      # How often to check for (and delete) trashed blocks whose
+      # BlobTrashLifetime has expired.
+      BlobTrashCheckInterval: 24h
+
+      # Maximum number of concurrent "trash blob" and "delete trashed
+      # blob" operations conducted by a single keepstore process. Each
+      # of these can be set to 0 to disable the respective operation.
+      #
+      # If BlobTrashLifetime is zero, "trash" and "delete trash"
+      # happen at once, so only the lower of these two values is used.
+      BlobTrashConcurrency: 4
+      BlobDeleteConcurrency: 4
+
+      # Maximum number of concurrent "create additional replica of
+      # existing blob" operations conducted by a single keepstore
+      # process.
+      BlobReplicateConcurrency: 4
+
       # Default replication level for collections. This is used when a
       # collection's replication_desired attribute is nil.
       DefaultReplication: 2
 
-      # Lifetime (in seconds) of blob permission signatures generated by
-      # the API server. This determines how long a client can take (after
-      # retrieving a collection record) to retrieve the collection data
-      # from Keep. If the client needs more time than that (assuming the
-      # collection still has the same content and the relevant user/token
-      # still has permission) the client can retrieve the collection again
-      # to get fresh signatures.
+      # BlobSigningTTL determines the minimum lifetime of transient
+      # data, i.e., blocks that are not referenced by
+      # collections. Unreferenced blocks exist for two reasons:
+      #
+      # 1) A data block must be written to a disk/cloud backend device
+      # before a collection can be created/updated with a reference to
+      # it.
+      #
+      # 2) Deleting or updating a collection can remove the last
+      # remaining reference to a data block.
       #
-      # This must be exactly equal to the -blob-signature-ttl flag used by
-      # keepstore servers.  Otherwise, reading data blocks and saving
-      # collections will fail with HTTP 403 permission errors.
+      # If BlobSigningTTL is too short, long-running
+      # processes/containers will fail when they take too long (a)
+      # between writing blocks and writing collections that reference
+      # them, or (b) between reading collections and reading the
+      # referenced blocks.
+      #
+      # If BlobSigningTTL is too long, data will still be stored long
+      # after the referring collections are deleted, and you will
+      # needlessly fill up disks or waste money on cloud storage.
       #
       # Modifying BlobSigningTTL invalidates existing signatures; see
       # BlobSigningKey note above.
@@ -347,6 +395,36 @@ Clusters:
       # The default is 2 weeks.
       BlobSigningTTL: 336h
 
+      # When running keep-balance, this is the destination filename for
+      # the list of lost block hashes if there are any, one per line.
+      # Updated atomically during each successful run.
+      BlobMissingReport: ""
+
+      # keep-balance operates periodically, i.e.: do a
+      # scan/balance operation, sleep, repeat.
+      #
+      # BalancePeriod determines the interval between start times of
+      # successive scan/balance operations. If a scan/balance operation
+      # takes longer than BalancePeriod, the next one will follow it
+      # immediately.
+      #
+      # If SIGUSR1 is received during an idle period between operations,
+      # the next operation will start immediately.
+      BalancePeriod: 10m
+
+      # Limits the number of collections retrieved by keep-balance per
+      # API transaction. If this is zero, page size is
+      # determined by the API server's own page size limits (see
+      # API.MaxItemsPerResponse and API.MaxIndexDatabaseRead).
+      BalanceCollectionBatch: 0
+
+      # The size of keep-balance's internal queue of
+      # collections. Higher values use more memory and improve throughput
+      # by allowing keep-balance to fetch the next page of collections
+      # while the current page is still being processed. If this is zero
+      # or omitted, pages are processed serially.
+      BalanceCollectionBuffers: 1000
+
       # Default lifetime for ephemeral collections: 2 weeks. This must not
       # be less than BlobSigningTTL.
       DefaultTrashLifetime: 336h
@@ -750,6 +828,67 @@ Clusters:
         Price: 0.1
         Preemptible: false
 
+    Volumes:
+      SAMPLE:
+        # AccessViaHosts specifies which keepstore processes can read
+        # and write data on the volume.
+        #
+        # For a local filesystem, AccessViaHosts has one entry,
+        # indicating which server the filesystem is located on.
+        #
+        # For a network-attached backend accessible by all keepstore
+        # servers, like a cloud storage bucket or an NFS mount,
+        # AccessViaHosts can be empty/omitted.
+        #
+        # Further info/examples:
+        # https://doc.arvados.org/install/configure-fs-storage.html
+        # https://doc.arvados.org/install/configure-s3-object-storage.html
+        # https://doc.arvados.org/install/configure-azure-blob-storage.html
+        AccessViaHosts:
+          SAMPLE:
+            ReadOnly: false
+          "http://host1.example:25107": {}
+        ReadOnly: false
+        Replication: 1
+        StorageClasses:
+          default: true
+          SAMPLE: true
+        Driver: s3
+        DriverParameters:
+
+          # for s3 driver -- see
+          # https://doc.arvados.org/install/configure-s3-object-storage.html
+          IAMRole: aaaaa
+          AccessKey: aaaaa
+          SecretKey: aaaaa
+          Endpoint: ""
+          Region: us-east-1a
+          Bucket: aaaaa
+          LocationConstraint: false
+          IndexPageSize: 1000
+          ConnectTimeout: 1m
+          ReadTimeout: 10m
+          RaceWindow: 24h
+          UnsafeDelete: false
+
+          # for azure driver -- see
+          # https://doc.arvados.org/install/configure-azure-blob-storage.html
+          StorageAccountName: aaaaa
+          StorageAccountKey: aaaaa
+          StorageBaseURL: core.windows.net
+          ContainerName: aaaaa
+          RequestTimeout: 30s
+          ListBlobsRetryDelay: 10s
+          ListBlobsMaxAttempts: 10
+          MaxGetBytes: 0
+          WriteRaceInterval: 15s
+          WriteRacePollTime: 1s
+
+          # for local directory driver -- see
+          # https://doc.arvados.org/install/configure-fs-storage.html
+          Root: /var/lib/arvados/keep-data
+          Serialize: false
+
     Mail:
       MailchimpAPIKey: ""
       MailchimpListID: ""
index 9eb8c40c18d9455693a9ce18d24d890c528f6d25..22eed080a2ca041103d40f157a4645a9ece85b3e 100644 (file)
@@ -7,6 +7,7 @@ package config
 import (
        "fmt"
        "io/ioutil"
+       "net/url"
        "os"
        "strings"
 
@@ -102,12 +103,6 @@ func applyDeprecatedNodeProfile(hostname string, ssi systemServiceInstance, svc
        svc.InternalURLs[arvados.URL{Scheme: scheme, Host: host}] = arvados.ServiceInstance{}
 }
 
-const defaultKeepstoreConfigPath = "/etc/arvados/keepstore/keepstore.yml"
-
-type oldKeepstoreConfig struct {
-       Debug *bool
-}
-
 func (ldr *Loader) loadOldConfigHelper(component, path string, target interface{}) error {
        if path == "" {
                return nil
@@ -126,35 +121,6 @@ func (ldr *Loader) loadOldConfigHelper(component, path string, target interface{
        return nil
 }
 
-// update config using values from an old-style keepstore config file.
-func (ldr *Loader) loadOldKeepstoreConfig(cfg *arvados.Config) error {
-       if ldr.KeepstorePath == "" {
-               return nil
-       }
-       var oc oldKeepstoreConfig
-       err := ldr.loadOldConfigHelper("keepstore", ldr.KeepstorePath, &oc)
-       if os.IsNotExist(err) && (ldr.KeepstorePath == defaultKeepstoreConfigPath) {
-               return nil
-       } else if err != nil {
-               return err
-       }
-
-       cluster, err := cfg.GetCluster("")
-       if err != nil {
-               return err
-       }
-
-       if v := oc.Debug; v == nil {
-       } else if *v && cluster.SystemLogs.LogLevel != "debug" {
-               cluster.SystemLogs.LogLevel = "debug"
-       } else if !*v && cluster.SystemLogs.LogLevel != "info" {
-               cluster.SystemLogs.LogLevel = "info"
-       }
-
-       cfg.Clusters[cluster.ClusterID] = *cluster
-       return nil
-}
-
 type oldCrunchDispatchSlurmConfig struct {
        Client *arvados.Client
 
@@ -509,3 +475,105 @@ func (ldr *Loader) loadOldGitHttpdConfig(cfg *arvados.Config) error {
        cfg.Clusters[cluster.ClusterID] = *cluster
        return nil
 }
+
+const defaultKeepBalanceConfigPath = "/etc/arvados/keep-balance/keep-balance.yml"
+
+type oldKeepBalanceConfig struct {
+       Client              *arvados.Client
+       Listen              *string
+       KeepServiceTypes    *[]string
+       KeepServiceList     *arvados.KeepServiceList
+       RunPeriod           *arvados.Duration
+       CollectionBatchSize *int
+       CollectionBuffers   *int
+       RequestTimeout      *arvados.Duration
+       LostBlocksFile      *string
+       ManagementToken     *string
+}
+
+func (ldr *Loader) loadOldKeepBalanceConfig(cfg *arvados.Config) error {
+       if ldr.KeepBalancePath == "" {
+               return nil
+       }
+       var oc oldKeepBalanceConfig
+       err := ldr.loadOldConfigHelper("keep-balance", ldr.KeepBalancePath, &oc)
+       if os.IsNotExist(err) && ldr.KeepBalancePath == defaultKeepBalanceConfigPath {
+               return nil
+       } else if err != nil {
+               return err
+       }
+
+       cluster, err := cfg.GetCluster("")
+       if err != nil {
+               return err
+       }
+
+       loadOldClientConfig(cluster, oc.Client)
+
+       if oc.Listen != nil {
+               cluster.Services.Keepbalance.InternalURLs[arvados.URL{Host: *oc.Listen}] = arvados.ServiceInstance{}
+       }
+       if oc.ManagementToken != nil {
+               cluster.ManagementToken = *oc.ManagementToken
+       }
+       if oc.RunPeriod != nil {
+               cluster.Collections.BalancePeriod = *oc.RunPeriod
+       }
+       if oc.LostBlocksFile != nil {
+               cluster.Collections.BlobMissingReport = *oc.LostBlocksFile
+       }
+       if oc.CollectionBatchSize != nil {
+               cluster.Collections.BalanceCollectionBatch = *oc.CollectionBatchSize
+       }
+       if oc.CollectionBuffers != nil {
+               cluster.Collections.BalanceCollectionBuffers = *oc.CollectionBuffers
+       }
+       if oc.RequestTimeout != nil {
+               cluster.API.KeepServiceRequestTimeout = *oc.RequestTimeout
+       }
+
+       msg := "The %s configuration option is no longer supported. Please remove it from your configuration file. See the keep-balance upgrade notes at https://doc.arvados.org/admin/upgrading.html for more details."
+
+       // If the keep service type provided is "disk" silently ignore it, since
+       // this is what ends up being done anyway.
+       if oc.KeepServiceTypes != nil {
+               numTypes := len(*oc.KeepServiceTypes)
+               if numTypes != 0 && !(numTypes == 1 && (*oc.KeepServiceTypes)[0] == "disk") {
+                       return fmt.Errorf(msg, "KeepServiceTypes")
+               }
+       }
+
+       if oc.KeepServiceList != nil {
+               return fmt.Errorf(msg, "KeepServiceList")
+       }
+
+       cfg.Clusters[cluster.ClusterID] = *cluster
+       return nil
+}
+
+func (ldr *Loader) loadOldEnvironmentVariables(cfg *arvados.Config) error {
+       if os.Getenv("ARVADOS_API_TOKEN") == "" && os.Getenv("ARVADOS_API_HOST") == "" {
+               return nil
+       }
+       cluster, err := cfg.GetCluster("")
+       if err != nil {
+               return err
+       }
+       if tok := os.Getenv("ARVADOS_API_TOKEN"); tok != "" && cluster.SystemRootToken == "" {
+               ldr.Logger.Warn("SystemRootToken missing from cluster config, falling back to ARVADOS_API_TOKEN environment variable")
+               cluster.SystemRootToken = tok
+       }
+       if apihost := os.Getenv("ARVADOS_API_HOST"); apihost != "" && cluster.Services.Controller.ExternalURL.Host == "" {
+               ldr.Logger.Warn("Services.Controller.ExternalURL missing from cluster config, falling back to ARVADOS_API_HOST(_INSECURE) environment variables")
+               u, err := url.Parse("https://" + apihost)
+               if err != nil {
+                       return fmt.Errorf("cannot parse ARVADOS_API_HOST: %s", err)
+               }
+               cluster.Services.Controller.ExternalURL = arvados.URL(*u)
+               if i := os.Getenv("ARVADOS_API_HOST_INSECURE"); i != "" && i != "0" {
+                       cluster.TLS.Insecure = true
+               }
+       }
+       cfg.Clusters[cluster.ClusterID] = *cluster
+       return nil
+}
diff --git a/lib/config/deprecated_keepstore.go b/lib/config/deprecated_keepstore.go
new file mode 100644 (file)
index 0000000..2901929
--- /dev/null
@@ -0,0 +1,688 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package config
+
+import (
+       "bufio"
+       "bytes"
+       "crypto/rand"
+       "encoding/json"
+       "fmt"
+       "io/ioutil"
+       "math/big"
+       "net"
+       "os"
+       "strconv"
+       "strings"
+       "time"
+
+       "git.curoverse.com/arvados.git/sdk/go/arvados"
+       "github.com/sirupsen/logrus"
+)
+
+// defaultKeepstoreConfigPath is the path loadOldKeepstoreConfig
+// compares ldr.KeepstorePath against: when the two are equal, a
+// missing file is silently ignored instead of being reported as an
+// error.
+const defaultKeepstoreConfigPath = "/etc/arvados/keepstore/keepstore.yml"
+
+// oldKeepstoreConfig receives the contents of a legacy keepstore
+// config file. Most fields are pointers so that
+// loadOldKeepstoreConfig can tell a key that was absent from the
+// file (nil) from one that was given explicitly, and only override
+// cluster config values for the latter.
+type oldKeepstoreConfig struct {
+       Debug  *bool
+       Listen *string
+
+       LogFormat *string
+
+       PIDFile *string
+
+       MaxBuffers  *int
+       MaxRequests *int
+
+       BlobSignatureTTL    *arvados.Duration
+       BlobSigningKeyFile  *string
+       RequireSignatures   *bool
+       SystemAuthTokenFile *string
+       EnableDelete        *bool
+       TrashLifetime       *arvados.Duration
+       TrashCheckInterval  *arvados.Duration
+       PullWorkers         *int
+       TrashWorkers        *int
+       EmptyTrashWorkers   *int
+       TLSCertificateFile  *string
+       TLSKeyFile          *string
+
+       // nil or empty means "discover local directory volumes"
+       // (see loadOldKeepstoreConfig).
+       Volumes *oldKeepstoreVolumeList
+
+       ManagementToken *string
+
+       DiscoverVolumesFromMountsFile string // not a real legacy config -- just useful for tests
+}
+
+// oldKeepstoreVolumeList is the type of the Volumes key in a legacy
+// keepstore config (see oldKeepstoreConfig).
+type oldKeepstoreVolumeList []oldKeepstoreVolume
+
+// oldKeepstoreVolume is one entry of the Volumes list in a legacy
+// keepstore config. Type selects the driver; the values handled by
+// translateOldKeepstoreVolume are "S3", "Azure" and "Directory", and
+// only the field group for the selected driver is consulted there.
+//
+// NOTE(review): ReadOnly and StorageClasses are declared here as well
+// as (apparently) in the embedded arvados.Volume; selectors such as
+// oldvol.ReadOnly resolve to the fields declared directly on this
+// struct.
+type oldKeepstoreVolume struct {
+       arvados.Volume
+       Type string `json:",omitempty"`
+
+       // Azure driver configs
+       StorageAccountName    string           `json:",omitempty"`
+       StorageAccountKeyFile string           `json:",omitempty"`
+       StorageBaseURL        string           `json:",omitempty"`
+       ContainerName         string           `json:",omitempty"`
+       AzureReplication      int              `json:",omitempty"`
+       RequestTimeout        arvados.Duration `json:",omitempty"`
+       ListBlobsRetryDelay   arvados.Duration `json:",omitempty"`
+       ListBlobsMaxAttempts  int              `json:",omitempty"`
+
+       // S3 driver configs
+       AccessKeyFile      string           `json:",omitempty"`
+       SecretKeyFile      string           `json:",omitempty"`
+       Endpoint           string           `json:",omitempty"`
+       Region             string           `json:",omitempty"`
+       Bucket             string           `json:",omitempty"`
+       LocationConstraint bool             `json:",omitempty"`
+       IndexPageSize      int              `json:",omitempty"`
+       S3Replication      int              `json:",omitempty"`
+       ConnectTimeout     arvados.Duration `json:",omitempty"`
+       ReadTimeout        arvados.Duration `json:",omitempty"`
+       RaceWindow         arvados.Duration `json:",omitempty"`
+       UnsafeDelete       bool             `json:",omitempty"`
+
+       // Directory driver configs
+       Root                 string
+       DirectoryReplication int
+       Serialize            bool
+
+       // Common configs
+       ReadOnly       bool     `json:",omitempty"`
+       StorageClasses []string `json:",omitempty"`
+}
+
+// loadOldKeepstoreConfig updates cfg using values from an old-style
+// keepstore config file located at ldr.KeepstorePath.
+//
+// It is a no-op when KeepstorePath is empty, or when the file does
+// not exist and KeepstorePath is the default location. Otherwise
+// every key present in the legacy file overrides the corresponding
+// cluster config value, this host's keepstore URL is determined (and
+// added to Services.Keepstore.InternalURLs if needed), and the
+// legacy volume list is migrated -- or, if the legacy file lists no
+// volumes, local directory volumes are discovered from the mounts
+// table.
+//
+// An error is returned if the legacy file or one of the key/token
+// files it references cannot be read, if the Listen value cannot be
+// translated to an InternalURLs entry, or if volume migration fails.
+func (ldr *Loader) loadOldKeepstoreConfig(cfg *arvados.Config) error {
+       if ldr.KeepstorePath == "" {
+               return nil
+       }
+       hostname, err := os.Hostname()
+       if err != nil {
+               return fmt.Errorf("getting hostname: %s", err)
+       }
+
+       var oc oldKeepstoreConfig
+       err = ldr.loadOldConfigHelper("keepstore", ldr.KeepstorePath, &oc)
+       if os.IsNotExist(err) && ldr.KeepstorePath == defaultKeepstoreConfigPath {
+               // No legacy file at the default location: nothing to
+               // migrate.
+               return nil
+       } else if err != nil {
+               return err
+       }
+
+       cluster, err := cfg.GetCluster("")
+       if err != nil {
+               return err
+       }
+
+       // myURL identifies this keepstore process in InternalURLs and
+       // AccessViaHosts. The scheme is https only if both TLS files
+       // are configured; the host part is filled in below.
+       myURL := arvados.URL{Scheme: "http"}
+       if oc.TLSCertificateFile != nil && oc.TLSKeyFile != nil {
+               myURL.Scheme = "https"
+       }
+
+       if v := oc.Debug; v != nil {
+               if *v {
+                       cluster.SystemLogs.LogLevel = "debug"
+               } else {
+                       cluster.SystemLogs.LogLevel = "info"
+               }
+       }
+
+       if v := oc.TLSCertificateFile; v != nil {
+               cluster.TLS.Certificate = "file://" + *v
+       }
+       if v := oc.TLSKeyFile; v != nil {
+               cluster.TLS.Key = "file://" + *v
+       }
+       if v := oc.Listen; v != nil {
+               if _, ok := cluster.Services.Keepstore.InternalURLs[arvados.URL{Scheme: myURL.Scheme, Host: *v}]; ok {
+                       // already listed
+                       myURL.Host = *v
+               } else if len(*v) > 1 && (*v)[0] == ':' {
+                       // Listen is ":port": combine it with our
+                       // hostname and register the resulting URL.
+                       myURL.Host = net.JoinHostPort(hostname, (*v)[1:])
+                       cluster.Services.Keepstore.InternalURLs[myURL] = arvados.ServiceInstance{}
+               } else {
+                       return fmt.Errorf("unable to migrate Listen value %q -- you must update Services.Keepstore.InternalURLs manually, and comment out the Listen entry in your legacy keepstore config file", *v)
+               }
+       } else {
+               // No Listen key: use an existing InternalURLs entry
+               // whose host part equals our hostname.
+               for url := range cluster.Services.Keepstore.InternalURLs {
+                       if host, _, _ := net.SplitHostPort(url.Host); host == hostname {
+                               myURL = url
+                               break
+                       }
+               }
+               if myURL.Host == "" {
+                       return fmt.Errorf("unable to migrate legacy keepstore config: no 'Listen' key, and hostname %q does not match an entry in Services.Keepstore.InternalURLs", hostname)
+               }
+       }
+
+       if v := oc.LogFormat; v != nil {
+               cluster.SystemLogs.Format = *v
+       }
+       if v := oc.MaxBuffers; v != nil {
+               cluster.API.MaxKeepBlobBuffers = *v
+       }
+       if v := oc.MaxRequests; v != nil {
+               cluster.API.MaxConcurrentRequests = *v
+       }
+       if v := oc.BlobSignatureTTL; v != nil {
+               cluster.Collections.BlobSigningTTL = *v
+       }
+       if v := oc.BlobSigningKeyFile; v != nil {
+               buf, err := ioutil.ReadFile(*v)
+               if err != nil {
+                       return fmt.Errorf("error reading BlobSigningKeyFile: %s", err)
+               }
+               cluster.Collections.BlobSigningKey = strings.TrimSpace(string(buf))
+       }
+       if v := oc.RequireSignatures; v != nil {
+               cluster.Collections.BlobSigning = *v
+       }
+       if v := oc.SystemAuthTokenFile; v != nil {
+               f, err := os.Open(*v)
+               if err != nil {
+                       return fmt.Errorf("error opening SystemAuthTokenFile: %s", err)
+               }
+               buf, err := ioutil.ReadAll(f)
+               // Close immediately rather than with defer, so the
+               // descriptor is not held open during the volume
+               // migration and API calls below.
+               f.Close()
+               if err != nil {
+                       return fmt.Errorf("error reading SystemAuthTokenFile: %s", err)
+               }
+               cluster.SystemRootToken = strings.TrimSpace(string(buf))
+       }
+       if v := oc.EnableDelete; v != nil {
+               cluster.Collections.BlobTrash = *v
+       }
+       if v := oc.TrashLifetime; v != nil {
+               cluster.Collections.BlobTrashLifetime = *v
+       }
+       if v := oc.TrashCheckInterval; v != nil {
+               cluster.Collections.BlobTrashCheckInterval = *v
+       }
+       if v := oc.TrashWorkers; v != nil {
+               cluster.Collections.BlobTrashConcurrency = *v
+       }
+       if v := oc.EmptyTrashWorkers; v != nil {
+               cluster.Collections.BlobDeleteConcurrency = *v
+       }
+       if v := oc.PullWorkers; v != nil {
+               cluster.Collections.BlobReplicateConcurrency = *v
+       }
+       if oc.Volumes == nil || len(*oc.Volumes) == 0 {
+               ldr.Logger.Warn("no volumes in legacy config; discovering local directory volumes")
+               err := ldr.discoverLocalVolumes(cluster, oc.DiscoverVolumesFromMountsFile, myURL)
+               if err != nil {
+                       return fmt.Errorf("error discovering local directory volumes: %s", err)
+               }
+       } else {
+               err := ldr.migrateOldKeepstoreVolumes(cluster, oc, myURL)
+               if err != nil {
+                       return err
+               }
+       }
+
+       if err := ldr.checkPendingKeepstoreMigrations(cluster); err != nil {
+               return err
+       }
+
+       // GetCluster returned a pointer we modified; store the result
+       // back into the config.
+       cfg.Clusters[cluster.ClusterID] = *cluster
+       return nil
+}
+
+// Merge Volumes section of old keepstore config into cluster config.
+//
+// For each legacy volume:
+//
+//   - If alreadyMigrated finds a matching cluster config entry that
+//     either has no AccessViaHosts restrictions or has at least one
+//     writable host, the cluster entry is kept as is (myURL is added
+//     to its AccessViaHosts only when that map is non-empty).
+//   - Otherwise a volume entry is built (from the matching cluster
+//     entry if there is one, else by translating the legacy entry),
+//     myURL is added to its AccessViaHosts, and it is stored under a
+//     UUID chosen as described in the comments below.
+//
+// Returns an error only if translating a legacy volume fails.
+func (ldr *Loader) migrateOldKeepstoreVolumes(cluster *arvados.Cluster, oc oldKeepstoreConfig, myURL arvados.URL) error {
+       for i, oldvol := range *oc.Volumes {
+               var accessViaHosts map[arvados.URL]arvados.VolumeAccess
+               oldUUID, found := ldr.alreadyMigrated(oldvol, cluster.Volumes, myURL)
+               if found {
+                       accessViaHosts = cluster.Volumes[oldUUID].AccessViaHosts
+                       // writers: does any listed host have
+                       // read-write access to this volume?
+                       writers := false
+                       for _, va := range accessViaHosts {
+                               if !va.ReadOnly {
+                                       writers = true
+                               }
+                       }
+                       if writers || len(accessViaHosts) == 0 {
+                               ldr.Logger.Infof("ignoring volume #%d's parameters in legacy keepstore config: using matching entry in cluster config instead", i)
+                               // An empty AccessViaHosts is left
+                               // empty; otherwise make sure this
+                               // host is listed.
+                               if len(accessViaHosts) > 0 {
+                                       cluster.Volumes[oldUUID].AccessViaHosts[myURL] = arvados.VolumeAccess{ReadOnly: oldvol.ReadOnly}
+                               }
+                               continue
+                       }
+               }
+               var newvol arvados.Volume
+               if found {
+                       ldr.Logger.Infof("ignoring volume #%d's parameters in legacy keepstore config: using matching entry in cluster config instead", i)
+                       newvol = cluster.Volumes[oldUUID]
+                       // Remove the old entry. It will be added back
+                       // below, possibly with a new UUID.
+                       delete(cluster.Volumes, oldUUID)
+               } else {
+                       v, err := ldr.translateOldKeepstoreVolume(oldvol)
+                       if err != nil {
+                               return err
+                       }
+                       newvol = v
+               }
+               if accessViaHosts == nil {
+                       accessViaHosts = make(map[arvados.URL]arvados.VolumeAccess, 1)
+               }
+               accessViaHosts[myURL] = arvados.VolumeAccess{ReadOnly: oldvol.ReadOnly}
+               newvol.AccessViaHosts = accessViaHosts
+
+               // Choose the UUID to store the volume under. Start
+               // with the existing UUID (empty if there was no
+               // match). For writable volumes, try to derive a UUID
+               // from this host's keep_services UUID instead.
+               volUUID := oldUUID
+               if oldvol.ReadOnly {
+                       // read-only volume: no keep_services lookup
+               } else if oc.Listen == nil {
+                       ldr.Logger.Warn("cannot find optimal volume UUID because Listen address is not given in legacy keepstore config")
+               } else if uuid, _, err := findKeepServicesItem(cluster, *oc.Listen); err != nil {
+                       ldr.Logger.WithError(err).Warn("cannot find optimal volume UUID: failed to find a matching keep_service listing for this legacy keepstore config")
+               } else if len(uuid) != 27 {
+                       ldr.Logger.WithField("UUID", uuid).Warn("cannot find optimal volume UUID: keep_service UUID does not have expected format")
+               } else {
+                       // uuid[12:] is the part of the 27-character
+                       // keep_service UUID after the first 12
+                       // characters; reuse it as the volume UUID
+                       // suffix unless that UUID is already taken.
+                       rendezvousUUID := cluster.ClusterID + "-nyw5e-" + uuid[12:]
+                       if _, ok := cluster.Volumes[rendezvousUUID]; ok {
+                               ldr.Logger.Warn("suggesting a random volume UUID because the volume ID matching our keep_service UUID is already in use")
+                       } else {
+                               volUUID = rendezvousUUID
+                       }
+                       // Record the same suffix as the Rendezvous
+                       // value of this host's InternalURLs entry.
+                       si := cluster.Services.Keepstore.InternalURLs[myURL]
+                       si.Rendezvous = uuid[12:]
+                       cluster.Services.Keepstore.InternalURLs[myURL] = si
+               }
+               if volUUID == "" {
+                       volUUID = newUUID(cluster.ClusterID, "nyw5e")
+                       ldr.Logger.WithField("UUID", volUUID).Infof("suggesting a random volume UUID for volume #%d in legacy config", i)
+               }
+               cluster.Volumes[volUUID] = newvol
+       }
+       return nil
+}
+
+// translateOldKeepstoreVolume converts one volume entry from a legacy
+// keepstore config into the cluster config representation.
+//
+// Credentials that the legacy format kept in separate files (S3
+// access and secret keys, Azure storage account key) are read and
+// stored inline with surrounding whitespace trimmed; a read failure
+// is only reported when the corresponding file name is non-empty.
+// Replication values below 1 are raised to 1. Volume types other
+// than "S3", "Azure" and "Directory" are rejected with an error.
+func (ldr *Loader) translateOldKeepstoreVolume(oldvol oldKeepstoreVolume) (arvados.Volume, error) {
+       var (
+               none         arvados.Volume
+               driverParams interface{}
+       )
+       // Settings common to all drivers; Driver and Replication are
+       // filled in per type below.
+       converted := arvados.Volume{
+               ReadOnly:       oldvol.ReadOnly,
+               StorageClasses: array2boolmap(oldvol.StorageClasses),
+       }
+
+       switch oldvol.Type {
+       case "S3":
+               accessKey, err := ioutil.ReadFile(oldvol.AccessKeyFile)
+               if oldvol.AccessKeyFile != "" && err != nil {
+                       return none, fmt.Errorf("error reading AccessKeyFile: %s", err)
+               }
+               secretKey, err := ioutil.ReadFile(oldvol.SecretKeyFile)
+               if oldvol.SecretKeyFile != "" && err != nil {
+                       return none, fmt.Errorf("error reading SecretKeyFile: %s", err)
+               }
+               converted.Driver = "S3"
+               converted.Replication = oldvol.S3Replication
+               driverParams = arvados.S3VolumeDriverParameters{
+                       AccessKey:          string(bytes.TrimSpace(accessKey)),
+                       SecretKey:          string(bytes.TrimSpace(secretKey)),
+                       Endpoint:           oldvol.Endpoint,
+                       Region:             oldvol.Region,
+                       Bucket:             oldvol.Bucket,
+                       LocationConstraint: oldvol.LocationConstraint,
+                       IndexPageSize:      oldvol.IndexPageSize,
+                       ConnectTimeout:     oldvol.ConnectTimeout,
+                       ReadTimeout:        oldvol.ReadTimeout,
+                       RaceWindow:         oldvol.RaceWindow,
+                       UnsafeDelete:       oldvol.UnsafeDelete,
+               }
+       case "Azure":
+               accountKey, err := ioutil.ReadFile(oldvol.StorageAccountKeyFile)
+               if oldvol.StorageAccountKeyFile != "" && err != nil {
+                       return none, fmt.Errorf("error reading StorageAccountKeyFile: %s", err)
+               }
+               converted.Driver = "Azure"
+               converted.Replication = oldvol.AzureReplication
+               driverParams = arvados.AzureVolumeDriverParameters{
+                       StorageAccountName:   oldvol.StorageAccountName,
+                       StorageAccountKey:    string(bytes.TrimSpace(accountKey)),
+                       StorageBaseURL:       oldvol.StorageBaseURL,
+                       ContainerName:        oldvol.ContainerName,
+                       RequestTimeout:       oldvol.RequestTimeout,
+                       ListBlobsRetryDelay:  oldvol.ListBlobsRetryDelay,
+                       ListBlobsMaxAttempts: oldvol.ListBlobsMaxAttempts,
+               }
+       case "Directory":
+               converted.Driver = "Directory"
+               converted.Replication = oldvol.DirectoryReplication
+               driverParams = arvados.DirectoryVolumeDriverParameters{
+                       Root:      oldvol.Root,
+                       Serialize: oldvol.Serialize,
+               }
+       default:
+               return none, fmt.Errorf("unsupported volume type %q", oldvol.Type)
+       }
+
+       encoded, err := json.Marshal(driverParams)
+       if err != nil {
+               return converted, err
+       }
+       converted.DriverParameters = json.RawMessage(encoded)
+       if converted.Replication < 1 {
+               converted.Replication = 1
+       }
+       return converted, nil
+}
+
+// alreadyMigrated looks in newvols for a cluster config volume that
+// refers to the same storage as the legacy volume oldvol, and returns
+// its UUID and true if one is found ("" and false otherwise).
+//
+// Only entries whose Driver equals oldvol.Type and whose
+// DriverParameters can be decoded are considered. The comparison is
+// on Endpoint/Region/Bucket/LocationConstraint for S3, on
+// StorageAccountName/StorageBaseURL/ContainerName for Azure, and on
+// Root for Directory. A Directory volume additionally matches only if
+// the entry lists myURL in AccessViaHosts or has no AccessViaHosts at
+// all.
+func (ldr *Loader) alreadyMigrated(oldvol oldKeepstoreVolume, newvols map[string]arvados.Volume, myURL arvados.URL) (string, bool) {
+       for candidateUUID, candidate := range newvols {
+               if candidate.Driver != oldvol.Type {
+                       continue
+               }
+               matched := false
+               switch oldvol.Type {
+               case "S3":
+                       var p arvados.S3VolumeDriverParameters
+                       if err := json.Unmarshal(candidate.DriverParameters, &p); err == nil {
+                               matched = oldvol.Endpoint == p.Endpoint &&
+                                       oldvol.Region == p.Region &&
+                                       oldvol.Bucket == p.Bucket &&
+                                       oldvol.LocationConstraint == p.LocationConstraint
+                       }
+               case "Azure":
+                       var p arvados.AzureVolumeDriverParameters
+                       if err := json.Unmarshal(candidate.DriverParameters, &p); err == nil {
+                               matched = oldvol.StorageAccountName == p.StorageAccountName &&
+                                       oldvol.StorageBaseURL == p.StorageBaseURL &&
+                                       oldvol.ContainerName == p.ContainerName
+                       }
+               case "Directory":
+                       var p arvados.DirectoryVolumeDriverParameters
+                       if err := json.Unmarshal(candidate.DriverParameters, &p); err == nil && oldvol.Root == p.Root {
+                               // Same path: only the same volume if
+                               // it is on this host, or not tied to
+                               // particular hosts at all.
+                               _, listed := candidate.AccessViaHosts[myURL]
+                               matched = listed || len(candidate.AccessViaHosts) == 0
+                       }
+               }
+               if matched {
+                       return candidateUUID, true
+               }
+       }
+       return "", false
+}
+
+func (ldr *Loader) discoverLocalVolumes(cluster *arvados.Cluster, mountsFile string, myURL arvados.URL) error {
+       if mountsFile == "" {
+               mountsFile = "/proc/mounts"
+       }
+       f, err := os.Open(mountsFile)
+       if err != nil {
+               return fmt.Errorf("error opening %s: %s", mountsFile, err)
+       }
+       defer f.Close()
+       scanner := bufio.NewScanner(f)
+       for scanner.Scan() {
+               args := strings.Fields(scanner.Text())
+               dev, mount := args[0], args[1]
+               if mount == "/" {
+                       continue
+               }
+               if dev != "tmpfs" && !strings.HasPrefix(dev, "/dev/") {
+                       continue
+               }
+               keepdir := mount + "/keep"
+               if st, err := os.Stat(keepdir); err != nil || !st.IsDir() {
+                       continue
+               }
+
+               ro := false
+               for _, fsopt := range strings.Split(args[3], ",") {
+                       if fsopt == "ro" {
+                               ro = true
+                       }
+               }
+
+               uuid := newUUID(cluster.ClusterID, "nyw5e")
+               ldr.Logger.WithFields(logrus.Fields{
+                       "UUID":                       uuid,
+                       "Driver":                     "Directory",
+                       "DriverParameters.Root":      keepdir,
+                       "DriverParameters.Serialize": false,
+                       "ReadOnly":                   ro,
+                       "Replication":                1,
+               }).Warn("adding local directory volume")
+
+               p, err := json.Marshal(arvados.DirectoryVolumeDriverParameters{
+                       Root:      keepdir,
+                       Serialize: false,
+               })
+               if err != nil {
+                       panic(err)
+               }
+               cluster.Volumes[uuid] = arvados.Volume{
+                       Driver:           "Directory",
+                       DriverParameters: p,
+                       ReadOnly:         ro,
+                       Replication:      1,
+                       AccessViaHosts: map[arvados.URL]arvados.VolumeAccess{
+                               myURL: {ReadOnly: ro},
+                       },
+               }
+       }
+       if err := scanner.Err(); err != nil {
+               return fmt.Errorf("reading %s: %s", mountsFile, err)
+       }
+       return nil
+}
+
+func array2boolmap(keys []string) map[string]bool {
+       m := map[string]bool{}
+       for _, k := range keys {
+               m[k] = true
+       }
+       return m
+}
+
+// newUUID returns a string of the form "clusterID-infix-suffix",
+// where suffix is 15 characters of cryptographically random base-36
+// (digits and lowercase letters), left-padded with zeros. It panics
+// if the system random source fails.
+func newUUID(clusterID, infix string) string {
+       const (
+               suffixLen = 15
+               zeros     = "000000000000000" // suffixLen zeros, for padding
+       )
+       // Upper bound (exclusive) for the random value: 36^15, i.e.
+       // the number of distinct 15-digit base-36 strings.
+       limit := new(big.Int).Exp(big.NewInt(36), big.NewInt(suffixLen), nil)
+       n, err := rand.Int(rand.Reader, limit)
+       if err != nil {
+               panic(err)
+       }
+       suffix := n.Text(36)
+       if missing := suffixLen - len(suffix); missing > 0 {
+               suffix = zeros[:missing] + suffix
+       }
+       return fmt.Sprintf("%s-%s-%s", clusterID, infix, suffix)
+}
+
+// findKeepServicesItem asks the controller for its keep_services
+// listing (authenticating with the cluster's SystemRootToken) and
+// returns the UUID and URL of the first non-proxy entry that
+// keepServiceIsMe accepts for this machine's hostname and the given
+// listen address.
+//
+// An error is returned if the API client cannot be set up, the
+// request fails, the hostname cannot be determined, or no entry
+// matches.
+func findKeepServicesItem(cluster *arvados.Cluster, listen string) (uuid string, url arvados.URL, err error) {
+       client, err := arvados.NewClientFromConfig(cluster)
+       if err != nil {
+               return "", arvados.URL{}, err
+       }
+       client.AuthToken = cluster.SystemRootToken
+
+       var svcList arvados.KeepServiceList
+       if err = client.RequestAndDecode(&svcList, "GET", "arvados/v1/keep_services", nil, nil); err != nil {
+               return "", arvados.URL{}, err
+       }
+
+       hostname, err := os.Hostname()
+       if err != nil {
+               return "", arvados.URL{}, fmt.Errorf("error getting hostname: %s", err)
+       }
+
+       // Addresses of the entries we rejected, for the error message.
+       var tried []string
+       for _, ks := range svcList.Items {
+               switch {
+               case ks.ServiceType == "proxy":
+                       // proxies are never candidates
+               case keepServiceIsMe(ks, hostname, listen):
+                       return ks.UUID, keepServiceURL(ks), nil
+               default:
+                       tried = append(tried, fmt.Sprintf("%s:%d", ks.ServiceHost, ks.ServicePort))
+               }
+       }
+       return "", arvados.URL{}, fmt.Errorf("listen address %q does not match any of the non-proxy keep_services entries %q", listen, tried)
+}
+
+func keepServiceURL(ks arvados.KeepService) arvados.URL {
+       url := arvados.URL{
+               Scheme: "http",
+               Host:   net.JoinHostPort(ks.ServiceHost, strconv.Itoa(ks.ServicePort)),
+       }
+       if ks.ServiceSSLFlag {
+               url.Scheme = "https"
+       }
+       return url
+}
+
+// localhostOrAllInterfaces is the set of (lowercased) keep_services
+// host values that keepServiceIsMe and
+// checkPendingKeepstoreMigrations treat as referring to the local
+// machine regardless of its hostname: "localhost", the IPv4 and IPv6
+// loopback addresses, "::", and the empty string.
+var localhostOrAllInterfaces = map[string]bool{
+       "localhost": true,
+       "127.0.0.1": true,
+       "::1":       true,
+       "::":        true,
+       "":          true,
+}
+
+// Return true if the given KeepService entry matches the given
+// hostname and (keepstore config file) listen address.
+//
+// If the KeepService host is some variant of "localhost", we assume
+// this is a testing or single-node environment, ignore the given
+// hostname, and return true if the port numbers match.
+//
+// The hostname isn't assumed to be a FQDN: a hostname "foo.bar" will
+// match a KeepService host "foo.bar", but also "foo.bar.example",
+// "foo.bar.example.org", etc.
+func keepServiceIsMe(ks arvados.KeepService, hostname string, listen string) bool {
+       // Extract the port name/number from listen, and resolve it to
+       // a port number to compare with ks.ServicePort.
+       _, listenport, err := net.SplitHostPort(listen)
+       if err != nil && strings.HasPrefix(listen, ":") {
+               listenport = listen[1:]
+       }
+       if lp, err := net.LookupPort("tcp", listenport); err != nil {
+               return false
+       } else if !(lp == ks.ServicePort ||
+               (lp == 0 && ks.ServicePort == 80)) {
+               return false
+       }
+
+       kshost := strings.ToLower(ks.ServiceHost)
+       return localhostOrAllInterfaces[kshost] || strings.HasPrefix(kshost+".", strings.ToLower(hostname)+".")
+}
+
+// Warn about pending keepstore migration tasks that haven't already
+// been warned about in loadOldKeepstoreConfig() -- i.e., unmigrated
+// keepstore hosts other than the present host, and obsolete content
+// in the keep_services table.
+//
+// The check is skipped (returning nil) when no controller
+// ExternalURL is configured, when ldr.SkipAPICalls is set, or when
+// the keep_services list cannot be retrieved. An error is returned
+// only if the API client cannot be created or the hostname cannot be
+// determined.
+func (ldr *Loader) checkPendingKeepstoreMigrations(cluster *arvados.Cluster) error {
+       if cluster.Services.Controller.ExternalURL.String() == "" {
+               ldr.Logger.Debug("Services.Controller.ExternalURL not configured -- skipping check for pending keepstore config migrations")
+               return nil
+       }
+       if ldr.SkipAPICalls {
+               ldr.Logger.Debug("(Loader).SkipAPICalls == true -- skipping check for pending keepstore config migrations")
+               return nil
+       }
+       client, err := arvados.NewClientFromConfig(cluster)
+       if err != nil {
+               return err
+       }
+       client.AuthToken = cluster.SystemRootToken
+       var svcList arvados.KeepServiceList
+       err = client.RequestAndDecode(&svcList, "GET", "arvados/v1/keep_services", nil, nil)
+       if err != nil {
+               // Best effort: a failed request is logged, not
+               // reported as an error.
+               ldr.Logger.WithError(err).Warn("error retrieving keep_services list -- skipping check for pending keepstore config migrations")
+               return nil
+       }
+       hostname, err := os.Hostname()
+       if err != nil {
+               return fmt.Errorf("error getting hostname: %s", err)
+       }
+       // Collect the distinct created/modified timestamps seen in
+       // the response.
+       sawTimes := map[time.Time]bool{}
+       for _, ks := range svcList.Items {
+               sawTimes[ks.CreatedAt] = true
+               sawTimes[ks.ModifiedAt] = true
+       }
+       if len(sawTimes) <= 1 {
+               // If all timestamps in the arvados/v1/keep_services
+               // response are identical, it's a clear sign the
+               // response was generated on the fly from the cluster
+               // config, rather than real database records. In that
+               // case (as well as the case where none are listed at
+               // all) it's pointless to look for entries that
+               // haven't yet been migrated to the config file.
+               return nil
+       }
+       // needDBRows is set when a proxy entry is listed but no
+       // Keepproxy InternalURLs are configured; in that case the
+       // "delete your keep_services listings" advice below is
+       // withheld.
+       needDBRows := false
+       for _, ks := range svcList.Items {
+               if ks.ServiceType == "proxy" {
+                       if len(cluster.Services.Keepproxy.InternalURLs) == 0 {
+                               needDBRows = true
+                               ldr.Logger.Warn("you should migrate your keepproxy configuration to the cluster configuration file")
+                       }
+                       continue
+               }
+               kshost := strings.ToLower(ks.ServiceHost)
+               if localhostOrAllInterfaces[kshost] || strings.HasPrefix(kshost+".", strings.ToLower(hostname)+".") {
+                       // it would be confusing to recommend
+                       // migrating *this* host's legacy keepstore
+                       // config immediately after explaining that
+                       // very migration process in more detail.
+                       continue
+               }
+               ksurl := keepServiceURL(ks)
+               if _, ok := cluster.Services.Keepstore.InternalURLs[ksurl]; ok {
+                       // already added to InternalURLs
+                       continue
+               }
+               ldr.Logger.Warnf("you should migrate the legacy keepstore configuration file on host %s", ks.ServiceHost)
+       }
+       if !needDBRows {
+               ldr.Logger.Warn("you should delete all of your manually added keep_services listings using `arv --format=uuid keep_service list | xargs -n1 arv keep_service delete --uuid` -- when those are deleted, the services listed in your cluster configuration will be used instead")
+       }
+       return nil
+}
+
+// Warn about keepstore servers that have no volumes.
+func (ldr *Loader) checkEmptyKeepstores(cluster arvados.Cluster) error {
+servers:
+       for url := range cluster.Services.Keepstore.InternalURLs {
+               for _, vol := range cluster.Volumes {
+                       if len(vol.AccessViaHosts) == 0 {
+                               // accessible by all servers
+                               return nil
+                       }
+                       if _, ok := vol.AccessViaHosts[url]; ok {
+                               continue servers
+                       }
+               }
+               ldr.Logger.Warnf("keepstore configured at %s does not have access to any volumes", url)
+       }
+       return nil
+}
+
+// Warn about AccessViaHosts entries that don't correspond to any of
+// the listed keepstore services.
+func (ldr *Loader) checkUnlistedKeepstores(cluster arvados.Cluster) error {
+       for uuid, vol := range cluster.Volumes {
+               if uuid == "SAMPLE" {
+                       continue
+               }
+               for url := range vol.AccessViaHosts {
+                       if _, ok := cluster.Services.Keepstore.InternalURLs[url]; !ok {
+                               ldr.Logger.Warnf("Volumes.%s.AccessViaHosts refers to nonexistent keepstore server %s", uuid, url)
+                       }
+               }
+       }
+       return nil
+}
diff --git a/lib/config/deprecated_keepstore_test.go b/lib/config/deprecated_keepstore_test.go
new file mode 100644 (file)
index 0000000..62f3f1e
--- /dev/null
@@ -0,0 +1,791 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package config
+
+import (
+       "bytes"
+       "encoding/json"
+       "fmt"
+       "io"
+       "io/ioutil"
+       "os"
+       "sort"
+       "strconv"
+       "strings"
+       "text/template"
+       "time"
+
+       "git.curoverse.com/arvados.git/sdk/go/arvados"
+       "git.curoverse.com/arvados.git/sdk/go/arvadostest"
+       check "gopkg.in/check.v1"
+)
+
+// KeepstoreMigrationSuite holds the fixtures shared by the tests
+// that load a legacy keepstore config file together with a cluster
+// config.
+type KeepstoreMigrationSuite struct {
+       hostname string // blank = use test system's hostname
+       // keep_services records created by SetUpSuite, indexed by
+       // service port
+       ksByPort map[int]arvados.KeepService
+}
+
+// Register the suite with the gocheck test runner.
+var _ = check.Suite(&KeepstoreMigrationSuite{})
+
+// SetUpSuite points the Arvados client environment at the test API
+// server, deletes every keep_services record whose type is not
+// "proxy", and creates two "disk" records on localhost (ports 25107
+// and 25108). The created records are saved in s.ksByPort.
+func (s *KeepstoreMigrationSuite) SetUpSuite(c *check.C) {
+       os.Setenv("ARVADOS_API_HOST", os.Getenv("ARVADOS_TEST_API_HOST"))
+       os.Setenv("ARVADOS_API_HOST_INSECURE", "1")
+       os.Setenv("ARVADOS_API_TOKEN", arvadostest.AdminToken)
+
+       // The keepstore servers themselves are not needed, but the
+       // keep_services listings must point to localhost instead of
+       // the fictional keep*.zzzzz.arvadosapi.com hosts provided by
+       // the apiserver fixtures.
+
+       ac := arvados.NewClientFromEnv()
+
+       // Delete existing non-proxy listings.
+       var existing arvados.KeepServiceList
+       c.Assert(ac.RequestAndDecode(&existing, "GET", "arvados/v1/keep_services", nil, nil), check.IsNil)
+       for _, svc := range existing.Items {
+               if svc.ServiceType == "proxy" {
+                       continue
+               }
+               err := ac.RequestAndDecode(new(struct{}), "DELETE", "arvados/v1/keep_services/"+svc.UUID, nil, nil)
+               c.Assert(err, check.IsNil)
+       }
+       // Add new fake listings.
+       s.ksByPort = make(map[int]arvados.KeepService)
+       for _, port := range []int{25107, 25108} {
+               attrs := map[string]interface{}{
+                       "service_type": "disk",
+                       "service_host": "localhost",
+                       "service_port": port,
+               }
+               var created arvados.KeepService
+               err := ac.RequestAndDecode(&created, "POST", "arvados/v1/keep_services", nil, map[string]interface{}{
+                       "keep_service": attrs,
+               })
+               c.Assert(err, check.IsNil)
+               s.ksByPort[port] = created
+       }
+}
+
+// checkEquivalentWithKeepstoreConfig writes keepstoreconfig to a
+// temporary file, builds one loader from clusterconfig with
+// KeepstorePath set to that file and another from expectedconfig,
+// and passes both to checkEquivalentLoaders.
+func (s *KeepstoreMigrationSuite) checkEquivalentWithKeepstoreConfig(c *check.C, keepstoreconfig, clusterconfig, expectedconfig string) {
+       tmp, err := ioutil.TempFile("", "")
+       c.Assert(err, check.IsNil)
+       legacyPath := tmp.Name()
+       defer os.Remove(legacyPath)
+       _, err = io.WriteString(tmp, keepstoreconfig)
+       c.Assert(err, check.IsNil)
+       c.Assert(tmp.Close(), check.IsNil)
+
+       got := testLoader(c, clusterconfig, nil)
+       got.KeepstorePath = legacyPath
+       want := testLoader(c, expectedconfig, nil)
+       checkEquivalentLoaders(c, got, want)
+}
+
+// TestDeprecatedKeepstoreConfig checks that loading a legacy
+// keepstore config file alongside a minimal cluster config is
+// equivalent to loading a cluster config with the corresponding
+// Keepstore service, logging, API, Collections, and Volumes
+// settings spelled out.
+func (s *KeepstoreMigrationSuite) TestDeprecatedKeepstoreConfig(c *check.C) {
+       keyfile, err := ioutil.TempFile("", "")
+       c.Assert(err, check.IsNil)
+       defer os.Remove(keyfile.Name())
+       // Fail right away if the fixture cannot be written, rather
+       // than failing later with a confusing config mismatch.
+       _, err = io.WriteString(keyfile, "blobsigningkey\n")
+       c.Assert(err, check.IsNil)
+       err = keyfile.Close()
+       c.Assert(err, check.IsNil)
+
+       hostname, err := os.Hostname()
+       c.Assert(err, check.IsNil)
+
+       s.checkEquivalentWithKeepstoreConfig(c, `
+Listen: ":25107"
+Debug: true
+LogFormat: text
+MaxBuffers: 1234
+MaxRequests: 2345
+BlobSignatureTTL: 123m
+BlobSigningKeyFile: `+keyfile.Name()+`
+Volumes:
+- Type: Directory
+  Root: /tmp
+`, `
+Clusters:
+  z1111:
+    SystemRootToken: `+arvadostest.AdminToken+`
+    TLS: {Insecure: true}
+    Services:
+      Controller:
+        ExternalURL: "https://`+os.Getenv("ARVADOS_API_HOST")+`"
+`, `
+Clusters:
+  z1111:
+    SystemRootToken: `+arvadostest.AdminToken+`
+    TLS: {Insecure: true}
+    Services:
+      Keepstore:
+        InternalURLs:
+          "http://`+hostname+`:25107": {Rendezvous: `+s.ksByPort[25107].UUID[12:]+`}
+      Controller:
+        ExternalURL: "https://`+os.Getenv("ARVADOS_API_HOST")+`"
+    SystemLogs:
+      Format: text
+      LogLevel: debug
+    API:
+      MaxKeepBlobBuffers: 1234
+      MaxConcurrentRequests: 2345
+    Collections:
+      BlobSigningTTL: 123m
+      BlobSigningKey: blobsigningkey
+    Volumes:
+      z1111-nyw5e-`+s.ksByPort[25107].UUID[12:]+`:
+        AccessViaHosts:
+          "http://`+hostname+`:25107":
+            ReadOnly: false
+        Driver: Directory
+        DriverParameters:
+          Root: /tmp
+          Serialize: false
+        ReadOnly: false
+        Replication: 1
+        StorageClasses: {}
+`)
+}
+
+// TestDiscoverLocalVolumes checks the legacy
+// DiscoverVolumesFromMountsFile option. A fake mounts file lists a
+// temporary directory that contains a "keep" subdirectory, first
+// mounted "rw" and then "ro"; in both cases a single Directory
+// volume rooted at that subdirectory is expected, with the ReadOnly
+// flag following the mount option.
+func (s *KeepstoreMigrationSuite) TestDiscoverLocalVolumes(c *check.C) {
+       tmpd, err := ioutil.TempDir("", "")
+       c.Assert(err, check.IsNil)
+       defer os.RemoveAll(tmpd)
+       err = os.Mkdir(tmpd+"/keep", 0777)
+       c.Assert(err, check.IsNil)
+
+       tmpf, err := ioutil.TempFile("", "")
+       c.Assert(err, check.IsNil)
+       defer os.Remove(tmpf.Name())
+
+       // read/write
+       _, err = fmt.Fprintf(tmpf, "/dev/xvdb %s ext4 rw,noexec 0 0\n", tmpd)
+       c.Assert(err, check.IsNil)
+
+       s.testDeprecatedVolume(c, "DiscoverVolumesFromMountsFile: "+tmpf.Name(), arvados.Volume{
+               Driver:      "Directory",
+               ReadOnly:    false,
+               Replication: 1,
+       }, &arvados.DirectoryVolumeDriverParameters{
+               Root:      tmpd + "/keep",
+               Serialize: false,
+       }, &arvados.DirectoryVolumeDriverParameters{})
+
+       // read-only: rewind and empty the mounts file before
+       // rewriting it. If either step failed silently, the file
+       // would still contain the "rw" entry.
+       _, err = tmpf.Seek(0, io.SeekStart)
+       c.Assert(err, check.IsNil)
+       err = tmpf.Truncate(0)
+       c.Assert(err, check.IsNil)
+       _, err = fmt.Fprintf(tmpf, "/dev/xvdb %s ext4 ro,noexec 0 0\n", tmpd)
+       c.Assert(err, check.IsNil)
+
+       s.testDeprecatedVolume(c, "DiscoverVolumesFromMountsFile: "+tmpf.Name(), arvados.Volume{
+               Driver:      "Directory",
+               ReadOnly:    true,
+               Replication: 1,
+       }, &arvados.DirectoryVolumeDriverParameters{
+               Root:      tmpd + "/keep",
+               Serialize: false,
+       }, &arvados.DirectoryVolumeDriverParameters{})
+}
+
+// TestDeprecatedVolumes checks how S3, Azure, and Directory volume
+// entries from a legacy keepstore config (both with defaults and
+// fully configured) are translated into cluster config volumes and
+// driver parameters.
+func (s *KeepstoreMigrationSuite) TestDeprecatedVolumes(c *check.C) {
+       // The legacy config refers to credentials by file name, so
+       // write them to temp files. Check the writes so a broken
+       // fixture fails here rather than as a parameter mismatch.
+       accesskeyfile, err := ioutil.TempFile("", "")
+       c.Assert(err, check.IsNil)
+       defer os.Remove(accesskeyfile.Name())
+       _, err = io.WriteString(accesskeyfile, "accesskeydata\n")
+       c.Assert(err, check.IsNil)
+       err = accesskeyfile.Close()
+       c.Assert(err, check.IsNil)
+
+       secretkeyfile, err := ioutil.TempFile("", "")
+       c.Assert(err, check.IsNil)
+       defer os.Remove(secretkeyfile.Name())
+       _, err = io.WriteString(secretkeyfile, "secretkeydata\n")
+       c.Assert(err, check.IsNil)
+       err = secretkeyfile.Close()
+       c.Assert(err, check.IsNil)
+
+       // s3, empty/default
+       s.testDeprecatedVolume(c, `
+Volumes:
+- Type: S3
+`, arvados.Volume{
+               Driver:      "S3",
+               Replication: 1,
+       }, &arvados.S3VolumeDriverParameters{}, &arvados.S3VolumeDriverParameters{})
+
+       // s3, fully configured
+       s.testDeprecatedVolume(c, `
+Volumes:
+- Type: S3
+  AccessKeyFile: `+accesskeyfile.Name()+`
+  SecretKeyFile: `+secretkeyfile.Name()+`
+  Endpoint: https://storage.googleapis.com
+  Region: us-east-1z
+  Bucket: testbucket
+  LocationConstraint: true
+  IndexPageSize: 1234
+  S3Replication: 4
+  ConnectTimeout: 3m
+  ReadTimeout: 4m
+  RaceWindow: 5m
+  UnsafeDelete: true
+`, arvados.Volume{
+               Driver:      "S3",
+               Replication: 4,
+       }, &arvados.S3VolumeDriverParameters{
+               AccessKey:          "accesskeydata",
+               SecretKey:          "secretkeydata",
+               Endpoint:           "https://storage.googleapis.com",
+               Region:             "us-east-1z",
+               Bucket:             "testbucket",
+               LocationConstraint: true,
+               IndexPageSize:      1234,
+               ConnectTimeout:     arvados.Duration(time.Minute * 3),
+               ReadTimeout:        arvados.Duration(time.Minute * 4),
+               RaceWindow:         arvados.Duration(time.Minute * 5),
+               UnsafeDelete:       true,
+       }, &arvados.S3VolumeDriverParameters{})
+
+       // azure, empty/default
+       s.testDeprecatedVolume(c, `
+Volumes:
+- Type: Azure
+`, arvados.Volume{
+               Driver:      "Azure",
+               Replication: 1,
+       }, &arvados.AzureVolumeDriverParameters{}, &arvados.AzureVolumeDriverParameters{})
+
+       // azure, fully configured
+       s.testDeprecatedVolume(c, `
+Volumes:
+- Type: Azure
+  ReadOnly: true
+  StorageAccountName: storageacctname
+  StorageAccountKeyFile: `+secretkeyfile.Name()+`
+  StorageBaseURL: https://example.example
+  ContainerName: testctr
+  LocationConstraint: true
+  AzureReplication: 4
+  RequestTimeout: 3m
+  ListBlobsRetryDelay: 4m
+  ListBlobsMaxAttempts: 5
+`, arvados.Volume{
+               Driver:      "Azure",
+               ReadOnly:    true,
+               Replication: 4,
+       }, &arvados.AzureVolumeDriverParameters{
+               StorageAccountName:   "storageacctname",
+               StorageAccountKey:    "secretkeydata",
+               StorageBaseURL:       "https://example.example",
+               ContainerName:        "testctr",
+               RequestTimeout:       arvados.Duration(time.Minute * 3),
+               ListBlobsRetryDelay:  arvados.Duration(time.Minute * 4),
+               ListBlobsMaxAttempts: 5,
+       }, &arvados.AzureVolumeDriverParameters{})
+
+       // directory, empty/default
+       s.testDeprecatedVolume(c, `
+Volumes:
+- Type: Directory
+  Root: /tmp/xyzzy
+`, arvados.Volume{
+               Driver:      "Directory",
+               Replication: 1,
+       }, &arvados.DirectoryVolumeDriverParameters{
+               Root: "/tmp/xyzzy",
+       }, &arvados.DirectoryVolumeDriverParameters{})
+
+       // directory, fully configured
+       s.testDeprecatedVolume(c, `
+Volumes:
+- Type: Directory
+  ReadOnly: true
+  Root: /tmp/xyzzy
+  DirectoryReplication: 4
+  Serialize: true
+`, arvados.Volume{
+               Driver:      "Directory",
+               ReadOnly:    true,
+               Replication: 4,
+       }, &arvados.DirectoryVolumeDriverParameters{
+               Root:      "/tmp/xyzzy",
+               Serialize: true,
+       }, &arvados.DirectoryVolumeDriverParameters{})
+}
+
+// testDeprecatedVolume loads oldconfigdata as a legacy keepstore
+// config (with "Listen: :12345" prepended) alongside an empty z1111
+// cluster config, and checks that exactly one volume results.
+//
+// expectvol supplies the expected Driver and Replication, plus the
+// ReadOnly flag expected on this host's AccessViaHosts entry.
+// paramsdst must be a pointer of the same type as expectparams; the
+// volume's DriverParameters are decoded into it and then compared
+// with expectparams.
+func (s *KeepstoreMigrationSuite) testDeprecatedVolume(c *check.C, oldconfigdata string, expectvol arvados.Volume, expectparams interface{}, paramsdst interface{}) {
+       hostname := s.hostname
+       if hostname == "" {
+               h, err := os.Hostname()
+               c.Assert(err, check.IsNil)
+               hostname = h
+       }
+
+       oldconfig, err := ioutil.TempFile("", "")
+       c.Assert(err, check.IsNil)
+       defer os.Remove(oldconfig.Name())
+       _, err = io.WriteString(oldconfig, "Listen: :12345\n"+oldconfigdata)
+       c.Assert(err, check.IsNil)
+       if !strings.Contains(oldconfigdata, "DiscoverVolumesFromMountsFile") {
+               // Prevent tests from looking at the real /proc/mounts on the test host.
+               _, err = io.WriteString(oldconfig, "\nDiscoverVolumesFromMountsFile: /dev/null\n")
+               c.Assert(err, check.IsNil)
+       }
+       err = oldconfig.Close()
+       c.Assert(err, check.IsNil)
+
+       ldr := testLoader(c, "Clusters: {z1111: {}}", nil)
+       ldr.KeepstorePath = oldconfig.Name()
+       cfg, err := ldr.Load()
+       c.Assert(err, check.IsNil)
+       cc := cfg.Clusters["z1111"]
+       c.Check(cc.Volumes, check.HasLen, 1)
+       for uuid, v := range cc.Volumes {
+               c.Check(uuid, check.HasLen, 27)
+               c.Check(v.Driver, check.Equals, expectvol.Driver)
+               c.Check(v.Replication, check.Equals, expectvol.Replication)
+
+               avh, ok := v.AccessViaHosts[arvados.URL{Scheme: "http", Host: hostname + ":12345"}]
+               c.Check(ok, check.Equals, true)
+               c.Check(avh.ReadOnly, check.Equals, expectvol.ReadOnly)
+
+               err := json.Unmarshal(v.DriverParameters, paramsdst)
+               c.Check(err, check.IsNil)
+               c.Check(paramsdst, check.DeepEquals, expectparams)
+       }
+}
+
+// How we handle a volume from a legacy keepstore config file depends
+// on whether it's writable, whether a volume using the same cloud
+// backend already exists in the cluster config, and (if so) whether
+// it already has an AccessViaHosts entry for this host.
+//
+// In all cases, we should end up with an AccessViaHosts entry for
+// this host, to indicate that the current host's volumes have been
+// migrated.
+
+// The legacy config names a backend that the cluster config already
+// references, and this host is already in that volume's
+// AccessViaHosts --> the resulting Volumes section is expected to be
+// the same as when no legacy config is loaded.
+func (s *KeepstoreMigrationSuite) TestIncrementalVolumeMigration_AlreadyMigrated(c *check.C) {
+       withoutLegacy, withLegacy := s.loadWithKeepstoreConfig(c, `
+Listen: :12345
+Volumes:
+- Type: S3
+  Endpoint: https://storage.googleapis.com
+  Region: us-east-1z
+  Bucket: alreadymigrated
+  S3Replication: 3
+`)
+       checkEqualYAML(c, withLegacy, withoutLegacy)
+}
+
+// Writable volume, same cloud backend already referenced in cluster
+// config --> change UUID to match this keepstore's UUID.
+//
+// The number of volumes must stay the same, and the one volume UUID
+// that was not present before must be the UUID derived from this
+// keepstore's keep_services record.
+func (s *KeepstoreMigrationSuite) TestIncrementalVolumeMigration_UpdateUUID(c *check.C) {
+       port, expectUUID := s.getTestKeepstorePortAndMatchingVolumeUUID(c)
+
+       before, after := s.loadWithKeepstoreConfig(c, `
+Listen: :`+strconv.Itoa(port)+`
+Volumes:
+- Type: S3
+  Endpoint: https://storage.googleapis.com
+  Region: us-east-1z
+  Bucket: readonlyonother
+  S3Replication: 3
+`)
+       c.Check(after, check.HasLen, len(before))
+       newuuids := s.findAddedVolumes(c, before, after, 1)
+       newvol := after[newuuids[0]]
+
+       var params arvados.S3VolumeDriverParameters
+       // A decoding failure would otherwise show up only as a
+       // misleading bucket name mismatch.
+       err := json.Unmarshal(newvol.DriverParameters, &params)
+       c.Assert(err, check.IsNil)
+       c.Check(params.Bucket, check.Equals, "readonlyonother")
+       c.Check(newuuids[0], check.Equals, expectUUID)
+}
+
+// Writable volume, same cloud backend not yet referenced --> add a
+// new volume, with UUID to match this keepstore's UUID.
+func (s *KeepstoreMigrationSuite) TestIncrementalVolumeMigration_AddCloudVolume(c *check.C) {
+       port, expectUUID := s.getTestKeepstorePortAndMatchingVolumeUUID(c)
+
+       before, after := s.loadWithKeepstoreConfig(c, `
+Listen: :`+strconv.Itoa(port)+`
+Volumes:
+- Type: S3
+  Endpoint: https://storage.googleapis.com
+  Region: us-east-1z
+  Bucket: bucket-to-migrate
+  S3Replication: 3
+`)
+       newuuids := s.findAddedVolumes(c, before, after, 1)
+       newvol := after[newuuids[0]]
+
+       var params arvados.S3VolumeDriverParameters
+       // A decoding failure would otherwise show up only as a
+       // misleading bucket name mismatch.
+       err := json.Unmarshal(newvol.DriverParameters, &params)
+       c.Assert(err, check.IsNil)
+       c.Check(params.Bucket, check.Equals, "bucket-to-migrate")
+       c.Check(newvol.Replication, check.Equals, 3)
+
+       c.Check(newuuids[0], check.Equals, expectUUID)
+}
+
+// Writable volume, same filesystem backend already referenced in
+// cluster config, but this host isn't in AccessViaHosts --> add a new
+// volume, with UUID to match this keepstore's UUID (filesystem-backed
+// volumes are assumed to be different on different hosts, even if
+// paths are the same).
+func (s *KeepstoreMigrationSuite) TestIncrementalVolumeMigration_AddLocalVolume(c *check.C) {
+       before, after := s.loadWithKeepstoreConfig(c, `
+Listen: :12345
+Volumes:
+- Type: Directory
+  Root: /data/sdd
+  DirectoryReplication: 2
+`)
+       newuuids := s.findAddedVolumes(c, before, after, 1)
+       newvol := after[newuuids[0]]
+
+       var params arvados.DirectoryVolumeDriverParameters
+       // A decoding failure would otherwise show up only as a
+       // misleading Root mismatch.
+       err := json.Unmarshal(newvol.DriverParameters, &params)
+       c.Assert(err, check.IsNil)
+       c.Check(params.Root, check.Equals, "/data/sdd")
+       c.Check(newvol.Replication, check.Equals, 2)
+}
+
+// The legacy config names a writable filesystem volume whose root is
+// already referenced in the cluster config with this host in
+// AccessViaHosts --> treated as already migrated, so the resulting
+// Volumes section is expected to be the same as when no legacy
+// config is loaded.
+func (s *KeepstoreMigrationSuite) TestIncrementalVolumeMigration_LocalVolumeAlreadyMigrated(c *check.C) {
+       withoutLegacy, withLegacy := s.loadWithKeepstoreConfig(c, `
+Listen: :12345
+Volumes:
+- Type: Directory
+  Root: /data/sde
+  DirectoryReplication: 2
+`)
+       checkEqualYAML(c, withLegacy, withoutLegacy)
+}
+
+// Multiple writable cloud-backed volumes --> one of them will get a
+// UUID matching this keepstore's UUID.
+func (s *KeepstoreMigrationSuite) TestIncrementalVolumeMigration_AddMultipleCloudVolumes(c *check.C) {
+       port, expectUUID := s.getTestKeepstorePortAndMatchingVolumeUUID(c)
+
+       before, after := s.loadWithKeepstoreConfig(c, `
+Listen: :`+strconv.Itoa(port)+`
+Volumes:
+- Type: S3
+  Endpoint: https://storage.googleapis.com
+  Region: us-east-1z
+  Bucket: first-bucket-to-migrate
+  S3Replication: 3
+- Type: S3
+  Endpoint: https://storage.googleapis.com
+  Region: us-east-1z
+  Bucket: second-bucket-to-migrate
+  S3Replication: 3
+`)
+       newuuids := s.findAddedVolumes(c, before, after, 2)
+       // Sort by bucket name (so "first" comes before "second")
+       params := map[string]arvados.S3VolumeDriverParameters{}
+       for _, uuid := range newuuids {
+               var p arvados.S3VolumeDriverParameters
+               // The sort below relies on the decoded bucket names,
+               // so stop here if decoding fails.
+               err := json.Unmarshal(after[uuid].DriverParameters, &p)
+               c.Assert(err, check.IsNil)
+               params[uuid] = p
+       }
+       sort.Slice(newuuids, func(i, j int) bool { return params[newuuids[i]].Bucket < params[newuuids[j]].Bucket })
+       newvol0, newvol1 := after[newuuids[0]], after[newuuids[1]]
+       params0, params1 := params[newuuids[0]], params[newuuids[1]]
+
+       c.Check(params0.Bucket, check.Equals, "first-bucket-to-migrate")
+       c.Check(newvol0.Replication, check.Equals, 3)
+
+       c.Check(params1.Bucket, check.Equals, "second-bucket-to-migrate")
+       c.Check(newvol1.Replication, check.Equals, 3)
+
+       // Don't care which one gets the special UUID
+       if newuuids[0] != expectUUID {
+               c.Check(newuuids[1], check.Equals, expectUUID)
+       }
+}
+
+// Read-only volume in the legacy config, same cloud backend already
+// referenced in the cluster config --> this host gets an entry in
+// the existing volume's AccessViaHosts. Only the presence of the
+// entry is checked here, not its ReadOnly flag.
+func (s *KeepstoreMigrationSuite) TestIncrementalVolumeMigration_UpdateWithReadOnly(c *check.C) {
+       const volUUID = "zzzzz-nyw5e-readonlyonother"
+
+       port, _ := s.getTestKeepstorePortAndMatchingVolumeUUID(c)
+       before, after := s.loadWithKeepstoreConfig(c, `
+Listen: :`+strconv.Itoa(port)+`
+Volumes:
+- Type: S3
+  Endpoint: https://storage.googleapis.com
+  Region: us-east-1z
+  Bucket: readonlyonother
+  S3Replication: 3
+  ReadOnly: true
+`)
+       hostname, err := os.Hostname()
+       c.Assert(err, check.IsNil)
+       myURL := arvados.URL{Scheme: "http", Host: hostname + ":" + strconv.Itoa(port)}
+
+       _, listed := before[volUUID].AccessViaHosts[myURL]
+       c.Check(listed, check.Equals, false)
+       _, listed = after[volUUID].AccessViaHosts[myURL]
+       c.Check(listed, check.Equals, true)
+}
+
+// Writable volume in the legacy config, same cloud backend already
+// writable by another keepstore server in the cluster config -->
+// this host gets an entry in the existing volume's AccessViaHosts.
+// Only the presence of the entry is checked here, not its ReadOnly
+// flag.
+func (s *KeepstoreMigrationSuite) TestIncrementalVolumeMigration_UpdateAlreadyWritable(c *check.C) {
+       const volUUID = "zzzzz-nyw5e-writableonother"
+
+       port, _ := s.getTestKeepstorePortAndMatchingVolumeUUID(c)
+       before, after := s.loadWithKeepstoreConfig(c, `
+Listen: :`+strconv.Itoa(port)+`
+Volumes:
+- Type: S3
+  Endpoint: https://storage.googleapis.com
+  Region: us-east-1z
+  Bucket: writableonother
+  S3Replication: 3
+  ReadOnly: false
+`)
+       hostname, err := os.Hostname()
+       c.Assert(err, check.IsNil)
+       myURL := arvados.URL{Scheme: "http", Host: hostname + ":" + strconv.Itoa(port)}
+
+       _, listed := before[volUUID].AccessViaHosts[myURL]
+       c.Check(listed, check.Equals, false)
+       _, listed = after[volUUID].AccessViaHosts[myURL]
+       c.Check(listed, check.Equals, true)
+}
+
+// Non-writable volume, same cloud backend not already referenced in
+// cluster config --> assign a new random volume UUID.
+//
+// NOTE(review): the legacy config below does not set "ReadOnly:
+// true", so as written this exercises a writable volume -- confirm
+// whether the fixture or the description above is what was intended.
+func (s *KeepstoreMigrationSuite) TestIncrementalVolumeMigration_AddReadOnly(c *check.C) {
+       port, _ := s.getTestKeepstorePortAndMatchingVolumeUUID(c)
+       before, after := s.loadWithKeepstoreConfig(c, `
+Listen: :`+strconv.Itoa(port)+`
+Volumes:
+- Type: S3
+  Endpoint: https://storage.googleapis.com
+  Region: us-east-1z
+  Bucket: differentbucket
+  S3Replication: 3
+`)
+       newuuids := s.findAddedVolumes(c, before, after, 1)
+       newvol := after[newuuids[0]]
+
+       var params arvados.S3VolumeDriverParameters
+       // A decoding failure would otherwise show up only as a
+       // misleading bucket name mismatch.
+       err := json.Unmarshal(newvol.DriverParameters, &params)
+       c.Assert(err, check.IsNil)
+       c.Check(params.Bucket, check.Equals, "differentbucket")
+
+       hostname, err := os.Hostname()
+       c.Assert(err, check.IsNil)
+       _, ok := newvol.AccessViaHosts[arvados.URL{Scheme: "http", Host: fmt.Sprintf("%s:%d", hostname, port)}]
+       c.Check(ok, check.Equals, true)
+}
+
+// Check the warnings logged during a load: keep_services records
+// are added for keep0 and keep1, a legacy keepstore config is
+// loaded, and the log output is matched against the expected
+// messages.
+func (s *KeepstoreMigrationSuite) TestPendingKeepstoreMigrations(c *check.C) {
+       ac := arvados.NewClientFromEnv()
+       for _, name := range []string{"keep0", "keep1"} {
+               attrs := map[string]interface{}{
+                       "service_type": "disk",
+                       "service_host": name + ".zzzzz.example.com",
+                       "service_port": 25107,
+               }
+               err := ac.RequestAndDecode(new(struct{}), "POST", "arvados/v1/keep_services", nil, map[string]interface{}{
+                       "keep_service": attrs,
+               })
+               c.Assert(err, check.IsNil)
+       }
+
+       port, _ := s.getTestKeepstorePortAndMatchingVolumeUUID(c)
+       logs := s.logsWithKeepstoreConfig(c, `
+Listen: :`+strconv.Itoa(port)+`
+Volumes:
+- Type: S3
+  Endpoint: https://storage.googleapis.com
+  Bucket: foo
+`)
+       for _, expect := range []struct {
+               checker check.Checker
+               pattern string
+       }{
+               {check.Matches, `(?ms).*you should remove the legacy keepstore config file.*`},
+               {check.Matches, `(?ms).*you should migrate the legacy keepstore configuration file on host keep1.zzzzz.example.com.*`},
+               {check.Not(check.Matches), `(?ms).*should migrate.*keep0.zzzzz.example.com.*`},
+               {check.Matches, `(?ms).*keepstore configured at http://keep2.zzzzz.example.com:25107 does not have access to any volumes.*`},
+               {check.Matches, `(?ms).*Volumes.zzzzz-nyw5e-possconfigerror.AccessViaHosts refers to nonexistent keepstore server http://keep00.zzzzz.example.com:25107.*`},
+       } {
+               c.Check(logs, expect.checker, expect.pattern)
+       }
+}
+
+// clusterConfigForKeepstoreMigrationTest is a text/template source
+// for the cluster config used by the incremental migration tests.
+// clusterConfigYAML fills in {{.hostname}} with the test host's
+// hostname and {{.controller}} with $ARVADOS_API_HOST.
+//
+// Each volume sets up a different starting point: a backend already
+// accessible via this host, one listed only for another host
+// (read-only or writable), local directories, and one whose
+// AccessViaHosts names a host missing from InternalURLs.
+const clusterConfigForKeepstoreMigrationTest = `
+Clusters:
+  zzzzz:
+    SystemRootToken: ` + arvadostest.AdminToken + `
+    Services:
+      Keepstore:
+        InternalURLs:
+          "http://{{.hostname}}:12345": {}
+          "http://keep0.zzzzz.example.com:25107": {}
+          "http://keep2.zzzzz.example.com:25107": {}
+      Controller:
+        ExternalURL: "https://{{.controller}}"
+    TLS:
+      Insecure: true
+    Volumes:
+
+      zzzzz-nyw5e-alreadymigrated:
+        AccessViaHosts:
+          "http://{{.hostname}}:12345": {}
+        Driver: S3
+        DriverParameters:
+          Endpoint: https://storage.googleapis.com
+          Region: us-east-1z
+          Bucket: alreadymigrated
+        Replication: 3
+
+      zzzzz-nyw5e-readonlyonother:
+        AccessViaHosts:
+          "http://keep0.zzzzz.example.com:25107": {ReadOnly: true}
+        Driver: S3
+        DriverParameters:
+          Endpoint: https://storage.googleapis.com
+          Region: us-east-1z
+          Bucket: readonlyonother
+        Replication: 3
+
+      zzzzz-nyw5e-writableonother:
+        AccessViaHosts:
+          "http://keep0.zzzzz.example.com:25107": {}
+        Driver: S3
+        DriverParameters:
+          Endpoint: https://storage.googleapis.com
+          Region: us-east-1z
+          Bucket: writableonother
+        Replication: 3
+
+      zzzzz-nyw5e-localfilesystem:
+        AccessViaHosts:
+          "http://keep0.zzzzz.example.com:25107": {}
+        Driver: Directory
+        DriverParameters:
+          Root: /data/sdd
+        Replication: 1
+
+      zzzzz-nyw5e-localismigrated:
+        AccessViaHosts:
+          "http://{{.hostname}}:12345": {}
+        Driver: Directory
+        DriverParameters:
+          Root: /data/sde
+        Replication: 1
+
+      zzzzz-nyw5e-possconfigerror:
+        AccessViaHosts:
+          "http://keep00.zzzzz.example.com:25107": {}
+        Driver: Directory
+        DriverParameters:
+          Root: /data/sdf
+        Replication: 1
+`
+
+// Determine the effect of combining the given legacy keepstore config
+// YAML (the content of an old keepstore config file, typically
+// "Listen" and "Volumes" entries) with the example
+// clusterConfigForKeepstoreMigrationTest config.
+//
+// Return two Volumes configs -- one without loading keepstoreYAML
+// ("before") and one with ("after") -- for the caller to compare.
+//
+// Any failure to write the temporary file or to load either config
+// aborts the calling test.
+func (s *KeepstoreMigrationSuite) loadWithKeepstoreConfig(c *check.C, keepstoreYAML string) (before, after map[string]arvados.Volume) {
+       ldr := testLoader(c, s.clusterConfigYAML(c), nil)
+       cBefore, err := ldr.Load()
+       c.Assert(err, check.IsNil)
+
+       keepstoreconfig, err := ioutil.TempFile("", "")
+       c.Assert(err, check.IsNil)
+       defer os.Remove(keepstoreconfig.Name())
+       // Check the write and close the file so a broken fixture
+       // fails here, not as a confusing load result.
+       _, err = io.WriteString(keepstoreconfig, keepstoreYAML)
+       c.Assert(err, check.IsNil)
+       err = keepstoreconfig.Close()
+       c.Assert(err, check.IsNil)
+
+       ldr = testLoader(c, s.clusterConfigYAML(c), nil)
+       ldr.KeepstorePath = keepstoreconfig.Name()
+       cAfter, err := ldr.Load()
+       c.Assert(err, check.IsNil)
+
+       return cBefore.Clusters["zzzzz"].Volumes, cAfter.Clusters["zzzzz"].Volumes
+}
+
+// Return the log messages emitted when loading keepstoreYAML along
+// with clusterConfigForKeepstoreMigrationTest.
+//
+// Any failure to write the temporary file or to load the config
+// aborts the calling test.
+func (s *KeepstoreMigrationSuite) logsWithKeepstoreConfig(c *check.C, keepstoreYAML string) string {
+       var logs bytes.Buffer
+
+       keepstoreconfig, err := ioutil.TempFile("", "")
+       c.Assert(err, check.IsNil)
+       defer os.Remove(keepstoreconfig.Name())
+       // Check the write and close the file so a broken fixture
+       // fails here, not as a missing log message.
+       _, err = io.WriteString(keepstoreconfig, keepstoreYAML)
+       c.Assert(err, check.IsNil)
+       err = keepstoreconfig.Close()
+       c.Assert(err, check.IsNil)
+
+       ldr := testLoader(c, s.clusterConfigYAML(c), &logs)
+       ldr.KeepstorePath = keepstoreconfig.Name()
+       _, err = ldr.Load()
+       c.Assert(err, check.IsNil)
+
+       return logs.String()
+}
+
+// clusterConfigYAML renders clusterConfigForKeepstoreMigrationTest
+// as a template, substituting the test host's hostname for
+// "hostname" and $ARVADOS_API_HOST for "controller", and returns the
+// resulting text.
+func (s *KeepstoreMigrationSuite) clusterConfigYAML(c *check.C) string {
+       host, err := os.Hostname()
+       c.Assert(err, check.IsNil)
+
+       vars := map[string]interface{}{
+               "hostname":   host,
+               "controller": os.Getenv("ARVADOS_API_HOST"),
+       }
+       t := template.Must(template.New("config").Parse(clusterConfigForKeepstoreMigrationTest))
+
+       var rendered bytes.Buffer
+       c.Assert(t.Execute(&rendered, vars), check.IsNil)
+       return rendered.String()
+}
+
+// Collect the keys of "after" that are not keys of "before", in no
+// particular order.
+//
+// If fewer than minAdded such keys are found, fail the calling test
+// via c.Assert.
+func (s *KeepstoreMigrationSuite) findAddedVolumes(c *check.C, before, after map[string]arvados.Volume, minAdded int) (uuids []string) {
+       for id := range after {
+               if _, existed := before[id]; existed {
+                       continue
+               }
+               uuids = append(uuids, id)
+       }
+       if len(uuids) >= minAdded {
+               return uuids
+       }
+       // Too few: HasLen cannot succeed here, so this reports the
+       // shortfall and stops the test.
+       c.Assert(uuids, check.HasLen, minAdded)
+       return uuids
+}
+
+// getTestKeepstorePortAndMatchingVolumeUUID picks one entry of
+// s.ksByPort (whichever map iteration yields first) and returns its
+// port along with "zzzzz-nyw5e-" followed by the part of that
+// service's UUID after the first 12 characters. The test fails if
+// s.ksByPort is empty.
+func (s *KeepstoreMigrationSuite) getTestKeepstorePortAndMatchingVolumeUUID(c *check.C) (int, string) {
+       if len(s.ksByPort) == 0 {
+               c.Fatal("s.ksByPort is empty")
+       }
+       for p, svc := range s.ksByPort {
+               c.Assert(svc.UUID, check.HasLen, 27)
+               return p, "zzzzz-nyw5e-" + svc.UUID[12:]
+       }
+       return 0, ""
+}
+
+// TestKeepServiceIsMe runs keepServiceIsMe over a table of
+// (hostname, listen address, keep_services host/port) combinations
+// and checks each result against the expected match value.
+func (s *KeepstoreMigrationSuite) TestKeepServiceIsMe(c *check.C) {
+       trials := []struct {
+               match       bool
+               hostname    string
+               listen      string
+               serviceHost string
+               servicePort int
+       }{
+               {true, "keep0", "keep0", "keep0", 80},
+               {true, "keep0", "[::1]:http", "keep0", 80},
+               {true, "keep0", "[::]:http", "keep0", 80},
+               {true, "keep0", "keep0:25107", "keep0", 25107},
+               {true, "keep0", ":25107", "keep0", 25107},
+               {true, "keep0.domain", ":25107", "keep0.domain.example", 25107},
+               {true, "keep0.domain.example", ":25107", "keep0.domain.example", 25107},
+               {true, "keep0", ":25107", "keep0.domain.example", 25107},
+               {true, "keep0", ":25107", "Keep0.domain.example", 25107},
+               {true, "keep0", ":http", "keep0.domain.example", 80},
+               {true, "keep0", ":25107", "localhost", 25107},
+               {true, "keep0", ":25107", "::1", 25107},
+               {false, "keep0", ":25107", "keep0", 1111},              // different port
+               {false, "keep0", ":25107", "localhost", 1111},          // different port
+               {false, "keep0", ":http", "keep0.domain.example", 443}, // different port
+               {false, "keep0", ":bogussss", "keep0", 25107},          // unresolvable port
+               {false, "keep0", ":25107", "keep1", 25107},             // different hostname
+               {false, "keep1", ":25107", "keep10", 25107},            // different hostname (prefix, but not on a "." boundary)
+       }
+       for i, trial := range trials {
+               svc := arvados.KeepService{
+                       ServiceHost: trial.serviceHost,
+                       ServicePort: trial.servicePort,
+               }
+               got := keepServiceIsMe(svc, trial.hostname, trial.listen)
+               c.Check(got, check.Equals, trial.match, check.Commentf("trial #%d: %#v", i, trial))
+       }
+}
index 5dda0ba94457eb0dd5c76a85a22f38a798011de2..ff1bb9434a42c8babc3cedef9165e7ad3d16d949 100644 (file)
@@ -47,7 +47,7 @@ func testLoadLegacyConfig(content []byte, mungeFlag string, c *check.C) (*arvado
 func (s *LoadSuite) TestDeprecatedNodeProfilesToServices(c *check.C) {
        hostname, err := os.Hostname()
        c.Assert(err, check.IsNil)
-       s.checkEquivalent(c, `
+       checkEquivalent(c, `
 Clusters:
  z1111:
   NodeProfiles:
@@ -216,3 +216,60 @@ func (s *LoadSuite) TestLegacyArvGitHttpdConfig(c *check.C) {
        c.Check(cluster.Git.Repositories, check.Equals, "/test/reporoot")
        c.Check(cluster.Services.Keepproxy.InternalURLs[arvados.URL{Host: ":9000"}], check.Equals, arvados.ServiceInstance{})
 }
+
+// TestLegacyKeepBalanceConfig loads a legacy keep-balance config
+// through the -legacy-keepbalance-config flag and checks the
+// resulting cluster settings. It then checks which KeepServiceTypes
+// and KeepServiceList settings load without error and which are
+// rejected.
+func (s *LoadSuite) TestLegacyKeepBalanceConfig(c *check.C) {
+       f := "-legacy-keepbalance-config"
+       content := []byte(fmtKeepBalanceConfig(""))
+       cluster, err := testLoadLegacyConfig(content, f, c)
+
+       // Stop here on failure: the checks below dereference cluster
+       // and would panic on a nil pointer.
+       c.Assert(err, check.IsNil)
+       c.Assert(cluster, check.NotNil)
+       c.Check(cluster.ManagementToken, check.Equals, "xyzzy")
+       c.Check(cluster.Services.Keepbalance.InternalURLs[arvados.URL{Host: ":80"}], check.Equals, arvados.ServiceInstance{})
+       c.Check(cluster.Collections.BalanceCollectionBuffers, check.Equals, 1000)
+       c.Check(cluster.Collections.BalanceCollectionBatch, check.Equals, 100000)
+       c.Check(cluster.Collections.BalancePeriod.String(), check.Equals, "10m")
+       c.Check(cluster.Collections.BlobMissingReport, check.Equals, "testfile")
+       c.Check(cluster.API.KeepServiceRequestTimeout.String(), check.Equals, "30m")
+
+       for _, trial := range []struct {
+               param string
+               ok    bool
+       }{
+               {`"KeepServiceTypes":["disk"],`, true},
+               {`"KeepServiceTypes":[],`, true},
+               {`"KeepServiceTypes":["proxy"],`, false},
+               {`"KeepServiceTypes":["disk", "proxy"],`, false},
+               {`"KeepServiceList":{},`, false},
+       } {
+               _, err := testLoadLegacyConfig([]byte(fmtKeepBalanceConfig(trial.param)), f, c)
+               comment := check.Commentf("param: %s", trial.param)
+               if trial.ok {
+                       c.Check(err, check.IsNil, comment)
+               } else {
+                       c.Check(err, check.NotNil, comment)
+               }
+       }
+}
+
+// fmtKeepBalanceConfig returns the text of a legacy keep-balance
+// JSON config, with param inserted verbatim between the "Listen" and
+// "RunPeriod" entries. A non-empty param should end with a comma so
+// the result is still valid JSON.
+func fmtKeepBalanceConfig(param string) string {
+       const layout = `
+{
+       "Client": {
+               "Scheme": "",
+               "APIHost": "example.com",
+               "AuthToken": "abcdefg",
+               "Insecure": false
+       },
+       "Listen": ":80",
+       %s
+       "RunPeriod": "10m",
+       "CollectionBatchSize": 100000,
+       "CollectionBuffers": 1000,
+       "RequestTimeout": "30m",
+       "ManagementToken": "xyzzy",
+       "LostBlocksFile": "testfile"
+}
+`
+       return fmt.Sprintf(layout, param)
+}
index 69aae2c624a68ed4fcf5837739e33e3d97fc30e7..5437836f6fee05f3aded39954ea8d626d3c12f6e 100644 (file)
@@ -63,8 +63,10 @@ var whitelist = map[string]bool{
        "API":                                          true,
        "API.AsyncPermissionsUpdateInterval":           false,
        "API.DisabledAPIs":                             false,
+       "API.MaxConcurrentRequests":                    false,
        "API.MaxIndexDatabaseRead":                     false,
        "API.MaxItemsPerResponse":                      true,
+       "API.MaxKeepBlobBuffers":                       false,
        "API.MaxRequestAmplification":                  false,
        "API.MaxRequestSize":                           true,
        "API.RailsSessionSecretToken":                  false,
@@ -81,6 +83,12 @@ var whitelist = map[string]bool{
        "Collections.BlobSigning":                      true,
        "Collections.BlobSigningKey":                   false,
        "Collections.BlobSigningTTL":                   true,
+       "Collections.BlobTrash":                        false,
+       "Collections.BlobTrashLifetime":                false,
+       "Collections.BlobTrashConcurrency":             false,
+       "Collections.BlobTrashCheckInterval":           false,
+       "Collections.BlobDeleteConcurrency":            false,
+       "Collections.BlobReplicateConcurrency":         false,
        "Collections.CollectionVersioning":             false,
        "Collections.DefaultReplication":               true,
        "Collections.DefaultTrashLifetime":             true,
@@ -91,6 +99,10 @@ var whitelist = map[string]bool{
        "Collections.TrashSweepInterval":               false,
        "Collections.TrustAllContent":                  false,
        "Collections.WebDAVCache":                      false,
+       "Collections.BalanceCollectionBatch":           false,
+       "Collections.BalancePeriod":                    false,
+       "Collections.BlobMissingReport":                false,
+       "Collections.BalanceCollectionBuffers":         false,
        "Containers":                                   true,
        "Containers.CloudVMs":                          false,
        "Containers.CrunchRunCommand":                  false,
@@ -154,6 +166,16 @@ var whitelist = map[string]bool{
        "Users.NewUsersAreActive":                      false,
        "Users.UserNotifierEmailFrom":                  false,
        "Users.UserProfileNotificationAddress":         false,
+       "Volumes":                                      true,
+       "Volumes.*":                                    true,
+       "Volumes.*.*":                                  false,
+       "Volumes.*.AccessViaHosts":                     true,
+       "Volumes.*.AccessViaHosts.*":                   true,
+       "Volumes.*.AccessViaHosts.*.ReadOnly":          true,
+       "Volumes.*.ReadOnly":                           true,
+       "Volumes.*.Replication":                        true,
+       "Volumes.*.StorageClasses":                     true,
+       "Volumes.*.StorageClasses.*":                   false,
        "Workbench":                                    true,
        "Workbench.ActivationContactLink":              false,
        "Workbench.APIClientConnectTimeout":            true,
index f8a0e097dc7d4e3f9de577d747cd9c0b54a87d0b..d21bb2d284b57d5dfff01a346c0708d98ca8db13 100644 (file)
@@ -31,8 +31,15 @@ Clusters:
       # listening, and reachable from other hosts in the cluster.
       SAMPLE:
         InternalURLs:
-          "http://example.host:12345": {}
-          SAMPLE: {}
+          "http://host1.example:12345": {}
+          "http://host2.example:12345":
+            # Rendezvous is normally empty/omitted. When changing the
+            # URL of a Keepstore service, Rendezvous should be set to
+            # the old URL (with trailing slash omitted) to preserve
+            # rendezvous ordering.
+            Rendezvous: ""
+          SAMPLE:
+            Rendezvous: ""
         ExternalURL: "-"
 
       RailsAPI:
@@ -182,6 +189,15 @@ Clusters:
       # parameter higher than this value, this value is used instead.
       MaxItemsPerResponse: 1000
 
+      # Maximum number of concurrent requests to accept in a single
+      # service process, or 0 for no limit. Currently supported only
+      # by keepstore.
+      MaxConcurrentRequests: 0
+
+      # Maximum number of 64MiB memory buffers per keepstore server
+      # process, or 0 for no limit.
+      MaxKeepBlobBuffers: 128
+
       # API methods to disable. Disabled methods are not listed in the
       # discovery document, and respond 404 to all requests.
       # Example: {"jobs.create":{}, "pipeline_instances.create": {}}
@@ -309,43 +325,75 @@ Clusters:
       MaxRequestLogParamsSize: 2000
 
     Collections:
-      # Allow clients to create collections by providing a manifest with
-      # unsigned data blob locators. IMPORTANT: This effectively disables
-      # access controls for data stored in Keep: a client who knows a hash
-      # can write a manifest that references the hash, pass it to
-      # collections.create (which will create a permission link), use
-      # collections.get to obtain a signature for that data locator, and
-      # use that signed locator to retrieve the data from Keep. Therefore,
-      # do not turn this on if your users expect to keep data private from
-      # one another!
+
+      # Enable access controls for data stored in Keep. This should
+      # always be set to true on a production cluster.
       BlobSigning: true
 
       # BlobSigningKey is a string of alphanumeric characters used to
       # generate permission signatures for Keep locators. It must be
-      # identical to the permission key given to Keep. IMPORTANT: This is
-      # a site secret. It should be at least 50 characters.
+      # identical to the permission key given to Keep. IMPORTANT: This
+      # is a site secret. It should be at least 50 characters.
       #
       # Modifying BlobSigningKey will invalidate all existing
       # signatures, which can cause programs to fail (e.g., arv-put,
-      # arv-get, and Crunch jobs).  To avoid errors, rotate keys only when
-      # no such processes are running.
+      # arv-get, and Crunch jobs).  To avoid errors, rotate keys only
+      # when no such processes are running.
       BlobSigningKey: ""
 
+      # Enable garbage collection of unreferenced blobs in Keep.
+      BlobTrash: true
+
+      # Time to leave unreferenced blobs in "trashed" state before
+      # deleting them, or 0 to skip the "trashed" state entirely and
+      # delete unreferenced blobs immediately.
+      #
+      # If you use any Amazon S3 buckets as storage volumes, this
+      # must be at least 24h to avoid occasional data loss.
+      BlobTrashLifetime: 336h
+
+      # How often to check for (and delete) trashed blocks whose
+      # BlobTrashLifetime has expired.
+      BlobTrashCheckInterval: 24h
+
+      # Maximum number of concurrent "trash blob" and "delete trashed
+      # blob" operations conducted by a single keepstore process. Each
+      # of these can be set to 0 to disable the respective operation.
+      #
+      # If BlobTrashLifetime is zero, "trash" and "delete trash"
+      # happen at once, so only the lower of these two values is used.
+      BlobTrashConcurrency: 4
+      BlobDeleteConcurrency: 4
+
+      # Maximum number of concurrent "create additional replica of
+      # existing blob" operations conducted by a single keepstore
+      # process.
+      BlobReplicateConcurrency: 4
+
       # Default replication level for collections. This is used when a
       # collection's replication_desired attribute is nil.
       DefaultReplication: 2
 
-      # Lifetime (in seconds) of blob permission signatures generated by
-      # the API server. This determines how long a client can take (after
-      # retrieving a collection record) to retrieve the collection data
-      # from Keep. If the client needs more time than that (assuming the
-      # collection still has the same content and the relevant user/token
-      # still has permission) the client can retrieve the collection again
-      # to get fresh signatures.
+      # BlobSigningTTL determines the minimum lifetime of transient
+      # data, i.e., blocks that are not referenced by
+      # collections. Unreferenced blocks exist for two reasons:
+      #
+      # 1) A data block must be written to a disk/cloud backend device
+      # before a collection can be created/updated with a reference to
+      # it.
+      #
+      # 2) Deleting or updating a collection can remove the last
+      # remaining reference to a data block.
       #
-      # This must be exactly equal to the -blob-signature-ttl flag used by
-      # keepstore servers.  Otherwise, reading data blocks and saving
-      # collections will fail with HTTP 403 permission errors.
+      # If BlobSigningTTL is too short, long-running
+      # processes/containers will fail when they take too long (a)
+      # between writing blocks and writing collections that reference
+      # them, or (b) between reading collections and reading the
+      # referenced blocks.
+      #
+      # If BlobSigningTTL is too long, data will still be stored long
+      # after the referring collections are deleted, and you will
+      # needlessly fill up disks or waste money on cloud storage.
       #
       # Modifying BlobSigningTTL invalidates existing signatures; see
       # BlobSigningKey note above.
@@ -353,6 +401,36 @@ Clusters:
       # The default is 2 weeks.
       BlobSigningTTL: 336h
 
+      # When running keep-balance, this is the destination filename for
+      # the list of lost block hashes if there are any, one per line.
+      # Updated automatically during each successful run.
+      BlobMissingReport: ""
+
+      # keep-balance operates periodically, i.e.: do a
+      # scan/balance operation, sleep, repeat.
+      #
+      # BalancePeriod determines the interval between start times of
+      # successive scan/balance operations. If a scan/balance operation
+      # takes longer than BalancePeriod, the next one will follow it
+      # immediately.
+      #
+      # If SIGUSR1 is received during an idle period between operations,
+      # the next operation will start immediately.
+      BalancePeriod: 10m
+
+      # Limits the number of collections retrieved by keep-balance per
+      # API transaction. If this is zero, page size is
+      # determined by the API server's own page size limits (see
+      # API.MaxItemsPerResponse and API.MaxIndexDatabaseRead).
+      BalanceCollectionBatch: 0
+
+      # The size of keep-balance's internal queue of
+      # collections. Higher values use more memory and improve throughput
+      # by allowing keep-balance to fetch the next page of collections
+      # while the current page is still being processed. If this is zero
+      # or omitted, pages are processed serially.
+      BalanceCollectionBuffers: 1000
+
       # Default lifetime for ephemeral collections: 2 weeks. This must not
       # be less than BlobSigningTTL.
       DefaultTrashLifetime: 336h
@@ -756,6 +834,67 @@ Clusters:
         Price: 0.1
         Preemptible: false
 
+    Volumes:
+      SAMPLE:
+        # AccessViaHosts specifies which keepstore processes can read
+        # and write data on the volume.
+        #
+        # For a local filesystem, AccessViaHosts has one entry,
+        # indicating which server the filesystem is located on.
+        #
+        # For a network-attached backend accessible by all keepstore
+        # servers, like a cloud storage bucket or an NFS mount,
+        # AccessViaHosts can be empty/omitted.
+        #
+        # Further info/examples:
+        # https://doc.arvados.org/install/configure-fs-storage.html
+        # https://doc.arvados.org/install/configure-s3-object-storage.html
+        # https://doc.arvados.org/install/configure-azure-blob-storage.html
+        AccessViaHosts:
+          SAMPLE:
+            ReadOnly: false
+          "http://host1.example:25107": {}
+        ReadOnly: false
+        Replication: 1
+        StorageClasses:
+          default: true
+          SAMPLE: true
+        Driver: s3
+        DriverParameters:
+
+          # for s3 driver -- see
+          # https://doc.arvados.org/install/configure-s3-object-storage.html
+          IAMRole: aaaaa
+          AccessKey: aaaaa
+          SecretKey: aaaaa
+          Endpoint: ""
+          Region: us-east-1a
+          Bucket: aaaaa
+          LocationConstraint: false
+          IndexPageSize: 1000
+          ConnectTimeout: 1m
+          ReadTimeout: 10m
+          RaceWindow: 24h
+          UnsafeDelete: false
+
+          # for azure driver -- see
+          # https://doc.arvados.org/install/configure-azure-blob-storage.html
+          StorageAccountName: aaaaa
+          StorageAccountKey: aaaaa
+          StorageBaseURL: core.windows.net
+          ContainerName: aaaaa
+          RequestTimeout: 30s
+          ListBlobsRetryDelay: 10s
+          ListBlobsMaxAttempts: 10
+          MaxGetBytes: 0
+          WriteRaceInterval: 15s
+          WriteRacePollTime: 1s
+
+          # for local directory driver -- see
+          # https://doc.arvados.org/install/configure-fs-storage.html
+          Root: /var/lib/arvados/keep-data
+          Serialize: false
+
     Mail:
       MailchimpAPIKey: ""
       MailchimpListID: ""
index 7e48493939cd67a8322e67fe9f14bf357f26cd76..21d17227372d4d3f6776b2526581508d89c937ef 100644 (file)
@@ -28,6 +28,7 @@ type Loader struct {
        Logger         logrus.FieldLogger
        SkipDeprecated bool // Don't load deprecated config keys
        SkipLegacy     bool // Don't load legacy config files
+       SkipAPICalls   bool // Don't do checks that call RailsAPI/controller
 
        Path                    string
        KeepstorePath           string
@@ -36,6 +37,7 @@ type Loader struct {
        WebsocketPath           string
        KeepproxyPath           string
        GitHttpdPath            string
+       KeepBalancePath         string
 
        configdata []byte
 }
@@ -68,6 +70,7 @@ func (ldr *Loader) SetupFlags(flagset *flag.FlagSet) {
        flagset.StringVar(&ldr.WebsocketPath, "legacy-ws-config", defaultWebsocketConfigPath, "Legacy arvados-ws configuration `file`")
        flagset.StringVar(&ldr.KeepproxyPath, "legacy-keepproxy-config", defaultKeepproxyConfigPath, "Legacy keepproxy configuration `file`")
        flagset.StringVar(&ldr.GitHttpdPath, "legacy-git-httpd-config", defaultGitHttpdConfigPath, "Legacy arv-git-httpd configuration `file`")
+       flagset.StringVar(&ldr.KeepBalancePath, "legacy-keepbalance-config", defaultKeepBalanceConfigPath, "Legacy keep-balance configuration `file`")
        flagset.BoolVar(&ldr.SkipLegacy, "skip-legacy", false, "Don't load legacy config files")
 }
 
@@ -148,6 +151,9 @@ func (ldr *Loader) MungeLegacyConfigArgs(lgr logrus.FieldLogger, args []string,
        if legacyConfigArg != "-legacy-git-httpd-config" {
                ldr.GitHttpdPath = ""
        }
+       if legacyConfigArg != "-legacy-keepbalance-config" {
+               ldr.KeepBalancePath = ""
+       }
 
        return munged
 }
@@ -244,12 +250,14 @@ func (ldr *Loader) Load() (*arvados.Config, error) {
                // * no primary config was loaded, and this is the
                // legacy config file for the current component
                for _, err := range []error{
+                       ldr.loadOldEnvironmentVariables(&cfg),
                        ldr.loadOldKeepstoreConfig(&cfg),
                        ldr.loadOldKeepWebConfig(&cfg),
                        ldr.loadOldCrunchDispatchSlurmConfig(&cfg),
                        ldr.loadOldWebsocketConfig(&cfg),
                        ldr.loadOldKeepproxyConfig(&cfg),
                        ldr.loadOldGitHttpdConfig(&cfg),
+                       ldr.loadOldKeepBalanceConfig(&cfg),
                } {
                        if err != nil {
                                return nil, err
@@ -259,9 +267,14 @@ func (ldr *Loader) Load() (*arvados.Config, error) {
 
        // Check for known mistakes
        for id, cc := range cfg.Clusters {
-               err = checkKeyConflict(fmt.Sprintf("Clusters.%s.PostgreSQL.Connection", id), cc.PostgreSQL.Connection)
-               if err != nil {
-                       return nil, err
+               for _, err = range []error{
+                       checkKeyConflict(fmt.Sprintf("Clusters.%s.PostgreSQL.Connection", id), cc.PostgreSQL.Connection),
+                       ldr.checkEmptyKeepstores(cc),
+                       ldr.checkUnlistedKeepstores(cc),
+               } {
+                       if err != nil {
+                               return nil, err
+                       }
                }
        }
        return &cfg, nil
index c7289350ec8c09ca86637edab258cd32874882e7..2e521ab0932218cd2f2e1e7f7f976dbbdf3c26d4 100644 (file)
@@ -46,6 +46,12 @@ func testLoader(c *check.C, configdata string, logdst io.Writer) *Loader {
 
 type LoadSuite struct{}
 
+func (s *LoadSuite) SetUpSuite(c *check.C) {
+       os.Unsetenv("ARVADOS_API_HOST")
+       os.Unsetenv("ARVADOS_API_HOST_INSECURE")
+       os.Unsetenv("ARVADOS_API_TOKEN")
+}
+
 func (s *LoadSuite) TestEmpty(c *check.C) {
        cfg, err := testLoader(c, "", nil).Load()
        c.Check(cfg, check.IsNil)
@@ -321,7 +327,7 @@ Clusters:
 }
 
 func (s *LoadSuite) TestMovedKeys(c *check.C) {
-       s.checkEquivalent(c, `# config has old keys only
+       checkEquivalent(c, `# config has old keys only
 Clusters:
  zzzzz:
   RequestLimits:
@@ -334,7 +340,7 @@ Clusters:
    MaxRequestAmplification: 3
    MaxItemsPerResponse: 999
 `)
-       s.checkEquivalent(c, `# config has both old and new keys; old values win
+       checkEquivalent(c, `# config has both old and new keys; old values win
 Clusters:
  zzzzz:
   RequestLimits:
@@ -352,30 +358,45 @@ Clusters:
 `)
 }
 
-func (s *LoadSuite) checkEquivalent(c *check.C, goty, expectedy string) {
-       got, err := testLoader(c, goty, nil).Load()
+func checkEquivalent(c *check.C, goty, expectedy string) {
+       gotldr := testLoader(c, goty, nil)
+       expectedldr := testLoader(c, expectedy, nil)
+       checkEquivalentLoaders(c, gotldr, expectedldr)
+}
+
+func checkEqualYAML(c *check.C, got, expected interface{}) {
+       expectedyaml, err := yaml.Marshal(expected)
        c.Assert(err, check.IsNil)
-       expected, err := testLoader(c, expectedy, nil).Load()
+       gotyaml, err := yaml.Marshal(got)
        c.Assert(err, check.IsNil)
-       if !c.Check(got, check.DeepEquals, expected) {
+       if !bytes.Equal(gotyaml, expectedyaml) {
                cmd := exec.Command("diff", "-u", "--label", "expected", "--label", "got", "/dev/fd/3", "/dev/fd/4")
-               for _, obj := range []interface{}{expected, got} {
-                       y, _ := yaml.Marshal(obj)
+               for _, y := range [][]byte{expectedyaml, gotyaml} {
                        pr, pw, err := os.Pipe()
                        c.Assert(err, check.IsNil)
                        defer pr.Close()
-                       go func() {
-                               io.Copy(pw, bytes.NewBuffer(y))
+                       go func(data []byte) {
+                               pw.Write(data)
                                pw.Close()
-                       }()
+                       }(y)
                        cmd.ExtraFiles = append(cmd.ExtraFiles, pr)
                }
                diff, err := cmd.CombinedOutput()
+               // diff should report differences and exit non-zero.
+               c.Check(err, check.NotNil)
                c.Log(string(diff))
-               c.Check(err, check.IsNil)
+               c.Error("got != expected; see diff (-expected +got) above")
        }
 }
 
+func checkEquivalentLoaders(c *check.C, gotldr, expectedldr *Loader) {
+       got, err := gotldr.Load()
+       c.Assert(err, check.IsNil)
+       expected, err := expectedldr.Load()
+       c.Assert(err, check.IsNil)
+       checkEqualYAML(c, got, expected)
+}
+
 func checkListKeys(path string, x interface{}) (err error) {
        v := reflect.Indirect(reflect.ValueOf(x))
        switch v.Kind() {
index 4345370469d07f3d5be685b9dd2e4a0efbe1ab7b..b9af60fe9d0d05d6f4fe4f205e795475a3d64658 100644 (file)
@@ -10,10 +10,11 @@ import (
        "git.curoverse.com/arvados.git/lib/cmd"
        "git.curoverse.com/arvados.git/lib/service"
        "git.curoverse.com/arvados.git/sdk/go/arvados"
+       "github.com/prometheus/client_golang/prometheus"
 )
 
 var Command cmd.Handler = service.Command(arvados.ServiceNameController, newHandler)
 
-func newHandler(_ context.Context, cluster *arvados.Cluster, _ string) service.Handler {
+func newHandler(_ context.Context, cluster *arvados.Cluster, _ string, _ *prometheus.Registry) service.Handler {
        return &Handler{Cluster: cluster}
 }
index 4b49e46154e24b057ba5750c21f9f47e4541d7ae..d34df7f2c45918103949300ca6619027696b757f 100644 (file)
@@ -19,6 +19,7 @@ import (
        "git.curoverse.com/arvados.git/sdk/go/arvadostest"
        "git.curoverse.com/arvados.git/sdk/go/ctxlog"
        "git.curoverse.com/arvados.git/sdk/go/httpserver"
+       "github.com/prometheus/client_golang/prometheus"
        check "gopkg.in/check.v1"
 )
 
@@ -52,7 +53,7 @@ func (s *HandlerSuite) SetUpTest(c *check.C) {
        s.cluster.TLS.Insecure = true
        arvadostest.SetServiceURL(&s.cluster.Services.RailsAPI, "https://"+os.Getenv("ARVADOS_TEST_API_HOST"))
        arvadostest.SetServiceURL(&s.cluster.Services.Controller, "http://localhost:/")
-       s.handler = newHandler(s.ctx, s.cluster, "")
+       s.handler = newHandler(s.ctx, s.cluster, "", prometheus.NewRegistry())
 }
 
 func (s *HandlerSuite) TearDownTest(c *check.C) {
index ae6ac70e9665f777069232d49eaf0dd76a66d1a2..7ab38c6cabb0a6374621bcc63607f15fa4d64ddc 100644 (file)
@@ -11,11 +11,12 @@ import (
        "git.curoverse.com/arvados.git/lib/cmd"
        "git.curoverse.com/arvados.git/lib/service"
        "git.curoverse.com/arvados.git/sdk/go/arvados"
+       "github.com/prometheus/client_golang/prometheus"
 )
 
 var Command cmd.Handler = service.Command(arvados.ServiceNameDispatchCloud, newHandler)
 
-func newHandler(ctx context.Context, cluster *arvados.Cluster, token string) service.Handler {
+func newHandler(ctx context.Context, cluster *arvados.Cluster, token string, reg *prometheus.Registry) service.Handler {
        ac, err := arvados.NewClientFromConfig(cluster)
        if err != nil {
                return service.ErrorHandler(ctx, cluster, fmt.Errorf("error initializing client from cluster config: %s", err))
@@ -25,6 +26,7 @@ func newHandler(ctx context.Context, cluster *arvados.Cluster, token string) ser
                Context:   ctx,
                ArvClient: ac,
                AuthToken: token,
+               Registry:  reg,
        }
        go d.Start()
        return d
index 731c6d25d72c20243dba43e9636f03c786bb1423..f0aa83c2e05bb097a1ce2c5fc5264b3e42742b5d 100644 (file)
@@ -48,10 +48,10 @@ type dispatcher struct {
        Context       context.Context
        ArvClient     *arvados.Client
        AuthToken     string
+       Registry      *prometheus.Registry
        InstanceSetID cloud.InstanceSetID
 
        logger      logrus.FieldLogger
-       reg         *prometheus.Registry
        instanceSet cloud.InstanceSet
        pool        pool
        queue       scheduler.ContainerQueue
@@ -132,14 +132,13 @@ func (disp *dispatcher) initialize() {
                disp.sshKey = key
        }
 
-       disp.reg = prometheus.NewRegistry()
-       instanceSet, err := newInstanceSet(disp.Cluster, disp.InstanceSetID, disp.logger, disp.reg)
+       instanceSet, err := newInstanceSet(disp.Cluster, disp.InstanceSetID, disp.logger, disp.Registry)
        if err != nil {
                disp.logger.Fatalf("error initializing driver: %s", err)
        }
        disp.instanceSet = instanceSet
-       disp.pool = worker.NewPool(disp.logger, disp.ArvClient, disp.reg, disp.InstanceSetID, disp.instanceSet, disp.newExecutor, disp.sshKey.PublicKey(), disp.Cluster)
-       disp.queue = container.NewQueue(disp.logger, disp.reg, disp.typeChooser, disp.ArvClient)
+       disp.pool = worker.NewPool(disp.logger, disp.ArvClient, disp.Registry, disp.InstanceSetID, disp.instanceSet, disp.newExecutor, disp.sshKey.PublicKey(), disp.Cluster)
+       disp.queue = container.NewQueue(disp.logger, disp.Registry, disp.typeChooser, disp.ArvClient)
 
        if disp.Cluster.ManagementToken == "" {
                disp.httpHandler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
@@ -154,7 +153,7 @@ func (disp *dispatcher) initialize() {
                mux.HandlerFunc("POST", "/arvados/v1/dispatch/instances/drain", disp.apiInstanceDrain)
                mux.HandlerFunc("POST", "/arvados/v1/dispatch/instances/run", disp.apiInstanceRun)
                mux.HandlerFunc("POST", "/arvados/v1/dispatch/instances/kill", disp.apiInstanceKill)
-               metricsH := promhttp.HandlerFor(disp.reg, promhttp.HandlerOpts{
+               metricsH := promhttp.HandlerFor(disp.Registry, promhttp.HandlerOpts{
                        ErrorLog: disp.logger,
                })
                mux.Handler("GET", "/metrics", metricsH)
index 4a5ca3017afdab64ac288fcfbc7147dcf90e3027..1972468f47d697a6b210aa40653c80a4f5baab77 100644 (file)
@@ -19,6 +19,7 @@ import (
        "git.curoverse.com/arvados.git/sdk/go/arvados"
        "git.curoverse.com/arvados.git/sdk/go/arvadostest"
        "git.curoverse.com/arvados.git/sdk/go/ctxlog"
+       "github.com/prometheus/client_golang/prometheus"
        "golang.org/x/crypto/ssh"
        check "gopkg.in/check.v1"
 )
@@ -91,6 +92,7 @@ func (s *DispatcherSuite) SetUpTest(c *check.C) {
                Context:   s.ctx,
                ArvClient: arvClient,
                AuthToken: arvadostest.AdminToken,
+               Registry:  prometheus.NewRegistry(),
        }
        // Test cases can modify s.cluster before calling
        // initialize(), and then modify private state before calling
index b6737bc553d61258373d578fdac416452105ec43..0391c5a043dd75e6d3cdaa38f6ba31c9c4dec3ca 100644 (file)
@@ -12,7 +12,6 @@ import (
        "io"
        "net"
        "net/http"
-       "net/url"
        "os"
        "strings"
 
@@ -22,6 +21,7 @@ import (
        "git.curoverse.com/arvados.git/sdk/go/ctxlog"
        "git.curoverse.com/arvados.git/sdk/go/httpserver"
        "github.com/coreos/go-systemd/daemon"
+       "github.com/prometheus/client_golang/prometheus"
        "github.com/sirupsen/logrus"
 )
 
@@ -30,7 +30,7 @@ type Handler interface {
        CheckHealth() error
 }
 
-type NewHandlerFunc func(_ context.Context, _ *arvados.Cluster, token string) Handler
+type NewHandlerFunc func(_ context.Context, _ *arvados.Cluster, token string, registry *prometheus.Registry) Handler
 
 type command struct {
        newHandler NewHandlerFunc
@@ -68,7 +68,6 @@ func (c *command) RunCommand(prog string, args []string, stdin io.Reader, stdout
        loader := config.NewLoader(stdin, log)
        loader.SetupFlags(flags)
        versionFlag := flags.Bool("version", false, "Write version information to stdout and exit 0")
-
        err = flags.Parse(args)
        if err == flag.ErrHelp {
                err = nil
@@ -79,6 +78,14 @@ func (c *command) RunCommand(prog string, args []string, stdin io.Reader, stdout
                return cmd.Version.RunCommand(prog, args, stdin, stdout, stderr)
        }
 
+       if strings.HasSuffix(prog, "controller") {
+               // Some config-loader checks try to make API calls via
+               // controller. Those can't be expected to work if this
+               // process _is_ the controller: we haven't started an
+               // http server yet.
+               loader.SkipAPICalls = true
+       }
+
        cfg, err := loader.Load()
        if err != nil {
                return 1
@@ -87,54 +94,57 @@ func (c *command) RunCommand(prog string, args []string, stdin io.Reader, stdout
        if err != nil {
                return 1
        }
-       log = ctxlog.New(stderr, cluster.SystemLogs.Format, cluster.SystemLogs.LogLevel).WithFields(logrus.Fields{
+
+       // Now that we've read the config, replace the bootstrap
+       // logger with a new one according to the logging config.
+       log = ctxlog.New(stderr, cluster.SystemLogs.Format, cluster.SystemLogs.LogLevel)
+       logger := log.WithFields(logrus.Fields{
                "PID": os.Getpid(),
        })
-       ctx := ctxlog.Context(c.ctx, log)
+       ctx := ctxlog.Context(c.ctx, logger)
 
-       listen, err := getListenAddr(cluster.Services, c.svcName)
+       listenURL, err := getListenAddr(cluster.Services, c.svcName, log)
        if err != nil {
                return 1
        }
+       ctx = context.WithValue(ctx, contextKeyURL{}, listenURL)
 
-       if cluster.SystemRootToken == "" {
-               log.Warn("SystemRootToken missing from cluster config, falling back to ARVADOS_API_TOKEN environment variable")
-               cluster.SystemRootToken = os.Getenv("ARVADOS_API_TOKEN")
-       }
-       if cluster.Services.Controller.ExternalURL.Host == "" {
-               log.Warn("Services.Controller.ExternalURL missing from cluster config, falling back to ARVADOS_API_HOST(_INSECURE) environment variables")
-               u, err := url.Parse("https://" + os.Getenv("ARVADOS_API_HOST"))
-               if err != nil {
-                       err = fmt.Errorf("ARVADOS_API_HOST: %s", err)
-                       return 1
-               }
-               cluster.Services.Controller.ExternalURL = arvados.URL(*u)
-               if i := os.Getenv("ARVADOS_API_HOST_INSECURE"); i != "" && i != "0" {
-                       cluster.TLS.Insecure = true
-               }
-       }
-
-       handler := c.newHandler(ctx, cluster, cluster.SystemRootToken)
+       reg := prometheus.NewRegistry()
+       handler := c.newHandler(ctx, cluster, cluster.SystemRootToken, reg)
        if err = handler.CheckHealth(); err != nil {
                return 1
        }
+
+       instrumented := httpserver.Instrument(reg, log,
+               httpserver.HandlerWithContext(ctx,
+                       httpserver.AddRequestIDs(
+                               httpserver.LogRequests(
+                                       httpserver.NewRequestLimiter(cluster.API.MaxConcurrentRequests, handler, reg)))))
        srv := &httpserver.Server{
                Server: http.Server{
-                       Handler: httpserver.HandlerWithContext(ctx,
-                               httpserver.AddRequestIDs(httpserver.LogRequests(handler))),
+                       Handler: instrumented.ServeAPI(cluster.ManagementToken, instrumented),
                },
-               Addr: listen,
+               Addr: listenURL.Host,
+       }
+       if listenURL.Scheme == "https" {
+               tlsconfig, err := tlsConfigWithCertUpdater(cluster, logger)
+               if err != nil {
+                       logger.WithError(err).Errorf("cannot start %s service on %s", c.svcName, listenURL.String())
+                       return 1
+               }
+               srv.TLSConfig = tlsconfig
        }
        err = srv.Start()
        if err != nil {
                return 1
        }
-       log.WithFields(logrus.Fields{
+       logger.WithFields(logrus.Fields{
+               "URL":     listenURL,
                "Listen":  srv.Addr,
                "Service": c.svcName,
        }).Info("listening")
        if _, err := daemon.SdNotify(false, "READY=1"); err != nil {
-               log.WithError(err).Errorf("error notifying init daemon")
+               logger.WithError(err).Errorf("error notifying init daemon")
        }
        go func() {
                <-ctx.Done()
@@ -149,20 +159,33 @@ func (c *command) RunCommand(prog string, args []string, stdin io.Reader, stdout
 
 const rfc3339NanoFixed = "2006-01-02T15:04:05.000000000Z07:00"
 
-func getListenAddr(svcs arvados.Services, prog arvados.ServiceName) (string, error) {
+func getListenAddr(svcs arvados.Services, prog arvados.ServiceName, log logrus.FieldLogger) (arvados.URL, error) {
        svc, ok := svcs.Map()[prog]
        if !ok {
-               return "", fmt.Errorf("unknown service name %q", prog)
+               return arvados.URL{}, fmt.Errorf("unknown service name %q", prog)
        }
        for url := range svc.InternalURLs {
                if strings.HasPrefix(url.Host, "localhost:") {
-                       return url.Host, nil
+                       return url, nil
                }
                listener, err := net.Listen("tcp", url.Host)
                if err == nil {
                        listener.Close()
-                       return url.Host, nil
+                       return url, nil
+               } else if strings.Contains(err.Error(), "cannot assign requested address") {
+                       continue
+               } else if strings.Contains(err.Error(), "address already in use") {
+                       return url, err
+               } else {
+                       log.Warn(err)
                }
        }
-       return "", fmt.Errorf("configuration does not enable the %s service on this host", prog)
+       return arvados.URL{}, fmt.Errorf("configuration does not enable the %s service on this host", prog)
+}
+
+type contextKeyURL struct{}
+
+func URLFromContext(ctx context.Context) (arvados.URL, bool) {
+       u, ok := ctx.Value(contextKeyURL{}).(arvados.URL)
+       return u, ok
 }
index bb7c5c51da01a4074da5b1b80506b2e25fc9a25d..ef047bc9da98bde99b451fb96601db86218d08d5 100644 (file)
@@ -8,14 +8,17 @@ package service
 import (
        "bytes"
        "context"
+       "crypto/tls"
        "fmt"
        "io/ioutil"
        "net/http"
        "os"
        "testing"
+       "time"
 
        "git.curoverse.com/arvados.git/sdk/go/arvados"
        "git.curoverse.com/arvados.git/sdk/go/ctxlog"
+       "github.com/prometheus/client_golang/prometheus"
        check "gopkg.in/check.v1"
 )
 
@@ -38,7 +41,7 @@ func (*Suite) TestCommand(c *check.C) {
        ctx, cancel := context.WithCancel(context.Background())
        defer cancel()
 
-       cmd := Command(arvados.ServiceNameController, func(ctx context.Context, _ *arvados.Cluster, token string) Handler {
+       cmd := Command(arvados.ServiceNameController, func(ctx context.Context, _ *arvados.Cluster, token string, reg *prometheus.Registry) Handler {
                c.Check(ctx.Value("foo"), check.Equals, "bar")
                c.Check(token, check.Equals, "abcde")
                return &testHandler{ctx: ctx, healthCheck: healthCheck}
@@ -62,12 +65,77 @@ func (*Suite) TestCommand(c *check.C) {
        c.Check(stderr.String(), check.Matches, `(?ms).*"msg":"CheckHealth called".*`)
 }
 
+func (*Suite) TestTLS(c *check.C) {
+       cwd, err := os.Getwd()
+       c.Assert(err, check.IsNil)
+
+       stdin := bytes.NewBufferString(`
+Clusters:
+ zzzzz:
+  SystemRootToken: abcde
+  Services:
+   Controller:
+    ExternalURL: "https://localhost:12345"
+    InternalURLs: {"https://localhost:12345": {}}
+  TLS:
+   Key: file://` + cwd + `/../../services/api/tmp/self-signed.key
+   Certificate: file://` + cwd + `/../../services/api/tmp/self-signed.pem
+`)
+
+       called := make(chan bool)
+       cmd := Command(arvados.ServiceNameController, func(ctx context.Context, _ *arvados.Cluster, token string, reg *prometheus.Registry) Handler {
+               return &testHandler{handler: http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+                       w.Write([]byte("ok"))
+                       close(called)
+               })}
+       })
+
+       exited := make(chan bool)
+       var stdout, stderr bytes.Buffer
+       go func() {
+               cmd.RunCommand("arvados-controller", []string{"-config", "-"}, stdin, &stdout, &stderr)
+               close(exited)
+       }()
+       got := make(chan bool)
+       go func() {
+               defer close(got)
+               client := &http.Client{Transport: &http.Transport{TLSClientConfig: &tls.Config{InsecureSkipVerify: true}}}
+               for range time.NewTicker(time.Millisecond).C {
+                       resp, err := client.Get("https://localhost:12345")
+                       if err != nil {
+                               c.Log(err)
+                               continue
+                       }
+                       body, err := ioutil.ReadAll(resp.Body)
+                       c.Logf("status %d, body %s", resp.StatusCode, string(body))
+                       c.Check(resp.StatusCode, check.Equals, http.StatusOK)
+                       break
+               }
+       }()
+       select {
+       case <-called:
+       case <-exited:
+               c.Error("command exited without calling handler")
+       case <-time.After(time.Second):
+               c.Error("timed out")
+       }
+       select {
+       case <-got:
+       case <-exited:
+               c.Error("command exited before client received response")
+       case <-time.After(time.Second):
+               c.Error("timed out")
+       }
+       c.Log(stderr.String())
+}
+
 type testHandler struct {
        ctx         context.Context
+       handler     http.Handler
        healthCheck chan bool
 }
 
-func (th *testHandler) ServeHTTP(http.ResponseWriter, *http.Request) {}
+func (th *testHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { th.handler.ServeHTTP(w, r) }
 func (th *testHandler) CheckHealth() error {
        ctxlog.FromContext(th.ctx).Info("CheckHealth called")
        select {
diff --git a/lib/service/tls.go b/lib/service/tls.go
new file mode 100644 (file)
index 0000000..5f14bc5
--- /dev/null
@@ -0,0 +1,81 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package service
+
+import (
+       "crypto/tls"
+       "errors"
+       "fmt"
+       "os"
+       "os/signal"
+       "strings"
+       "syscall"
+
+       "git.curoverse.com/arvados.git/sdk/go/arvados"
+       "github.com/sirupsen/logrus"
+)
+
+func tlsConfigWithCertUpdater(cluster *arvados.Cluster, logger logrus.FieldLogger) (*tls.Config, error) {
+       currentCert := make(chan *tls.Certificate, 1)
+       loaded := false
+
+       key, cert := cluster.TLS.Key, cluster.TLS.Certificate
+       if !strings.HasPrefix(key, "file://") || !strings.HasPrefix(cert, "file://") {
+               return nil, errors.New("cannot use TLS certificate: TLS.Key and TLS.Certificate must be specified as file://...")
+       }
+       key, cert = key[7:], cert[7:]
+
+       update := func() error {
+               cert, err := tls.LoadX509KeyPair(cert, key)
+               if err != nil {
+                       return fmt.Errorf("error loading X509 key pair: %s", err)
+               }
+               if loaded {
+                       // Throw away old cert
+                       <-currentCert
+               }
+               currentCert <- &cert
+               loaded = true
+               return nil
+       }
+       err := update()
+       if err != nil {
+               return nil, err
+       }
+
+       go func() {
+               reload := make(chan os.Signal, 1)
+               signal.Notify(reload, syscall.SIGHUP)
+               for range reload {
+                       err := update()
+                       if err != nil {
+                               logger.WithError(err).Warn("error updating TLS certificate")
+                       }
+               }
+       }()
+
+       // https://blog.gopheracademy.com/advent-2016/exposing-go-on-the-internet/
+       return &tls.Config{
+               PreferServerCipherSuites: true,
+               CurvePreferences: []tls.CurveID{
+                       tls.CurveP256,
+                       tls.X25519,
+               },
+               MinVersion: tls.VersionTLS12,
+               CipherSuites: []uint16{
+                       tls.TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,
+                       tls.TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,
+                       tls.TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305,
+                       tls.TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,
+                       tls.TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,
+                       tls.TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,
+               },
+               GetCertificate: func(*tls.ClientHelloInfo) (*tls.Certificate, error) {
+                       cert := <-currentCert
+                       currentCert <- cert
+                       return cert, nil
+               },
+       }, nil
+}
index bce7ce5f5cb7a55a083f68bc29d6a88008a03fef..f828e92bd6ede4f719d1c6c963b80a57905d6b5b 100644 (file)
@@ -36,6 +36,8 @@ Gem::Specification.new do |s|
   s.add_runtime_dependency 'andand', '~> 1.3', '>= 1.3.3'
   s.add_runtime_dependency 'oj', '~> 3.0'
   s.add_runtime_dependency 'curb', '~> 0.8'
+  # arvados-google-api-client 0.8.7.2 is incompatible with faraday 0.16.2
+  s.add_dependency('faraday', '< 0.16')
   s.homepage    =
     'https://arvados.org'
 end
index 237b2107f85f2607509893906052b72c965d8d32..39c50bcc837653d975e308a3ae9b567aa759a12b 100644 (file)
@@ -20,7 +20,7 @@ class TestCollectionCreate < Minitest::Test
                    manifest_text: foo_manifest
                  }.to_json)
     end
-    assert /^([0-9a-z]{5}-4zz18-[0-9a-z]{15})?$/.match(out)
+    assert(/^([0-9a-z]{5}-4zz18-[0-9a-z]{15})?$/.match(out))
     assert_equal '', err
   end
 
@@ -33,7 +33,7 @@ class TestCollectionCreate < Minitest::Test
         assert_arv('--format', 'uuid',
                    'collection', 'create', '--collection', tempfile.path)
       end
-      assert /^([0-9a-z]{5}-4zz18-[0-9a-z]{15})?$/.match(out)
+      assert(/^([0-9a-z]{5}-4zz18-[0-9a-z]{15})?$/.match(out))
       assert_equal '', err
     ensure
       tempfile.unlink
index c5ddacb7e011af44afa040377e2a0081eb074862..3a00fec94a6d585a0231d863a6f8c755d292dfff 100644 (file)
@@ -148,9 +148,9 @@ class TestArvGet < Minitest::Test
   def create_arv_object_with_value(value)
     out, err = capture_subprocess_io do
       system("arv", "tag", "add", value, "--object", "testing")
-      assert $?.success?, "Command failure running `arv tag`: #{$?}"
     end
     assert_equal '', err
+    assert $?.success?, "Command failure running `arv tag`: #{$?}"
     assert_operator 0, :<, out.strip.length
     out.strip
   end
index 2aa1e67248f2d01258a8241edd176d9aa9929425..f92e51b298c65c4dbebd04bf8b36903c18a25861 100644 (file)
@@ -21,7 +21,7 @@ class TestArvKeepGet < Minitest::Test
       assert_arv_get false
     end
     assert_equal '', out
-    assert_match /^usage:/, err
+    assert_match(/^usage:/, err)
   end
 
   def test_get_version
@@ -38,7 +38,7 @@ class TestArvKeepGet < Minitest::Test
     end
     $stderr.write err
     assert_equal '', err
-    assert_match /^usage:/, out
+    assert_match(/^usage:/, out)
   end
 
   def test_file_to_dev_stdout
@@ -70,7 +70,7 @@ class TestArvKeepGet < Minitest::Test
     out, err = capture_subprocess_io do
       assert_arv_get false, @@foo_manifest_locator + '/foo', 'tmp/foo'
     end
-    assert_match /Local file tmp\/foo already exists/, err
+    assert_match(/Local file tmp\/foo already exists/, err)
     assert_equal '', out
     assert_equal 'baz', IO.read('tmp/foo')
   end
@@ -82,7 +82,7 @@ class TestArvKeepGet < Minitest::Test
     out, err = capture_subprocess_io do
       assert_arv_get false, @@foo_manifest_locator + '/', 'tmp/'
     end
-    assert_match /Local file tmp\/foo already exists/, err
+    assert_match(/Local file tmp\/foo already exists/, err)
     assert_equal '', out
     assert_equal 'baz', IO.read('tmp/foo')
   end
@@ -128,7 +128,7 @@ class TestArvKeepGet < Minitest::Test
       assert_arv_get false, @@foo_manifest_locator + '/', 'tmp/foo'
     end
     assert_equal '', out
-    assert_match /^usage:/, err
+    assert_match(/^usage:/, err)
   end
 
   def test_dir_to_empty_string
@@ -136,7 +136,7 @@ class TestArvKeepGet < Minitest::Test
       assert_arv_get false, @@foo_manifest_locator + '/', ''
     end
     assert_equal '', out
-    assert_match /^usage:/, err
+    assert_match(/^usage:/, err)
   end
 
   def test_nonexistent_block
@@ -144,7 +144,7 @@ class TestArvKeepGet < Minitest::Test
       assert_arv_get false, 'e796ab2294f3e48ec709ffa8d6daf58c'
     end
     assert_equal '', out
-    assert_match /ERROR:/, err
+    assert_match(/ERROR:/, err)
   end
 
   def test_nonexistent_manifest
@@ -152,7 +152,7 @@ class TestArvKeepGet < Minitest::Test
       assert_arv_get false, 'acbd18db4cc2f85cedef654fccc4a4d8/', 'tmp/'
     end
     assert_equal '', out
-    assert_match /ERROR:/, err
+    assert_match(/ERROR:/, err)
   end
 
   def test_manifest_root_to_dir
index 9ac722f6733dff2d5991ea8670ee420b316f0bbe..9fec78e34973f831dcd55dadb335c54a541615ed 100644 (file)
@@ -22,7 +22,7 @@ class TestArvKeepPut < Minitest::Test
     end
     $stderr.write err
     assert_empty err
-    assert_match /^usage:/, out
+    assert_match(/^usage:/, out)
   end
 
   def test_raw_stdin
@@ -65,7 +65,7 @@ class TestArvKeepPut < Minitest::Test
       assert_equal(false, arv_put('--filename', 'foo', './tmp/empty_dir/.'),
                    'arv-put --filename refuses directory')
     end
-    assert_match /^usage:.*error:/m, err
+    assert_match(/^usage:.*error:/m, err)
     assert_empty out
   end
 
@@ -76,7 +76,7 @@ class TestArvKeepPut < Minitest::Test
                                   './tmp/empty_file'),
                    'arv-put --filename refuses directory')
     end
-    assert_match /^usage:.*error:/m, err
+    assert_match(/^usage:.*error:/m, err)
     assert_empty out
   end
 
@@ -102,7 +102,7 @@ class TestArvKeepPut < Minitest::Test
     out, err = capture_subprocess_io do
       assert arv_put('--no-cache', '--manifest', '--progress', './tmp/foo')
     end
-    assert_match /%/, err
+    assert_match(/%/, err)
     assert match_collection_uuid(out)
   end
 
@@ -110,8 +110,8 @@ class TestArvKeepPut < Minitest::Test
     out, err = capture_subprocess_io do
       assert arv_put('--no-cache', '--manifest', '--batch-progress', './tmp/foo')
     end
-    assert_match /: 0 written 3 total/, err
-    assert_match /: 3 written 3 total/, err
+    assert_match(/: 0 written 3 total/, err)
+    assert_match(/: 3 written 3 total/, err)
     assert match_collection_uuid(out)
   end
 
@@ -121,7 +121,7 @@ class TestArvKeepPut < Minitest::Test
                    arv_put('--progress', '--batch-progress', './tmp/foo'),
                    'arv-put --progress --batch-progress is contradictory')
     end
-    assert_match /^usage:.*error:/m, err
+    assert_match(/^usage:.*error:/m, err)
     assert_empty out
   end
 
index 16542ba16b4fbb649722bc7de1a2388e30225188..895d0559e541b7da526c0cdd7961f4a09e737b30 100644 (file)
@@ -20,7 +20,7 @@ class TestArvTag < Minitest::Test
       assert_equal false, arv_tag
     end
     assert_empty out
-    assert_match /^usage:/i, err
+    assert_match(/^usage:/i, err)
   end
 
   # Test adding and removing a single tag on a single object.
index ffdb8656b41259362009c256d91c0661a9855d0d..49aac8a640206532259acb577b9f8a4751be24b0 100644 (file)
@@ -9,14 +9,14 @@ class TestArvWs < Minitest::Test
   end
 
   def test_arv_ws_get_help
-    out, err = capture_subprocess_io do
+    _, err = capture_subprocess_io do
       system ('arv-ws -h')
     end
     assert_equal '', err
   end
 
   def test_arv_ws_such_option
-    out, err = capture_subprocess_io do
+    _, err = capture_subprocess_io do
       system ('arv-ws --junk')
     end
     refute_equal '', err
index c6bcd04776920186d9973bdf662c1fe060b9b8c2..604ad39de78877b96121fae730eb12ea0da080d6 100644 (file)
@@ -295,6 +295,12 @@ class ArvadosWorkflow(Workflow):
         self.loadingContext.metadata = self.loadingContext.metadata.copy()
         self.loadingContext.metadata["http://commonwl.org/cwltool#original_cwlVersion"] = "v1.0"
 
+        if len(job_res_reqs) == 1:
+            # RAM request needs to be at least 128 MiB or the workflow
+            # runner itself won't run reliably.
+            if job_res_reqs[0].get("ramMin", 1024) < 128:
+                job_res_reqs[0]["ramMin"] = 128
+
         wf_runner = cmap({
             "class": "CommandLineTool",
             "baseCommand": "cwltool",
index 0eb606d25c276f8a793293b3212785bccbf8c5e2..99aee3795678243f4298e798cdcfe744b4a7f08a 100644 (file)
   tool: wf/runin-reqs-wf4.cwl
   doc: "RunInSingleContainer discovers static resource request in subworkflow steps"
 
+- job: null
+  output:
+    out: null
+  tool: wf/runin-reqs-wf5.cwl
+  doc: "RunInSingleContainer has minimum 128 MiB RAM"
+
 - job: secondaryFiles/inp3.yml
   output: {}
   tool: secondaryFiles/example1.cwl
index e8bbc08d8a27ae12deee011d7e93d9dca24cc5fe..3cc52936eaa89781599920d43fe2130b322bd46b 100644 (file)
@@ -34,7 +34,7 @@ steps:
     hints:
       - class: arv:RunInSingleContainer
       - class: ResourceRequirement
-        ramMin: $(inputs.count*32)
+        ramMin: $(96+inputs.count*32)
       - class: arv:APIRequirement
     scatter: count
     run:
@@ -58,4 +58,4 @@ steps:
                 type: int
               script: File
             outputs: []
-            arguments: [python, $(inputs.script), $(inputs.count * 32)]
+            arguments: [python, $(inputs.script), $(96+inputs.count * 32)]
index 15cc8df6a229e351f0be880dbd7be69c8b78c152..7d06cb308cf6ba8c65d51882865e90b39acae6c4 100644 (file)
@@ -44,7 +44,7 @@ steps:
       outputs: []
       hints:
         - class: ResourceRequirement
-          ramMin: $(inputs.count*32)
+          ramMin: $(96+inputs.count*32)
       steps:
         sleep1:
           in:
@@ -59,4 +59,4 @@ steps:
                 type: int
               script: File
             outputs: []
-            arguments: [python, $(inputs.script), $(inputs.count * 32)]
+            arguments: [python, $(inputs.script), $(96+inputs.count * 32)]
index e07ff8deff08514669df5a5010d0027e5448a5ce..c13b7a0bc6aa7b1caebcae0db1426d17bc0625f6 100644 (file)
@@ -50,10 +50,10 @@ steps:
             id: subtool
             hints:
               - class: ResourceRequirement
-                ramMin: $(inputs.count*32)
+                ramMin: $(96+inputs.count*32)
             inputs:
               count:
                 type: int
               script: File
             outputs: []
-            arguments: [python, $(inputs.script), $(inputs.count * 32)]
+            arguments: [python, $(inputs.script), $(96+inputs.count * 32)]
index 05e73c8df5c6f5cbbf709bf6b9831c1420978622..9a26d01132cf07d6ae1cb11aff97a627807ddca5 100644 (file)
@@ -53,10 +53,10 @@ steps:
             id: subtool
             hints:
               - class: ResourceRequirement
-                ramMin: 32
+                ramMin: 128
             inputs:
               count:
                 type: int
               script: File
             outputs: []
-            arguments: [python, $(inputs.script), "32"]
+            arguments: [python, $(inputs.script), "128"]
diff --git a/sdk/cwl/tests/wf/runin-reqs-wf5.cwl b/sdk/cwl/tests/wf/runin-reqs-wf5.cwl
new file mode 100644 (file)
index 0000000..34d7b2c
--- /dev/null
@@ -0,0 +1,62 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+class: Workflow
+cwlVersion: v1.0
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+inputs:
+  count:
+    type: int[]
+    default: [1, 2, 3, 4]
+  script:
+    type: File
+    default:
+      class: File
+      location: check_mem.py
+outputs:
+  out: []
+requirements:
+  SubworkflowFeatureRequirement: {}
+  ScatterFeatureRequirement: {}
+  InlineJavascriptRequirement: {}
+  StepInputExpressionRequirement: {}
+hints:
+  DockerRequirement:
+    dockerPull: arvados/jobs:1.4.0.20190604172024
+steps:
+  substep:
+    in:
+      count: count
+      script: script
+    out: []
+    hints:
+      - class: arv:RunInSingleContainer
+      - class: arv:APIRequirement
+    scatter: count
+    run:
+      class: Workflow
+      id: mysub
+      inputs:
+        count: int
+        script: File
+      outputs: []
+      steps:
+        sleep1:
+          in:
+            count: count
+            script: script
+          out: []
+          run:
+            class: CommandLineTool
+            id: subtool
+            hints:
+              - class: ResourceRequirement
+                ramMin: 32
+            inputs:
+              count:
+                type: int
+              script: File
+            outputs: []
+            arguments: [python, $(inputs.script), "128"]
index 29dd62ac1eb2ca3f0c224bd08033284451f45c05..7c1c3538094869ff82a510226575a2dbbd0491ab 100644 (file)
@@ -81,6 +81,8 @@ type Cluster struct {
                DisabledAPIs                   StringSet
                MaxIndexDatabaseRead           int
                MaxItemsPerResponse            int
+               MaxConcurrentRequests          int
+               MaxKeepBlobBuffers             int
                MaxRequestAmplification        int
                MaxRequestSize                 int
                RailsSessionSecretToken        string
@@ -96,13 +98,19 @@ type Cluster struct {
                UnloggedAttributes StringSet
        }
        Collections struct {
-               BlobSigning          bool
-               BlobSigningKey       string
-               BlobSigningTTL       Duration
-               CollectionVersioning bool
-               DefaultTrashLifetime Duration
-               DefaultReplication   int
-               ManagedProperties    map[string]struct {
+               BlobSigning              bool
+               BlobSigningKey           string
+               BlobSigningTTL           Duration
+               BlobTrash                bool
+               BlobTrashLifetime        Duration
+               BlobTrashCheckInterval   Duration
+               BlobTrashConcurrency     int
+               BlobDeleteConcurrency    int
+               BlobReplicateConcurrency int
+               CollectionVersioning     bool
+               DefaultTrashLifetime     Duration
+               DefaultReplication       int
+               ManagedProperties        map[string]struct {
                        Value     interface{}
                        Function  string
                        Protected bool
@@ -111,6 +119,11 @@ type Cluster struct {
                TrashSweepInterval    Duration
                TrustAllContent       bool
 
+               BlobMissingReport        string
+               BalancePeriod            Duration
+               BalanceCollectionBatch   int
+               BalanceCollectionBuffers int
+
                WebDAVCache WebDAVCacheConfig
        }
        Git struct {
@@ -159,6 +172,7 @@ type Cluster struct {
                UserNotifierEmailFrom                 string
                UserProfileNotificationAddress        string
        }
+       Volumes   map[string]Volume
        Workbench struct {
                ActivationContactLink            string
                APIClientConnectTimeout          Duration
@@ -198,6 +212,48 @@ type Cluster struct {
        EnableBetaController14287 bool
 }
 
+type Volume struct {
+       AccessViaHosts   map[URL]VolumeAccess
+       ReadOnly         bool
+       Replication      int
+       StorageClasses   map[string]bool
+       Driver           string
+       DriverParameters json.RawMessage
+}
+
+type S3VolumeDriverParameters struct {
+       AccessKey          string
+       SecretKey          string
+       Endpoint           string
+       Region             string
+       Bucket             string
+       LocationConstraint bool
+       IndexPageSize      int
+       ConnectTimeout     Duration
+       ReadTimeout        Duration
+       RaceWindow         Duration
+       UnsafeDelete       bool
+}
+
+type AzureVolumeDriverParameters struct {
+       StorageAccountName   string
+       StorageAccountKey    string
+       StorageBaseURL       string
+       ContainerName        string
+       RequestTimeout       Duration
+       ListBlobsRetryDelay  Duration
+       ListBlobsMaxAttempts int
+}
+
+type DirectoryVolumeDriverParameters struct {
+       Root      string
+       Serialize bool
+}
+
+type VolumeAccess struct {
+       ReadOnly bool
+}
+
 type Services struct {
        Composer       Service
        Controller     Service
@@ -241,7 +297,13 @@ func (su URL) MarshalText() ([]byte, error) {
        return []byte(fmt.Sprintf("%s", (*url.URL)(&su).String())), nil
 }
 
-type ServiceInstance struct{}
+func (su URL) String() string {
+       return (*url.URL)(&su).String()
+}
+
+type ServiceInstance struct {
+       Rendezvous string `json:",omitempty"`
+}
 
 type PostgreSQL struct {
        Connection     PostgreSQLConnection
index 0c866354aa9b1e3a34833f15018b66613d40bdb4..97a62fa7bb3933b89e83e428fc4da39de7453fcd 100644 (file)
@@ -10,24 +10,27 @@ import (
        "net/http"
        "strconv"
        "strings"
+       "time"
 )
 
 // KeepService is an arvados#keepService record
 type KeepService struct {
-       UUID           string `json:"uuid"`
-       ServiceHost    string `json:"service_host"`
-       ServicePort    int    `json:"service_port"`
-       ServiceSSLFlag bool   `json:"service_ssl_flag"`
-       ServiceType    string `json:"service_type"`
-       ReadOnly       bool   `json:"read_only"`
+       UUID           string    `json:"uuid"`
+       ServiceHost    string    `json:"service_host"`
+       ServicePort    int       `json:"service_port"`
+       ServiceSSLFlag bool      `json:"service_ssl_flag"`
+       ServiceType    string    `json:"service_type"`
+       ReadOnly       bool      `json:"read_only"`
+       CreatedAt      time.Time `json:"created_at"`
+       ModifiedAt     time.Time `json:"modified_at"`
 }
 
 type KeepMount struct {
-       UUID           string   `json:"uuid"`
-       DeviceID       string   `json:"device_id"`
-       ReadOnly       bool     `json:"read_only"`
-       Replication    int      `json:"replication"`
-       StorageClasses []string `json:"storage_classes"`
+       UUID           string          `json:"uuid"`
+       DeviceID       string          `json:"device_id"`
+       ReadOnly       bool            `json:"read_only"`
+       Replication    int             `json:"replication"`
+       StorageClasses map[string]bool `json:"storage_classes"`
 }
 
 // KeepServiceList is an arvados#keepServiceList record
index 490a7f3e03b470296edfb671e82c864cb23076b6..5b01db5c4bbda594fe1c5fc5353a47bf2c60e49e 100644 (file)
@@ -111,16 +111,16 @@ func StopAPI() {
 }
 
 // StartKeep starts the given number of keep servers,
-// optionally with -enforce-permissions enabled.
-// Use numKeepServers = 2 and enforcePermissions = false under all normal circumstances.
-func StartKeep(numKeepServers int, enforcePermissions bool) {
+// optionally with --keep-blob-signing enabled.
+// Use numKeepServers = 2 and blobSigning = false under all normal circumstances.
+func StartKeep(numKeepServers int, blobSigning bool) {
        cwd, _ := os.Getwd()
        defer os.Chdir(cwd)
        chdirToPythonTests()
 
        cmdArgs := []string{"run_test_server.py", "start_keep", "--num-keep-servers", strconv.Itoa(numKeepServers)}
-       if enforcePermissions {
-               cmdArgs = append(cmdArgs, "--keep-enforce-permissions")
+       if blobSigning {
+               cmdArgs = append(cmdArgs, "--keep-blob-signing")
        }
 
        bgRun(exec.Command("python", cmdArgs...))
index e66eeadee1e1fc8d6b50cd3e10fa59e8a5a66a80..a17ad8d83614416222f56e0b950061886183822c 100644 (file)
@@ -11,7 +11,6 @@ import (
        "os"
 
        "github.com/sirupsen/logrus"
-       check "gopkg.in/check.v1"
 )
 
 var (
@@ -41,7 +40,7 @@ func FromContext(ctx context.Context) logrus.FieldLogger {
 
 // New returns a new logger with the indicated format and
 // level.
-func New(out io.Writer, format, level string) logrus.FieldLogger {
+func New(out io.Writer, format, level string) *logrus.Logger {
        logger := logrus.New()
        logger.Out = out
        setFormat(logger, format)
@@ -49,7 +48,7 @@ func New(out io.Writer, format, level string) logrus.FieldLogger {
        return logger
 }
 
-func TestLogger(c *check.C) logrus.FieldLogger {
+func TestLogger(c interface{ Log(...interface{}) }) *logrus.Logger {
        logger := logrus.New()
        logger.Out = &logWriter{c.Log}
        setFormat(logger, "text")
index a94146f850f712888331912a2d2c66b6458c92a1..627e04f0be53d16814fba92399e4ca11d87891ec 100644 (file)
@@ -43,7 +43,12 @@ func (srv *Server) Start() error {
        srv.cond = sync.NewCond(mutex.RLocker())
        srv.running = true
        go func() {
-               err = srv.Serve(tcpKeepAliveListener{srv.listener})
+               lnr := tcpKeepAliveListener{srv.listener}
+               if srv.TLSConfig != nil {
+                       err = srv.ServeTLS(lnr, "", "")
+               } else {
+                       err = srv.Serve(lnr)
+               }
                if !srv.wantDown {
                        srv.err = err
                }
index e7192d5b4f407560d1545ac02264c1f11c70684d..23e6e016d303bbc78abefcc39bfa0fb65b8ef0fe 100644 (file)
@@ -6,6 +6,9 @@ package httpserver
 
 import (
        "net/http"
+       "sync/atomic"
+
+       "github.com/prometheus/client_golang/prometheus"
 )
 
 // RequestCounter is an http.Handler that tracks the number of
@@ -24,19 +27,45 @@ type RequestCounter interface {
 type limiterHandler struct {
        requests chan struct{}
        handler  http.Handler
+       count    int64 // only used if cap(requests)==0
 }
 
 // NewRequestLimiter returns a RequestCounter that delegates up to
 // maxRequests at a time to the given handler, and responds 503 to all
 // incoming requests beyond that limit.
-func NewRequestLimiter(maxRequests int, handler http.Handler) RequestCounter {
-       return &limiterHandler{
+//
+// "concurrent_requests" and "max_concurrent_requests" metrics are
+// registered with the given reg, if reg is not nil.
+func NewRequestLimiter(maxRequests int, handler http.Handler, reg *prometheus.Registry) RequestCounter {
+       h := &limiterHandler{
                requests: make(chan struct{}, maxRequests),
                handler:  handler,
        }
+       if reg != nil {
+               reg.MustRegister(prometheus.NewGaugeFunc(
+                       prometheus.GaugeOpts{
+                               Namespace: "arvados",
+                               Name:      "concurrent_requests",
+                               Help:      "Number of requests in progress",
+                       },
+                       func() float64 { return float64(h.Current()) },
+               ))
+               reg.MustRegister(prometheus.NewGaugeFunc(
+                       prometheus.GaugeOpts{
+                               Namespace: "arvados",
+                               Name:      "max_concurrent_requests",
+                               Help:      "Maximum number of concurrent requests",
+                       },
+                       func() float64 { return float64(h.Max()) },
+               ))
+       }
+       return h
 }
 
 func (h *limiterHandler) Current() int {
+       if cap(h.requests) == 0 {
+               return int(atomic.LoadInt64(&h.count))
+       }
        return len(h.requests)
 }
 
@@ -45,6 +74,11 @@ func (h *limiterHandler) Max() int {
 }
 
 func (h *limiterHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
+       if cap(h.requests) == 0 {
+               atomic.AddInt64(&h.count, 1)
+               h.handler.ServeHTTP(resp, req)
+               atomic.AddInt64(&h.count, -1)
+       }
        select {
        case h.requests <- struct{}{}:
        default:
index afa4e3faa78eae9773c1989595b564a7ffc07561..64d1f3d4cfb3fc47930ad1d655ae97366af6efb0 100644 (file)
@@ -31,7 +31,7 @@ func newTestHandler(maxReqs int) *testHandler {
 
 func TestRequestLimiter1(t *testing.T) {
        h := newTestHandler(10)
-       l := NewRequestLimiter(1, h)
+       l := NewRequestLimiter(1, h, nil)
        var wg sync.WaitGroup
        resps := make([]*httptest.ResponseRecorder, 10)
        for i := 0; i < 10; i++ {
@@ -91,7 +91,7 @@ func TestRequestLimiter1(t *testing.T) {
 
 func TestRequestLimiter10(t *testing.T) {
        h := newTestHandler(10)
-       l := NewRequestLimiter(10, h)
+       l := NewRequestLimiter(10, h, nil)
        var wg sync.WaitGroup
        for i := 0; i < 10; i++ {
                wg.Add(1)
index 34342059f3062820313127514c47e0061f080a77..48aabbbe409a5c672d7917ae8b57b73973fd7bec 100644 (file)
@@ -399,9 +399,9 @@ def get_config():
     with open(os.environ["ARVADOS_CONFIG"]) as f:
         return yaml.safe_load(f)
 
-def internal_port_from_config(service):
+def internal_port_from_config(service, idx=0):
     return int(urlparse(
-        list(get_config()["Clusters"]["zzzzz"]["Services"][service]["InternalURLs"].keys())[0]).
+        sorted(list(get_config()["Clusters"]["zzzzz"]["Services"][service]["InternalURLs"].keys()))[idx]).
                netloc.split(":")[1])
 
 def external_port_from_config(service):
@@ -444,47 +444,41 @@ def stop_ws():
         return
     kill_server_pid(_pidfile('ws'))
 
-def _start_keep(n, keep_args):
-    keep0 = tempfile.mkdtemp()
-    port = find_available_port()
-    keep_cmd = ["keepstore",
-                "-volume={}".format(keep0),
-                "-listen=:{}".format(port),
-                "-pid="+_pidfile('keep{}'.format(n))]
-
-    for arg, val in keep_args.items():
-        keep_cmd.append("{}={}".format(arg, val))
+def _start_keep(n, blob_signing=False):
+    datadir = os.path.join(TEST_TMPDIR, "keep%d.data"%n)
+    if os.path.exists(datadir):
+        shutil.rmtree(datadir)
+    os.mkdir(datadir)
+    port = internal_port_from_config("Keepstore", idx=n)
+
+    # Currently, if there are multiple InternalURLs for a single host,
+    # the only way to tell a keepstore process which one it's supposed
+    # to listen on is to supply a redacted version of the config, with
+    # the other InternalURLs removed.
+    conf = os.path.join(TEST_TMPDIR, "keep%d.yaml"%n)
+    confdata = get_config()
+    confdata['Clusters']['zzzzz']['Services']['Keepstore']['InternalURLs'] = {"http://127.0.0.1:%d"%port: {}}
+    confdata['Clusters']['zzzzz']['Collections']['BlobSigning'] = blob_signing
+    with open(conf, 'w') as f:
+        yaml.safe_dump(confdata, f)
+    keep_cmd = ["keepstore", "-config", conf]
 
     with open(_logfilename('keep{}'.format(n)), 'a') as logf:
         with open('/dev/null') as _stdin:
-            kp0 = subprocess.Popen(
+            child = subprocess.Popen(
                 keep_cmd, stdin=_stdin, stdout=logf, stderr=logf, close_fds=True)
 
+    print('child.pid is %d'%child.pid, file=sys.stderr)
     with open(_pidfile('keep{}'.format(n)), 'w') as f:
-        f.write(str(kp0.pid))
-
-    with open("{}/keep{}.volume".format(TEST_TMPDIR, n), 'w') as f:
-        f.write(keep0)
+        f.write(str(child.pid))
 
     _wait_until_port_listens(port)
 
     return port
 
-def run_keep(blob_signing_key=None, enforce_permissions=False, num_servers=2):
+def run_keep(num_servers=2, **kwargs):
     stop_keep(num_servers)
 
-    keep_args = {}
-    if not blob_signing_key:
-        blob_signing_key = 'zfhgfenhffzltr9dixws36j1yhksjoll2grmku38mi7yxd66h5j4q9w4jzanezacp8s6q0ro3hxakfye02152hncy6zml2ed0uc'
-    with open(os.path.join(TEST_TMPDIR, "keep.blob_signing_key"), "w") as f:
-        keep_args['-blob-signing-key-file'] = f.name
-        f.write(blob_signing_key)
-    keep_args['-enforce-permissions'] = str(enforce_permissions).lower()
-    with open(os.path.join(TEST_TMPDIR, "keep.data-manager-token-file"), "w") as f:
-        keep_args['-data-manager-token-file'] = f.name
-        f.write(auth_token('data_manager'))
-    keep_args['-never-delete'] = 'false'
-
     api = arvados.api(
         version='v1',
         host=os.environ['ARVADOS_API_HOST'],
@@ -497,7 +491,7 @@ def run_keep(blob_signing_key=None, enforce_permissions=False, num_servers=2):
         api.keep_disks().delete(uuid=d['uuid']).execute()
 
     for d in range(0, num_servers):
-        port = _start_keep(d, keep_args)
+        port = _start_keep(d, **kwargs)
         svc = api.keep_services().create(body={'keep_service': {
             'uuid': 'zzzzz-bi6l4-keepdisk{:07d}'.format(d),
             'service_host': 'localhost',
@@ -522,12 +516,6 @@ def run_keep(blob_signing_key=None, enforce_permissions=False, num_servers=2):
 
 def _stop_keep(n):
     kill_server_pid(_pidfile('keep{}'.format(n)))
-    if os.path.exists("{}/keep{}.volume".format(TEST_TMPDIR, n)):
-        with open("{}/keep{}.volume".format(TEST_TMPDIR, n), 'r') as r:
-            shutil.rmtree(r.read(), True)
-        os.unlink("{}/keep{}.volume".format(TEST_TMPDIR, n))
-    if os.path.exists(os.path.join(TEST_TMPDIR, "keep.blob_signing_key")):
-        os.remove(os.path.join(TEST_TMPDIR, "keep.blob_signing_key"))
 
 def stop_keep(num_servers=2):
     for n in range(0, num_servers):
@@ -652,6 +640,7 @@ def run_nginx():
          '-g', 'pid '+_pidfile('nginx')+';',
          '-c', conffile],
         env=env, stdin=open('/dev/null'), stdout=sys.stderr)
+    _wait_until_port_listens(nginxconf['CONTROLLERSSLPORT'])
 
 def setup_config():
     rails_api_port = find_available_port()
@@ -663,6 +652,7 @@ def setup_config():
     git_httpd_external_port = find_available_port()
     keepproxy_port = find_available_port()
     keepproxy_external_port = find_available_port()
+    keepstore_ports = sorted([str(find_available_port()) for _ in xrange(0,4)])
     keep_web_port = find_available_port()
     keep_web_external_port = find_available_port()
     keep_web_dl_port = find_available_port()
@@ -678,45 +668,50 @@ def setup_config():
     services = {
         "RailsAPI": {
             "InternalURLs": {
-                "https://%s:%s"%(localhost, rails_api_port): {}
-            }
+                "https://%s:%s"%(localhost, rails_api_port): {},
+            },
         },
         "Controller": {
             "ExternalURL": "https://%s:%s" % (localhost, controller_external_port),
             "InternalURLs": {
-                "http://%s:%s"%(localhost, controller_port): {}
-            }
+                "http://%s:%s"%(localhost, controller_port): {},
+            },
         },
         "Websocket": {
             "ExternalURL": "wss://%s:%s/websocket" % (localhost, websocket_external_port),
             "InternalURLs": {
-                "http://%s:%s"%(localhost, websocket_port): {}
-            }
+                "http://%s:%s"%(localhost, websocket_port): {},
+            },
         },
         "GitHTTP": {
             "ExternalURL": "https://%s:%s" % (localhost, git_httpd_external_port),
             "InternalURLs": {
                 "http://%s:%s"%(localhost, git_httpd_port): {}
-            }
+            },
+        },
+        "Keepstore": {
+            "InternalURLs": {
+                "http://%s:%s"%(localhost, port): {} for port in keepstore_ports
+            },
         },
         "Keepproxy": {
             "ExternalURL": "https://%s:%s" % (localhost, keepproxy_external_port),
             "InternalURLs": {
-                "http://%s:%s"%(localhost, keepproxy_port): {}
-            }
+                "http://%s:%s"%(localhost, keepproxy_port): {},
+            },
         },
         "WebDAV": {
             "ExternalURL": "https://%s:%s" % (localhost, keep_web_external_port),
             "InternalURLs": {
-                "http://%s:%s"%(localhost, keep_web_port): {}
-            }
+                "http://%s:%s"%(localhost, keep_web_port): {},
+            },
         },
         "WebDAVDownload": {
             "ExternalURL": "https://%s:%s" % (localhost, keep_web_dl_external_port),
             "InternalURLs": {
-                "http://%s:%s"%(localhost, keep_web_dl_port): {}
-            }
-        }
+                "http://%s:%s"%(localhost, keep_web_dl_port): {},
+            },
+        },
     }
 
     config = {
@@ -724,30 +719,43 @@ def setup_config():
             "zzzzz": {
                 "EnableBetaController14287": ('14287' in os.environ.get('ARVADOS_EXPERIMENTAL', '')),
                 "ManagementToken": "e687950a23c3a9bceec28c6223a06c79",
+                "SystemRootToken": auth_token('data_manager'),
                 "API": {
-                    "RequestTimeout": "30s"
+                    "RequestTimeout": "30s",
                 },
                 "SystemLogs": {
-                    "LogLevel": ('info' if os.environ.get('ARVADOS_DEBUG', '') in ['','0'] else 'debug')
+                    "LogLevel": ('info' if os.environ.get('ARVADOS_DEBUG', '') in ['','0'] else 'debug'),
                 },
                 "PostgreSQL": {
                     "Connection": pgconnection,
                 },
                 "TLS": {
-                    "Insecure": True
+                    "Insecure": True,
                 },
                 "Services": services,
                 "Users": {
-                    "AnonymousUserToken": auth_token('anonymous')
+                    "AnonymousUserToken": auth_token('anonymous'),
                 },
                 "Collections": {
-                    "TrustAllContent": True
+                    "BlobSigningKey": "zfhgfenhffzltr9dixws36j1yhksjoll2grmku38mi7yxd66h5j4q9w4jzanezacp8s6q0ro3hxakfye02152hncy6zml2ed0uc",
+                    "TrustAllContent": True,
                 },
                 "Git": {
-                    "Repositories": "%s/test" % os.path.join(SERVICES_SRC_DIR, 'api', 'tmp', 'git')
-                }
-            }
-        }
+                    "Repositories": "%s/test" % os.path.join(SERVICES_SRC_DIR, 'api', 'tmp', 'git'),
+                },
+                "Volumes": {
+                    "zzzzz-nyw5e-%015d"%n: {
+                        "AccessViaHosts": {
+                            "http://%s:%s" % (localhost, keepstore_ports[n]): {},
+                        },
+                        "Driver": "Directory",
+                        "DriverParameters": {
+                            "Root": os.path.join(TEST_TMPDIR, "keep%d.data"%n),
+                        },
+                    } for n in range(len(keepstore_ports))
+                },
+            },
+        },
     }
 
     conf = os.path.join(TEST_TMPDIR, 'arvados.yml')
@@ -864,7 +872,7 @@ if __name__ == "__main__":
     parser.add_argument('action', type=str, help="one of {}".format(actions))
     parser.add_argument('--auth', type=str, metavar='FIXTURE_NAME', help='Print authorization info for given api_client_authorizations fixture')
     parser.add_argument('--num-keep-servers', metavar='int', type=int, default=2, help="Number of keep servers desired")
-    parser.add_argument('--keep-enforce-permissions', action="store_true", help="Enforce keep permissions")
+    parser.add_argument('--keep-blob-signing', action="store_true", help="Enable blob signing for keepstore servers")
 
     args = parser.parse_args()
 
@@ -895,7 +903,7 @@ if __name__ == "__main__":
     elif args.action == 'stop_controller':
         stop_controller()
     elif args.action == 'start_keep':
-        run_keep(enforce_permissions=args.keep_enforce_permissions, num_servers=args.num_keep_servers)
+        run_keep(blob_signing=args.keep_blob_signing, num_servers=args.num_keep_servers)
     elif args.action == 'stop_keep':
         stop_keep(num_servers=args.num_keep_servers)
     elif args.action == 'start_keep_proxy':
index 42adf2450d2d5a16cd499f342c3f2f243eefdb75..a8c4a853cf9f35f94ab04a191bc0188de723f2ad 100644 (file)
@@ -852,27 +852,8 @@ class ArvadosPutTest(run_test_server.TestCaseWithServers,
 
 class ArvPutIntegrationTest(run_test_server.TestCaseWithServers,
                             ArvadosBaseTestCase):
-    def _getKeepServerConfig():
-        for config_file, mandatory in [
-                ['application.yml', False], ['application.default.yml', True]]:
-            path = os.path.join(run_test_server.SERVICES_SRC_DIR,
-                                "api", "config", config_file)
-            if not mandatory and not os.path.exists(path):
-                continue
-            with open(path) as f:
-                rails_config = yaml.safe_load(f.read())
-                for config_section in ['test', 'common']:
-                    try:
-                        key = rails_config[config_section]["blob_signing_key"]
-                    except (KeyError, TypeError):
-                        pass
-                    else:
-                        return {'blob_signing_key': key,
-                                'enforce_permissions': True}
-        return {'blog_signing_key': None, 'enforce_permissions': False}
-
     MAIN_SERVER = {}
-    KEEP_SERVER = _getKeepServerConfig()
+    KEEP_SERVER = {'blob_signing': True}
     PROJECT_UUID = run_test_server.fixture('groups')['aproject']['uuid']
 
     @classmethod
index d6b3a2a12dc6f4d4d8bb997c25f43cc21e8cda62..80e6987b38bbaa7068db6a4978ef0c85a579bf96 100644 (file)
@@ -130,8 +130,7 @@ class KeepTestCase(run_test_server.TestCaseWithServers):
 
 class KeepPermissionTestCase(run_test_server.TestCaseWithServers):
     MAIN_SERVER = {}
-    KEEP_SERVER = {'blob_signing_key': 'abcdefghijk0123456789',
-                   'enforce_permissions': True}
+    KEEP_SERVER = {'blob_signing': True}
 
     def test_KeepBasicRWTest(self):
         run_test_server.authorize_with('active')
@@ -173,70 +172,6 @@ class KeepPermissionTestCase(run_test_server.TestCaseWithServers):
                           unsigned_bar_locator)
 
 
-# KeepOptionalPermission: starts Keep with --permission-key-file
-# but not --enforce-permissions (i.e. generate signatures on PUT
-# requests, but do not require them for GET requests)
-#
-# All of these requests should succeed when permissions are optional:
-# * authenticated request, signed locator
-# * authenticated request, unsigned locator
-# * unauthenticated request, signed locator
-# * unauthenticated request, unsigned locator
-class KeepOptionalPermission(run_test_server.TestCaseWithServers):
-    MAIN_SERVER = {}
-    KEEP_SERVER = {'blob_signing_key': 'abcdefghijk0123456789',
-                   'enforce_permissions': False}
-
-    @classmethod
-    def setUpClass(cls):
-        super(KeepOptionalPermission, cls).setUpClass()
-        run_test_server.authorize_with("admin")
-        cls.api_client = arvados.api('v1')
-
-    def setUp(self):
-        super(KeepOptionalPermission, self).setUp()
-        self.keep_client = arvados.KeepClient(api_client=self.api_client,
-                                              proxy='', local_store='')
-
-    def _put_foo_and_check(self):
-        signed_locator = self.keep_client.put('foo')
-        self.assertRegex(
-            signed_locator,
-            r'^acbd18db4cc2f85cedef654fccc4a4d8\+3\+A[a-f0-9]+@[a-f0-9]+$',
-            'invalid locator from Keep.put("foo"): ' + signed_locator)
-        return signed_locator
-
-    def test_KeepAuthenticatedSignedTest(self):
-        signed_locator = self._put_foo_and_check()
-        self.assertEqual(self.keep_client.get(signed_locator),
-                         b'foo',
-                         'wrong content from Keep.get(md5("foo"))')
-
-    def test_KeepAuthenticatedUnsignedTest(self):
-        signed_locator = self._put_foo_and_check()
-        self.assertEqual(self.keep_client.get("acbd18db4cc2f85cedef654fccc4a4d8"),
-                         b'foo',
-                         'wrong content from Keep.get(md5("foo"))')
-
-    def test_KeepUnauthenticatedSignedTest(self):
-        # Check that signed GET requests work even when permissions
-        # enforcement is off.
-        signed_locator = self._put_foo_and_check()
-        self.keep_client.api_token = ''
-        self.assertEqual(self.keep_client.get(signed_locator),
-                         b'foo',
-                         'wrong content from Keep.get(md5("foo"))')
-
-    def test_KeepUnauthenticatedUnsignedTest(self):
-        # Since --enforce-permissions is not in effect, GET requests
-        # need not be authenticated.
-        signed_locator = self._put_foo_and_check()
-        self.keep_client.api_token = ''
-        self.assertEqual(self.keep_client.get("acbd18db4cc2f85cedef654fccc4a4d8"),
-                         b'foo',
-                         'wrong content from Keep.get(md5("foo"))')
-
-
 class KeepProxyTestCase(run_test_server.TestCaseWithServers):
     MAIN_SERVER = {}
     KEEP_SERVER = {}
index d4f04eb370659fac2ade716e302a301ea7494577..ef189bb7ac49fd7af7bad9fd313cf3ce942d5ff0 100644 (file)
@@ -33,6 +33,8 @@ Gem::Specification.new do |s|
   # work around undeclared dependency on i18n in some activesupport 3.x.x:
   s.add_dependency('i18n', '~> 0')
   s.add_dependency('json', '>= 1.7.7', '<3')
+  # arvados-google-api-client 0.8.7.2 is incompatible with faraday 0.16.2
+  s.add_dependency('faraday', '< 0.16')
   s.add_runtime_dependency('jwt', '<2', '>= 0.1.5')
   s.homepage    =
     'https://arvados.org'
index 90b0a9228b91a1f806d081ba15656bfa016b8ee0..c6e8894352f4f5f147c1d8e7f7d4c6d7db331aab 100644 (file)
@@ -10,17 +10,16 @@ class Arvados::V1::KeepServicesController < ApplicationController
 
   def find_objects_for_index
     # all users can list all keep services
-    @objects = model_class.where('1=1')
+    @objects = KeepService.all
     super
   end
 
   def accessible
     if request.headers['X-External-Client'] == '1'
-      @objects = model_class.where('service_type=?', 'proxy')
+      @objects = KeepService.where('service_type=?', 'proxy')
     else
-      @objects = model_class.where(model_class.arel_table[:service_type].not_eq('proxy'))
+      @objects = KeepService.where('service_type<>?', 'proxy')
     end
     render_list
   end
-
 end
index 8999b3e14e123b78f8ecfaaa2ea821d8fa6e3490..376be55ffbf1a762ae81c2ed3fbbf76292b87883 100644 (file)
@@ -423,6 +423,10 @@ class Container < ArvadosModel
     current_user.andand.is_admin
   end
 
+  def permission_to_destroy
+    current_user.andand.is_admin
+  end
+
   def ensure_owner_uuid_is_permitted
     # validate_change ensures owner_uuid can't be changed at all --
     # except during create, which requires admin privileges. Checking
index bf5f3ccc01a5b6022aceaf2e60fc6b582ff25cda..60f2632029dc4bf2cf2ead9dc19522551b8bbb10 100644 (file)
@@ -6,6 +6,9 @@ class KeepService < ArvadosModel
   include HasUuid
   include KindAndEtag
   include CommonApiTemplate
+  extend DbCurrentTime
+
+  SERVER_START_TIME = db_current_time
 
   api_accessible :user, extend: :common do |t|
     t.add  :service_host
@@ -17,6 +20,21 @@ class KeepService < ArvadosModel
   api_accessible :superuser, :extend => :user do |t|
   end
 
+  # return the set of keep services from the database (if this is an
+  # older installation or test system where entries have been added
+  # manually) or, preferably, the cluster config file.
+  def self.all *args
+    if super.count == 0
+      from_config
+    else
+      super
+    end
+  end
+
+  def self.where *args
+    all.where *args
+  end
+
   protected
 
   def permission_to_create
@@ -26,4 +44,44 @@ class KeepService < ArvadosModel
   def permission_to_update
     current_user.andand.is_admin
   end
+
+  def self.from_config
+    config_time = connection.quote(SERVER_START_TIME)
+    owner = connection.quote(system_user_uuid)
+    values = []
+    id = 1
+    Rails.configuration.Services.Keepstore.InternalURLs.each do |url, info|
+      values << "(#{id}, " + quoted_column_values_from_url(url: url.to_s, rendezvous: info.Rendezvous).join(", ") + ", 'disk', 'f'::bool, #{config_time}, #{config_time}, #{owner}, #{owner}, null)"
+      id += 1
+    end
+    url = Rails.configuration.Services.Keepproxy.ExternalURL.to_s
+    if !url.blank?
+      values << "(#{id}, " + quoted_column_values_from_url(url: url, rendezvous: "").join(", ") + ", 'proxy', 'f'::bool, #{config_time}, #{config_time}, #{owner}, #{owner}, null)"
+      id += 1
+    end
+    if values.length == 0
+      # return empty set as AR relation
+      return unscoped.where('1=0')
+    else
+      sql = "(values #{values.join(", ")}) as keep_services (id, uuid, service_host, service_port, service_ssl_flag, service_type, read_only, created_at, modified_at, owner_uuid, modified_by_user_uuid, modified_by_client_uuid)"
+      return unscoped.from(sql)
+    end
+  end
+
+  private
+
+  def self.quoted_column_values_from_url(url:, rendezvous:)
+    rvz = rendezvous
+    rvz = url if rvz.blank?
+    if /^[a-zA-Z0-9]{15}$/ !~ rvz
+      # If rvz is an URL (either the real service URL, or an alternate
+      # one specified in config in order to preserve rendezvous order
+      # when changing hosts/ports), hash it to get 15 alphanums.
+      rvz = Digest::MD5.hexdigest(rvz)[0..15]
+    end
+    uuid = Rails.configuration.ClusterID + "-bi6l4-" + rvz
+    uri = URI::parse(url)
+    [uuid, uri.host, uri.port].map { |x| connection.quote(x) } + [(uri.scheme == 'https' ? "'t'::bool" : "'f'::bool")]
+  end
+
 end
index 41554be87507ba19d9ed8edc40a8a1e2db6f0bad..867ab35e795f522370ca5b61e6a4c4a900ffabbc 100644 (file)
@@ -36,4 +36,33 @@ class Arvados::V1::KeepServicesControllerTest < ActionController::TestCase
     end
   end
 
+  test "report configured servers if db is empty" do
+    KeepService.unscoped.all.delete_all
+    expect_rvz = {}
+    n = 0
+    Rails.configuration.Services.Keepstore.InternalURLs.each do |k,v|
+      n += 1
+      rvz = "%015x" % n
+      expect_rvz[k.to_s] = rvz
+      Rails.configuration.Services.Keepstore.InternalURLs[k].Rendezvous = rvz
+    end
+    expect_rvz[Rails.configuration.Services.Keepproxy.ExternalURL] = true
+    refute_empty expect_rvz
+    authorize_with :active
+    get :index,
+      params: {:format => :json},
+      headers: auth(:active)
+    assert_response :success
+    json_response['items'].each do |svc|
+      url = "#{svc['service_ssl_flag'] ? 'https' : 'http'}://#{svc['service_host']}:#{svc['service_port']}"
+      assert_equal true, expect_rvz.has_key?(url), "#{url} does not match any configured service: expecting #{expect_rvz}"
+      rvz = expect_rvz[url]
+      if rvz.is_a? String
+        assert_equal "zzzzz-bi6l4-#{rvz}", svc['uuid'], "exported service UUID should match InternalURLs.*.Rendezvous value"
+      end
+      expect_rvz.delete(url)
+    end
+    assert_equal({}, expect_rvz, "all configured Keepstore and Keepproxy services should be returned")
+  end
+
 end
index 88fd5feb6ad27c3c55dd5531c0a2566422239f41..5f17efc4452c3ac24e5e53f9d532da1ce3b9d673 100644 (file)
@@ -980,6 +980,15 @@ class ContainerTest < ActiveSupport::TestCase
     end
   end
 
+  test "user cannot delete" do
+    set_user_from_auth :active
+    c, _ = minimal_new
+    assert_raises ArvadosModel::PermissionDeniedError do
+      c.destroy
+    end
+    assert Container.find_by_uuid(c.uuid)
+  end
+
   [
     {state: Container::Complete, exit_code: 0, output: '1f4b0bc7583c2a7f9102c395f4ffc5e3+45'},
     {state: Container::Cancelled},
index 568570942e1f143c0f89725bc20cf372512d8983..c4ebb37edbe229d47f98d47ea402695ccd3e3169 100644 (file)
@@ -16,6 +16,7 @@ import (
        "git.curoverse.com/arvados.git/lib/config"
        "git.curoverse.com/arvados.git/sdk/go/arvados"
        "git.curoverse.com/arvados.git/sdk/go/arvadostest"
+       "git.curoverse.com/arvados.git/sdk/go/ctxlog"
        check "gopkg.in/check.v1"
 )
 
@@ -38,7 +39,7 @@ func (s *AuthHandlerSuite) SetUpTest(c *check.C) {
        repoRoot, err := filepath.Abs("../api/tmp/git/test")
        c.Assert(err, check.IsNil)
 
-       cfg, err := config.NewLoader(nil, nil).Load()
+       cfg, err := config.NewLoader(nil, ctxlog.TestLogger(c)).Load()
        c.Assert(err, check.Equals, nil)
        s.cluster, err = cfg.GetCluster("")
        c.Assert(err, check.Equals, nil)
index d5cb275fd8fea3613584ff809095a5c8094407c5..1a4a984fb7265e8b7dacfd7e647706a0f48b81c8 100644 (file)
@@ -12,6 +12,7 @@ import (
 
        "git.curoverse.com/arvados.git/lib/config"
        "git.curoverse.com/arvados.git/sdk/go/arvados"
+       "git.curoverse.com/arvados.git/sdk/go/ctxlog"
        check "gopkg.in/check.v1"
 )
 
@@ -22,7 +23,7 @@ type GitHandlerSuite struct {
 }
 
 func (s *GitHandlerSuite) SetUpTest(c *check.C) {
-       cfg, err := config.NewLoader(nil, nil).Load()
+       cfg, err := config.NewLoader(nil, ctxlog.TestLogger(c)).Load()
        c.Assert(err, check.Equals, nil)
        s.cluster, err = cfg.GetCluster("")
        c.Assert(err, check.Equals, nil)
index eaa7b55f8381f0b4097d751b2aad39866babe355..45a1e60ffcb57108eba3557cfbb09f2ab5150c91 100644 (file)
@@ -12,6 +12,7 @@ import (
 
        "git.curoverse.com/arvados.git/lib/config"
        "git.curoverse.com/arvados.git/sdk/go/arvados"
+       "git.curoverse.com/arvados.git/sdk/go/ctxlog"
        check "gopkg.in/check.v1"
 )
 
@@ -48,7 +49,7 @@ func (s *GitoliteSuite) SetUpTest(c *check.C) {
 
        s.tmpRepoRoot = s.gitoliteHome + "/repositories"
 
-       cfg, err := config.NewLoader(nil, nil).Load()
+       cfg, err := config.NewLoader(nil, ctxlog.TestLogger(c)).Load()
        c.Assert(err, check.Equals, nil)
        s.cluster, err = cfg.GetCluster("")
        c.Assert(err, check.Equals, nil)
index 46bf8329c2dce429178071a6da72cfb6b90bb8b5..a408dd475f617fe850ee04aaf3d1e5158568ab6c 100644 (file)
@@ -15,6 +15,7 @@ import (
        "git.curoverse.com/arvados.git/lib/config"
        "git.curoverse.com/arvados.git/sdk/go/arvados"
        "git.curoverse.com/arvados.git/sdk/go/arvadostest"
+       "git.curoverse.com/arvados.git/sdk/go/ctxlog"
        check "gopkg.in/check.v1"
 )
 
@@ -62,7 +63,7 @@ func (s *IntegrationSuite) SetUpTest(c *check.C) {
        c.Assert(err, check.Equals, nil)
 
        if s.cluster == nil {
-               cfg, err := config.NewLoader(nil, nil).Load()
+               cfg, err := config.NewLoader(nil, ctxlog.TestLogger(c)).Load()
                c.Assert(err, check.Equals, nil)
                s.cluster, err = cfg.GetCluster("")
                c.Assert(err, check.Equals, nil)
index 5796ad7a278cfbb79df34210d3c81880e979a67b..61ee24c911d7f7f2708a9fac71026e451c7264ef 100644 (file)
@@ -13,6 +13,7 @@ import (
        "git.curoverse.com/arvados.git/lib/config"
        "git.curoverse.com/arvados.git/sdk/go/arvados"
        "git.curoverse.com/arvados.git/sdk/go/arvadostest"
+       "git.curoverse.com/arvados.git/sdk/go/ctxlog"
        check "gopkg.in/check.v1"
        git_client "gopkg.in/src-d/go-git.v4/plumbing/transport/client"
        git_http "gopkg.in/src-d/go-git.v4/plumbing/transport/http"
@@ -205,7 +206,7 @@ func (s *GitMountSuite) checkTmpdirContents(c *check.C, expect []string) {
 func (*GitMountSuite) useTestGitServer(c *check.C) {
        git_client.InstallProtocol("https", git_http.NewClient(arvados.InsecureHTTPClient))
 
-       loader := config.NewLoader(nil, nil)
+       loader := config.NewLoader(nil, ctxlog.TestLogger(c))
        cfg, err := loader.Load()
        c.Assert(err, check.IsNil)
        cluster, err := cfg.GetCluster("")
index 7072eae6849fb14de624fe8b2d9def856f8cab5a..89b39dbc87e10677c3024d4566c9325cae756048 100644 (file)
@@ -61,7 +61,7 @@ class IntegrationTest(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
         run_test_server.run()
-        run_test_server.run_keep(enforce_permissions=True, num_servers=2)
+        run_test_server.run_keep(blob_signing=True, num_servers=2)
 
     @classmethod
     def tearDownClass(cls):
index 192b1b5dcb9af83b08d9bc421bce705dc409df05..c90e7fd8df96967d009b981067cfbacfb9b2d0d3 100644 (file)
@@ -38,7 +38,7 @@ class ExecMode(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
         run_test_server.run()
-        run_test_server.run_keep(enforce_permissions=True, num_servers=2)
+        run_test_server.run_keep(blob_signing=True, num_servers=2)
         run_test_server.authorize_with('active')
 
     @classmethod
index a06a4f884d2365da279e0a27b399fac409e25e7c..039954a000d0a6bf381b043dd5cc25459cb7c843 100644 (file)
@@ -12,6 +12,7 @@ import (
        "git.curoverse.com/arvados.git/lib/service"
        "git.curoverse.com/arvados.git/sdk/go/arvados"
        "git.curoverse.com/arvados.git/sdk/go/health"
+       "github.com/prometheus/client_golang/prometheus"
 )
 
 var (
@@ -19,7 +20,7 @@ var (
        command cmd.Handler = service.Command(arvados.ServiceNameHealth, newHandler)
 )
 
-func newHandler(ctx context.Context, cluster *arvados.Cluster, _ string) service.Handler {
+func newHandler(ctx context.Context, cluster *arvados.Cluster, _ string, _ *prometheus.Registry) service.Handler {
        return &health.Aggregator{Cluster: cluster}
 }
 
index 08a6c5881c51e32951d475ec49b37a833f360532..e50b0b505aee471f30918772f66f18ec0838e31d 100644 (file)
@@ -66,7 +66,7 @@ type Balancer struct {
 // Typical usage:
 //
 //   runOptions, err = (&Balancer{}).Run(config, runOptions)
-func (bal *Balancer) Run(config Config, runOptions RunOptions) (nextRunOptions RunOptions, err error) {
+func (bal *Balancer) Run(client *arvados.Client, cluster *arvados.Cluster, runOptions RunOptions) (nextRunOptions RunOptions, err error) {
        nextRunOptions = runOptions
 
        defer bal.time("sweep", "wall clock time to run one full sweep")()
@@ -95,24 +95,20 @@ func (bal *Balancer) Run(config Config, runOptions RunOptions) (nextRunOptions R
                bal.lostBlocks = ioutil.Discard
        }
 
-       if len(config.KeepServiceList.Items) > 0 {
-               err = bal.SetKeepServices(config.KeepServiceList)
-       } else {
-               err = bal.DiscoverKeepServices(&config.Client, config.KeepServiceTypes)
-       }
+       err = bal.DiscoverKeepServices(client)
        if err != nil {
                return
        }
 
        for _, srv := range bal.KeepServices {
-               err = srv.discoverMounts(&config.Client)
+               err = srv.discoverMounts(client)
                if err != nil {
                        return
                }
        }
        bal.cleanupMounts()
 
-       if err = bal.CheckSanityEarly(&config.Client); err != nil {
+       if err = bal.CheckSanityEarly(client); err != nil {
                return
        }
        rs := bal.rendezvousState()
@@ -121,7 +117,7 @@ func (bal *Balancer) Run(config Config, runOptions RunOptions) (nextRunOptions R
                        bal.logf("notice: KeepServices list has changed since last run")
                }
                bal.logf("clearing existing trash lists, in case the new rendezvous order differs from previous run")
-               if err = bal.ClearTrashLists(&config.Client); err != nil {
+               if err = bal.ClearTrashLists(client); err != nil {
                        return
                }
                // The current rendezvous state becomes "safe" (i.e.,
@@ -130,7 +126,7 @@ func (bal *Balancer) Run(config Config, runOptions RunOptions) (nextRunOptions R
                // succeed in clearing existing trash lists.
                nextRunOptions.SafeRendezvousState = rs
        }
-       if err = bal.GetCurrentState(&config.Client, config.CollectionBatchSize, config.CollectionBuffers); err != nil {
+       if err = bal.GetCurrentState(client, cluster.Collections.BalanceCollectionBatch, cluster.Collections.BalanceCollectionBuffers); err != nil {
                return
        }
        bal.ComputeChangeSets()
@@ -150,14 +146,14 @@ func (bal *Balancer) Run(config Config, runOptions RunOptions) (nextRunOptions R
                lbFile = nil
        }
        if runOptions.CommitPulls {
-               err = bal.CommitPulls(&config.Client)
+               err = bal.CommitPulls(client)
                if err != nil {
                        // Skip trash if we can't pull. (Too cautious?)
                        return
                }
        }
        if runOptions.CommitTrash {
-               err = bal.CommitTrash(&config.Client)
+               err = bal.CommitTrash(client)
        }
        return
 }
@@ -176,15 +172,11 @@ func (bal *Balancer) SetKeepServices(srvList arvados.KeepServiceList) error {
 
 // DiscoverKeepServices sets the list of KeepServices by calling the
 // API to get a list of all services, and selecting the ones whose
-// ServiceType is in okTypes.
-func (bal *Balancer) DiscoverKeepServices(c *arvados.Client, okTypes []string) error {
+// ServiceType is "disk".
+func (bal *Balancer) DiscoverKeepServices(c *arvados.Client) error {
        bal.KeepServices = make(map[string]*KeepService)
-       ok := make(map[string]bool)
-       for _, t := range okTypes {
-               ok[t] = true
-       }
        return c.EachKeepService(func(srv arvados.KeepService) error {
-               if ok[srv.ServiceType] {
+               if srv.ServiceType == "disk" {
                        bal.KeepServices[srv.UUID] = &KeepService{
                                KeepService: srv,
                                ChangeSet:   &ChangeSet{},
@@ -449,7 +441,7 @@ func (bal *Balancer) addCollection(coll arvados.Collection) error {
        if coll.ReplicationDesired != nil {
                repl = *coll.ReplicationDesired
        }
-       debugf("%v: %d block x%d", coll.UUID, len(blkids), repl)
+       bal.Logger.Debugf("%v: %d block x%d", coll.UUID, len(blkids), repl)
        // Pass pdh to IncreaseDesired only if LostBlocksFile is being
        // written -- otherwise it's just a waste of memory.
        pdh := ""
@@ -523,7 +515,7 @@ func (bal *Balancer) setupLookupTables() {
                                bal.mountsByClass["default"][mnt] = true
                                continue
                        }
-                       for _, class := range mnt.StorageClasses {
+                       for class := range mnt.StorageClasses {
                                if mbc := bal.mountsByClass[class]; mbc == nil {
                                        bal.classes = append(bal.classes, class)
                                        bal.mountsByClass[class] = map[*KeepMount]bool{mnt: true}
@@ -566,7 +558,7 @@ type balanceResult struct {
 // balanceBlock compares current state to desired state for a single
 // block, and makes the appropriate ChangeSet calls.
 func (bal *Balancer) balanceBlock(blkid arvados.SizedDigest, blk *BlockState) balanceResult {
-       debugf("balanceBlock: %v %+v", blkid, blk)
+       bal.Logger.Debugf("balanceBlock: %v %+v", blkid, blk)
 
        type slot struct {
                mnt  *KeepMount // never nil
index db530bc4926de88502132730f35c816ec3cf92b6..a3abc9f96a466ea00dab1047e3b9254d93fcebf9 100644 (file)
@@ -5,6 +5,7 @@
 package main
 
 import (
+       "bytes"
        "encoding/json"
        "fmt"
        "io"
@@ -16,8 +17,12 @@ import (
        "sync"
        "time"
 
+       "git.curoverse.com/arvados.git/lib/config"
        "git.curoverse.com/arvados.git/sdk/go/arvados"
-       "github.com/sirupsen/logrus"
+       "git.curoverse.com/arvados.git/sdk/go/arvadostest"
+       "git.curoverse.com/arvados.git/sdk/go/ctxlog"
+       "github.com/prometheus/client_golang/prometheus"
+       "github.com/prometheus/common/expfmt"
        check "gopkg.in/check.v1"
 )
 
@@ -303,41 +308,36 @@ func (s *stubServer) serveKeepstorePull() *reqTracker {
 
 type runSuite struct {
        stub   stubServer
-       config Config
-}
-
-// make a log.Logger that writes to the current test's c.Log().
-func (s *runSuite) logger(c *check.C) *logrus.Logger {
-       r, w := io.Pipe()
-       go func() {
-               buf := make([]byte, 10000)
-               for {
-                       n, err := r.Read(buf)
-                       if n > 0 {
-                               if buf[n-1] == '\n' {
-                                       n--
-                               }
-                               c.Log(string(buf[:n]))
-                       }
-                       if err != nil {
-                               break
-                       }
-               }
-       }()
-       logger := logrus.New()
-       logger.Out = w
-       return logger
+       config *arvados.Cluster
+       client *arvados.Client
+}
+
+func (s *runSuite) newServer(options *RunOptions) *Server {
+       srv := &Server{
+               Cluster:    s.config,
+               ArvClient:  s.client,
+               RunOptions: *options,
+               Metrics:    newMetrics(prometheus.NewRegistry()),
+               Logger:     options.Logger,
+               Dumper:     options.Dumper,
+       }
+       return srv
 }
 
 func (s *runSuite) SetUpTest(c *check.C) {
-       s.config = Config{
-               Client: arvados.Client{
-                       AuthToken: "xyzzy",
-                       APIHost:   "zzzzz.arvadosapi.com",
-                       Client:    s.stub.Start()},
-               KeepServiceTypes: []string{"disk"},
-               RunPeriod:        arvados.Duration(time.Second),
-       }
+       cfg, err := config.NewLoader(nil, ctxlog.TestLogger(c)).Load()
+       c.Assert(err, check.Equals, nil)
+       s.config, err = cfg.GetCluster("")
+       c.Assert(err, check.Equals, nil)
+
+       s.config.Collections.BalancePeriod = arvados.Duration(time.Second)
+       arvadostest.SetServiceURL(&s.config.Services.Keepbalance, "http://localhost:/")
+
+       s.client = &arvados.Client{
+               AuthToken: "xyzzy",
+               APIHost:   "zzzzz.arvadosapi.com",
+               Client:    s.stub.Start()}
+
        s.stub.serveDiscoveryDoc()
        s.stub.logf = c.Logf
 }
@@ -350,7 +350,7 @@ func (s *runSuite) TestRefuseZeroCollections(c *check.C) {
        opts := RunOptions{
                CommitPulls: true,
                CommitTrash: true,
-               Logger:      s.logger(c),
+               Logger:      ctxlog.TestLogger(c),
        }
        s.stub.serveCurrentUserAdmin()
        s.stub.serveZeroCollections()
@@ -359,40 +359,18 @@ func (s *runSuite) TestRefuseZeroCollections(c *check.C) {
        s.stub.serveKeepstoreIndexFoo4Bar1()
        trashReqs := s.stub.serveKeepstoreTrash()
        pullReqs := s.stub.serveKeepstorePull()
-       srv, err := NewServer(s.config, opts)
-       c.Assert(err, check.IsNil)
-       _, err = srv.Run()
+       srv := s.newServer(&opts)
+       _, err := srv.runOnce()
        c.Check(err, check.ErrorMatches, "received zero collections")
        c.Check(trashReqs.Count(), check.Equals, 4)
        c.Check(pullReqs.Count(), check.Equals, 0)
 }
 
-func (s *runSuite) TestServiceTypes(c *check.C) {
-       opts := RunOptions{
-               CommitPulls: true,
-               CommitTrash: true,
-               Logger:      s.logger(c),
-       }
-       s.config.KeepServiceTypes = []string{"unlisted-type"}
-       s.stub.serveCurrentUserAdmin()
-       s.stub.serveFooBarFileCollections()
-       s.stub.serveKeepServices(stubServices)
-       s.stub.serveKeepstoreMounts()
-       indexReqs := s.stub.serveKeepstoreIndexFoo4Bar1()
-       trashReqs := s.stub.serveKeepstoreTrash()
-       srv, err := NewServer(s.config, opts)
-       c.Assert(err, check.IsNil)
-       _, err = srv.Run()
-       c.Check(err, check.IsNil)
-       c.Check(indexReqs.Count(), check.Equals, 0)
-       c.Check(trashReqs.Count(), check.Equals, 0)
-}
-
 func (s *runSuite) TestRefuseNonAdmin(c *check.C) {
        opts := RunOptions{
                CommitPulls: true,
                CommitTrash: true,
-               Logger:      s.logger(c),
+               Logger:      ctxlog.TestLogger(c),
        }
        s.stub.serveCurrentUserNotAdmin()
        s.stub.serveZeroCollections()
@@ -400,9 +378,8 @@ func (s *runSuite) TestRefuseNonAdmin(c *check.C) {
        s.stub.serveKeepstoreMounts()
        trashReqs := s.stub.serveKeepstoreTrash()
        pullReqs := s.stub.serveKeepstorePull()
-       srv, err := NewServer(s.config, opts)
-       c.Assert(err, check.IsNil)
-       _, err = srv.Run()
+       srv := s.newServer(&opts)
+       _, err := srv.runOnce()
        c.Check(err, check.ErrorMatches, "current user .* is not .* admin user")
        c.Check(trashReqs.Count(), check.Equals, 0)
        c.Check(pullReqs.Count(), check.Equals, 0)
@@ -412,7 +389,7 @@ func (s *runSuite) TestDetectSkippedCollections(c *check.C) {
        opts := RunOptions{
                CommitPulls: true,
                CommitTrash: true,
-               Logger:      s.logger(c),
+               Logger:      ctxlog.TestLogger(c),
        }
        s.stub.serveCurrentUserAdmin()
        s.stub.serveCollectionsButSkipOne()
@@ -421,9 +398,8 @@ func (s *runSuite) TestDetectSkippedCollections(c *check.C) {
        s.stub.serveKeepstoreIndexFoo4Bar1()
        trashReqs := s.stub.serveKeepstoreTrash()
        pullReqs := s.stub.serveKeepstorePull()
-       srv, err := NewServer(s.config, opts)
-       c.Assert(err, check.IsNil)
-       _, err = srv.Run()
+       srv := s.newServer(&opts)
+       _, err := srv.runOnce()
        c.Check(err, check.ErrorMatches, `Retrieved 2 collections with modtime <= .* but server now reports there are 3 collections.*`)
        c.Check(trashReqs.Count(), check.Equals, 4)
        c.Check(pullReqs.Count(), check.Equals, 0)
@@ -432,12 +408,12 @@ func (s *runSuite) TestDetectSkippedCollections(c *check.C) {
 func (s *runSuite) TestWriteLostBlocks(c *check.C) {
        lostf, err := ioutil.TempFile("", "keep-balance-lost-blocks-test-")
        c.Assert(err, check.IsNil)
-       s.config.LostBlocksFile = lostf.Name()
+       s.config.Collections.BlobMissingReport = lostf.Name()
        defer os.Remove(lostf.Name())
        opts := RunOptions{
                CommitPulls: true,
                CommitTrash: true,
-               Logger:      s.logger(c),
+               Logger:      ctxlog.TestLogger(c),
        }
        s.stub.serveCurrentUserAdmin()
        s.stub.serveFooBarFileCollections()
@@ -446,9 +422,9 @@ func (s *runSuite) TestWriteLostBlocks(c *check.C) {
        s.stub.serveKeepstoreIndexFoo1()
        s.stub.serveKeepstoreTrash()
        s.stub.serveKeepstorePull()
-       srv, err := NewServer(s.config, opts)
+       srv := s.newServer(&opts)
        c.Assert(err, check.IsNil)
-       _, err = srv.Run()
+       _, err = srv.runOnce()
        c.Check(err, check.IsNil)
        lost, err := ioutil.ReadFile(lostf.Name())
        c.Assert(err, check.IsNil)
@@ -459,7 +435,7 @@ func (s *runSuite) TestDryRun(c *check.C) {
        opts := RunOptions{
                CommitPulls: false,
                CommitTrash: false,
-               Logger:      s.logger(c),
+               Logger:      ctxlog.TestLogger(c),
        }
        s.stub.serveCurrentUserAdmin()
        collReqs := s.stub.serveFooBarFileCollections()
@@ -468,9 +444,8 @@ func (s *runSuite) TestDryRun(c *check.C) {
        s.stub.serveKeepstoreIndexFoo4Bar1()
        trashReqs := s.stub.serveKeepstoreTrash()
        pullReqs := s.stub.serveKeepstorePull()
-       srv, err := NewServer(s.config, opts)
-       c.Assert(err, check.IsNil)
-       bal, err := srv.Run()
+       srv := s.newServer(&opts)
+       bal, err := srv.runOnce()
        c.Check(err, check.IsNil)
        for _, req := range collReqs.reqs {
                c.Check(req.Form.Get("include_trash"), check.Equals, "true")
@@ -486,16 +461,15 @@ func (s *runSuite) TestDryRun(c *check.C) {
 func (s *runSuite) TestCommit(c *check.C) {
        lostf, err := ioutil.TempFile("", "keep-balance-lost-blocks-test-")
        c.Assert(err, check.IsNil)
-       s.config.LostBlocksFile = lostf.Name()
+       s.config.Collections.BlobMissingReport = lostf.Name()
        defer os.Remove(lostf.Name())
 
-       s.config.Listen = ":"
        s.config.ManagementToken = "xyzzy"
        opts := RunOptions{
                CommitPulls: true,
                CommitTrash: true,
-               Logger:      s.logger(c),
-               Dumper:      s.logger(c),
+               Logger:      ctxlog.TestLogger(c),
+               Dumper:      ctxlog.TestLogger(c),
        }
        s.stub.serveCurrentUserAdmin()
        s.stub.serveFooBarFileCollections()
@@ -504,9 +478,8 @@ func (s *runSuite) TestCommit(c *check.C) {
        s.stub.serveKeepstoreIndexFoo4Bar1()
        trashReqs := s.stub.serveKeepstoreTrash()
        pullReqs := s.stub.serveKeepstorePull()
-       srv, err := NewServer(s.config, opts)
-       c.Assert(err, check.IsNil)
-       bal, err := srv.Run()
+       srv := s.newServer(&opts)
+       bal, err := srv.runOnce()
        c.Check(err, check.IsNil)
        c.Check(trashReqs.Count(), check.Equals, 8)
        c.Check(pullReqs.Count(), check.Equals, 4)
@@ -520,22 +493,22 @@ func (s *runSuite) TestCommit(c *check.C) {
        c.Assert(err, check.IsNil)
        c.Check(string(lost), check.Equals, "")
 
-       metrics := s.getMetrics(c, srv)
-       c.Check(metrics, check.Matches, `(?ms).*\narvados_keep_total_bytes 15\n.*`)
-       c.Check(metrics, check.Matches, `(?ms).*\narvados_keepbalance_changeset_compute_seconds_sum [0-9\.]+\n.*`)
-       c.Check(metrics, check.Matches, `(?ms).*\narvados_keepbalance_changeset_compute_seconds_count 1\n.*`)
-       c.Check(metrics, check.Matches, `(?ms).*\narvados_keep_dedup_byte_ratio 1\.5\n.*`)
-       c.Check(metrics, check.Matches, `(?ms).*\narvados_keep_dedup_block_ratio 1\.5\n.*`)
+       buf, err := s.getMetrics(c, srv)
+       c.Check(err, check.IsNil)
+       c.Check(buf, check.Matches, `(?ms).*\narvados_keep_total_bytes 15\n.*`)
+       c.Check(buf, check.Matches, `(?ms).*\narvados_keepbalance_changeset_compute_seconds_sum [0-9\.]+\n.*`)
+       c.Check(buf, check.Matches, `(?ms).*\narvados_keepbalance_changeset_compute_seconds_count 1\n.*`)
+       c.Check(buf, check.Matches, `(?ms).*\narvados_keep_dedup_byte_ratio 1\.5\n.*`)
+       c.Check(buf, check.Matches, `(?ms).*\narvados_keep_dedup_block_ratio 1\.5\n.*`)
 }
 
 func (s *runSuite) TestRunForever(c *check.C) {
-       s.config.Listen = ":"
        s.config.ManagementToken = "xyzzy"
        opts := RunOptions{
                CommitPulls: true,
                CommitTrash: true,
-               Logger:      s.logger(c),
-               Dumper:      s.logger(c),
+               Logger:      ctxlog.TestLogger(c),
+               Dumper:      ctxlog.TestLogger(c),
        }
        s.stub.serveCurrentUserAdmin()
        s.stub.serveFooBarFileCollections()
@@ -546,13 +519,12 @@ func (s *runSuite) TestRunForever(c *check.C) {
        pullReqs := s.stub.serveKeepstorePull()
 
        stop := make(chan interface{})
-       s.config.RunPeriod = arvados.Duration(time.Millisecond)
-       srv, err := NewServer(s.config, opts)
-       c.Assert(err, check.IsNil)
+       s.config.Collections.BalancePeriod = arvados.Duration(time.Millisecond)
+       srv := s.newServer(&opts)
 
        done := make(chan bool)
        go func() {
-               srv.RunForever(stop)
+               srv.runForever(stop)
                close(done)
        }()
 
@@ -567,18 +539,24 @@ func (s *runSuite) TestRunForever(c *check.C) {
        <-done
        c.Check(pullReqs.Count() >= 16, check.Equals, true)
        c.Check(trashReqs.Count(), check.Equals, pullReqs.Count()+4)
-       c.Check(s.getMetrics(c, srv), check.Matches, `(?ms).*\narvados_keepbalance_changeset_compute_seconds_count `+fmt.Sprintf("%d", pullReqs.Count()/4)+`\n.*`)
+
+       buf, err := s.getMetrics(c, srv)
+       c.Check(err, check.IsNil)
+       c.Check(buf, check.Matches, `(?ms).*\narvados_keepbalance_changeset_compute_seconds_count `+fmt.Sprintf("%d", pullReqs.Count()/4)+`\n.*`)
 }
 
-func (s *runSuite) getMetrics(c *check.C, srv *Server) string {
-       resp, err := http.Get("http://" + srv.listening + "/metrics")
-       c.Assert(err, check.IsNil)
-       c.Check(resp.StatusCode, check.Equals, http.StatusUnauthorized)
+func (s *runSuite) getMetrics(c *check.C, srv *Server) (*bytes.Buffer, error) {
+       mfs, err := srv.Metrics.reg.Gather()
+       if err != nil {
+               return nil, err
+       }
 
-       resp, err = http.Get("http://" + srv.listening + "/metrics?api_token=xyzzy")
-       c.Assert(err, check.IsNil)
-       c.Check(resp.StatusCode, check.Equals, http.StatusOK)
-       buf, err := ioutil.ReadAll(resp.Body)
-       c.Check(err, check.IsNil)
-       return string(buf)
+       var buf bytes.Buffer
+       for _, mf := range mfs {
+               if _, err := expfmt.MetricFamilyToText(&buf, mf); err != nil {
+                       return nil, err
+               }
+       }
+
+       return &buf, nil
 }
index 2259b3d8cf8e87ac83d08df9a15dfdd17c8d02c6..6cffa8ded4dbad6975225949e871852e5ca2d50e 100644 (file)
@@ -13,6 +13,7 @@ import (
        "time"
 
        "git.curoverse.com/arvados.git/sdk/go/arvados"
+       "git.curoverse.com/arvados.git/sdk/go/ctxlog"
        check "gopkg.in/check.v1"
 )
 
@@ -69,6 +70,7 @@ func (bal *balancerSuite) SetUpSuite(c *check.C) {
        }
 
        bal.signatureTTL = 3600
+       bal.Logger = ctxlog.TestLogger(c)
 }
 
 func (bal *balancerSuite) SetUpTest(c *check.C) {
@@ -524,7 +526,7 @@ func (bal *balancerSuite) TestChangeStorageClasses(c *check.C) {
        bal.srvs[9].mounts = []*KeepMount{{
                KeepMount: arvados.KeepMount{
                        Replication:    1,
-                       StorageClasses: []string{"special"},
+                       StorageClasses: map[string]bool{"special": true},
                        UUID:           "zzzzz-mount-special00000009",
                        DeviceID:       "9-special",
                },
@@ -532,7 +534,7 @@ func (bal *balancerSuite) TestChangeStorageClasses(c *check.C) {
        }, {
                KeepMount: arvados.KeepMount{
                        Replication:    1,
-                       StorageClasses: []string{"special", "special2"},
+                       StorageClasses: map[string]bool{"special": true, "special2": true},
                        UUID:           "zzzzz-mount-special20000009",
                        DeviceID:       "9-special-and-special2",
                },
@@ -544,7 +546,7 @@ func (bal *balancerSuite) TestChangeStorageClasses(c *check.C) {
        bal.srvs[13].mounts = []*KeepMount{{
                KeepMount: arvados.KeepMount{
                        Replication:    1,
-                       StorageClasses: []string{"special2"},
+                       StorageClasses: map[string]bool{"special2": true},
                        UUID:           "zzzzz-mount-special2000000d",
                        DeviceID:       "13-special2",
                },
@@ -552,7 +554,7 @@ func (bal *balancerSuite) TestChangeStorageClasses(c *check.C) {
        }, {
                KeepMount: arvados.KeepMount{
                        Replication:    1,
-                       StorageClasses: []string{"default"},
+                       StorageClasses: map[string]bool{"default": true},
                        UUID:           "zzzzz-mount-00000000000000d",
                        DeviceID:       "13-default",
                },
index 6aaf07abae395241fdbd5f26be8ae111f14aac1f..a2200e1db90a4ddf69fd65112c432df9bbcba2c6 100644 (file)
@@ -29,7 +29,7 @@ func (s *integrationSuite) TestIdenticalTimestamps(c *check.C) {
                        longestStreak := 0
                        var lastMod time.Time
                        sawUUID := make(map[string]bool)
-                       err := EachCollection(&s.config.Client, pageSize, func(c arvados.Collection) error {
+                       err := EachCollection(s.client, pageSize, func(c arvados.Collection) error {
                                if c.ModifiedAt == nil {
                                        return nil
                                }
index a79779c7dc8f9fdb5eb7316a74c28fb614d9da52..5b0dc123ae49a627a98c4f5254ff6a1649e869e6 100644 (file)
@@ -11,10 +11,13 @@ import (
        "testing"
        "time"
 
+       "git.curoverse.com/arvados.git/lib/config"
        "git.curoverse.com/arvados.git/sdk/go/arvados"
        "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
        "git.curoverse.com/arvados.git/sdk/go/arvadostest"
+       "git.curoverse.com/arvados.git/sdk/go/ctxlog"
        "git.curoverse.com/arvados.git/sdk/go/keepclient"
+       "github.com/prometheus/client_golang/prometheus"
        "github.com/sirupsen/logrus"
        check "gopkg.in/check.v1"
 )
@@ -22,7 +25,8 @@ import (
 var _ = check.Suite(&integrationSuite{})
 
 type integrationSuite struct {
-       config     Config
+       config     *arvados.Cluster
+       client     *arvados.Client
        keepClient *keepclient.KeepClient
 }
 
@@ -59,14 +63,16 @@ func (s *integrationSuite) TearDownSuite(c *check.C) {
 }
 
 func (s *integrationSuite) SetUpTest(c *check.C) {
-       s.config = Config{
-               Client: arvados.Client{
-                       APIHost:   os.Getenv("ARVADOS_API_HOST"),
-                       AuthToken: arvadostest.DataManagerToken,
-                       Insecure:  true,
-               },
-               KeepServiceTypes: []string{"disk"},
-               RunPeriod:        arvados.Duration(time.Second),
+       cfg, err := config.NewLoader(nil, ctxlog.TestLogger(c)).Load()
+       c.Assert(err, check.Equals, nil)
+       s.config, err = cfg.GetCluster("")
+       c.Assert(err, check.Equals, nil)
+       s.config.Collections.BalancePeriod = arvados.Duration(time.Second)
+
+       s.client = &arvados.Client{
+               APIHost:   os.Getenv("ARVADOS_API_HOST"),
+               AuthToken: arvadostest.DataManagerToken,
+               Insecure:  true,
        }
 }
 
@@ -84,9 +90,9 @@ func (s *integrationSuite) TestBalanceAPIFixtures(c *check.C) {
 
                bal := &Balancer{
                        Logger:  logger,
-                       Metrics: newMetrics(),
+                       Metrics: newMetrics(prometheus.NewRegistry()),
                }
-               nextOpts, err := bal.Run(s.config, opts)
+               nextOpts, err := bal.Run(s.client, s.config, opts)
                c.Check(err, check.IsNil)
                c.Check(nextOpts.SafeRendezvousState, check.Not(check.Equals), "")
                c.Check(nextOpts.CommitPulls, check.Equals, true)
index 563871607874f9ad44a07315ce08bfd68274a23b..1b71fb4e44350bac913961e598494d0c01a333ab 100644 (file)
@@ -6,7 +6,6 @@
 Description=Arvados Keep Balance
 Documentation=https://doc.arvados.org/
 After=network.target
-AssertPathExists=/etc/arvados/keep-balance/keep-balance.yml
 
 # systemd==229 (ubuntu:xenial) obeys StartLimitInterval in the [Unit] section
 StartLimitInterval=0
index 84516a821060da1b795da1b40655a9a62157fd52..cf844ab050043c1662c3cf5cd62ef9ef238a6789 100644 (file)
@@ -5,97 +5,85 @@
 package main
 
 import (
-       "encoding/json"
+       "context"
        "flag"
        "fmt"
-       "log"
-       "net/http"
+       "io"
        "os"
-       "time"
 
+       "git.curoverse.com/arvados.git/lib/config"
+       "git.curoverse.com/arvados.git/lib/service"
        "git.curoverse.com/arvados.git/sdk/go/arvados"
-       "git.curoverse.com/arvados.git/sdk/go/config"
+       "git.curoverse.com/arvados.git/sdk/go/ctxlog"
+       "github.com/prometheus/client_golang/prometheus"
        "github.com/sirupsen/logrus"
 )
 
-var debugf = func(string, ...interface{}) {}
-
 func main() {
-       var cfg Config
-       var runOptions RunOptions
+       os.Exit(runCommand(os.Args[0], os.Args[1:], os.Stdin, os.Stdout, os.Stderr))
+}
+
+func runCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int {
+       logger := ctxlog.FromContext(context.Background())
 
-       configPath := flag.String("config", defaultConfigPath,
-               "`path` of JSON or YAML configuration file")
-       serviceListPath := flag.String("config.KeepServiceList", "",
-               "`path` of JSON or YAML file with list of keep services to balance, as given by \"arv keep_service list\" "+
-                       "(default: config[\"KeepServiceList\"], or if none given, get all available services and filter by config[\"KeepServiceTypes\"])")
-       flag.BoolVar(&runOptions.Once, "once", false,
+       var options RunOptions
+       flags := flag.NewFlagSet(prog, flag.ExitOnError)
+       flags.BoolVar(&options.Once, "once", false,
                "balance once and then exit")
-       flag.BoolVar(&runOptions.CommitPulls, "commit-pulls", false,
+       flags.BoolVar(&options.CommitPulls, "commit-pulls", false,
                "send pull requests (make more replicas of blocks that are underreplicated or are not in optimal rendezvous probe order)")
-       flag.BoolVar(&runOptions.CommitTrash, "commit-trash", false,
+       flags.BoolVar(&options.CommitTrash, "commit-trash", false,
                "send trash requests (delete unreferenced old blocks, and excess replicas of overreplicated blocks)")
-       dumpConfig := flag.Bool("dump-config", false, "write current configuration to stdout and exit")
-       dumpFlag := flag.Bool("dump", false, "dump details for each block to stdout")
-       debugFlag := flag.Bool("debug", false, "enable debug messages")
-       getVersion := flag.Bool("version", false, "Print version information and exit.")
-       flag.Usage = usage
-       flag.Parse()
+       flags.Bool("version", false, "Write version information to stdout and exit 0")
+       dumpFlag := flags.Bool("dump", false, "dump details for each block to stdout")
 
-       // Print version information if requested
-       if *getVersion {
-               fmt.Printf("keep-balance %s\n", version)
-               return
-       }
+       loader := config.NewLoader(stdin, logger) // use the caller-supplied stdin, not the process-global os.Stdin
+       loader.SetupFlags(flags)
 
-       mustReadConfig(&cfg, *configPath)
-       if *serviceListPath != "" {
-               mustReadConfig(&cfg.KeepServiceList, *serviceListPath)
-       }
+       munged := loader.MungeLegacyConfigArgs(logger, args, "-legacy-keepbalance-config")
+       flags.Parse(munged)
 
-       if *dumpConfig {
-               log.Fatal(config.DumpAndExit(cfg))
-       }
-
-       to := time.Duration(cfg.RequestTimeout)
-       if to == 0 {
-               to = 30 * time.Minute
-       }
-       arvados.DefaultSecureClient.Timeout = to
-       arvados.InsecureHTTPClient.Timeout = to
-       http.DefaultClient.Timeout = to
-
-       log.Printf("keep-balance %s started", version)
-
-       if *debugFlag {
-               debugf = log.Printf
-               if j, err := json.Marshal(cfg); err != nil {
-                       log.Fatal(err)
-               } else {
-                       log.Printf("config is %s", j)
-               }
-       }
        if *dumpFlag {
                dumper := logrus.New()
                dumper.Out = os.Stdout
                dumper.Formatter = &logrus.TextFormatter{}
-               runOptions.Dumper = dumper
-       }
-       srv, err := NewServer(cfg, runOptions)
-       if err != nil {
-               // (don't run)
-       } else if runOptions.Once {
-               _, err = srv.Run()
-       } else {
-               err = srv.RunForever(nil)
-       }
-       if err != nil {
-               log.Fatal(err)
+               options.Dumper = dumper
        }
-}
 
-func mustReadConfig(dst interface{}, path string) {
-       if err := config.LoadFile(dst, path); err != nil {
-               log.Fatal(err)
-       }
+       // Only pass along the version flag, which gets handled in RunCommand
+       args = nil
+       flags.Visit(func(f *flag.Flag) {
+               if f.Name == "version" {
+                       args = append(args, "-"+f.Name, f.Value.String())
+               }
+       })
+
+       return service.Command(arvados.ServiceNameKeepbalance,
+               func(ctx context.Context, cluster *arvados.Cluster, token string, registry *prometheus.Registry) service.Handler {
+                       if !options.Once && cluster.Collections.BalancePeriod == arvados.Duration(0) {
+                               return service.ErrorHandler(ctx, cluster, fmt.Errorf("cannot start service: Collections.BalancePeriod is zero (if you want to run once and then exit, use the -once flag)"))
+                       }
+
+                       ac, err := arvados.NewClientFromConfig(cluster)
+                       if err != nil {
+                               return service.ErrorHandler(ctx, cluster, fmt.Errorf("error initializing client from cluster config: %s", err))
+                       }
+                       ac.AuthToken = token // assign only after the error check: ac must not be dereferenced when err != nil
+
+                       if options.Logger == nil {
+                               options.Logger = ctxlog.FromContext(ctx)
+                       }
+
+                       srv := &Server{
+                               Cluster:    cluster,
+                               ArvClient:  ac,
+                               RunOptions: options,
+                               Metrics:    newMetrics(registry),
+                               Logger:     options.Logger,
+                               Dumper:     options.Dumper,
+                       }
+
+                       go srv.run()
+                       return srv
+               }).RunCommand(prog, args, stdin, stdout, stderr)
 }
diff --git a/services/keep-balance/main_test.go b/services/keep-balance/main_test.go
deleted file mode 100644 (file)
index a280434..0000000
+++ /dev/null
@@ -1,46 +0,0 @@
-// Copyright (C) The Arvados Authors. All rights reserved.
-//
-// SPDX-License-Identifier: AGPL-3.0
-
-package main
-
-import (
-       "time"
-
-       "github.com/ghodss/yaml"
-       check "gopkg.in/check.v1"
-)
-
-var _ = check.Suite(&mainSuite{})
-
-type mainSuite struct{}
-
-func (s *mainSuite) TestExampleJSON(c *check.C) {
-       var config Config
-       c.Check(yaml.Unmarshal(exampleConfigFile, &config), check.IsNil)
-       c.Check(config.KeepServiceTypes, check.DeepEquals, []string{"disk"})
-       c.Check(config.Client.AuthToken, check.Equals, "xyzzy")
-       c.Check(time.Duration(config.RunPeriod), check.Equals, 600*time.Second)
-}
-
-func (s *mainSuite) TestConfigJSONWithKeepServiceList(c *check.C) {
-       var config Config
-       c.Check(yaml.Unmarshal([]byte(`{
-                   "Client": {
-                       "APIHost": "zzzzz.arvadosapi.com:443",
-                       "AuthToken": "xyzzy",
-                       "Insecure": false
-                   },
-                   "KeepServiceList": {
-                       "items": [
-                           {"uuid":"zzzzz-bi64l-abcdefghijklmno", "service_type":"disk", "service_host":"a.zzzzz.arvadosapi.com", "service_port":12345},
-                           {"uuid":"zzzzz-bi64l-bcdefghijklmnop", "service_type":"blob", "service_host":"b.zzzzz.arvadosapi.com", "service_port":12345}
-                       ]
-                   },
-                   "RunPeriod": "600s"
-               }`), &config), check.IsNil)
-       c.Assert(len(config.KeepServiceList.Items), check.Equals, 2)
-       c.Check(config.KeepServiceList.Items[0].UUID, check.Equals, "zzzzz-bi64l-abcdefghijklmno")
-       c.Check(config.KeepServiceList.Items[0].ServicePort, check.Equals, 12345)
-       c.Check(config.Client.AuthToken, check.Equals, "xyzzy")
-}
index 5f3c98723d02a82e9410053657d3262dca3af1be..ce1b1811cc69f28f3fad955a5525b35a666baf3a 100644 (file)
@@ -24,9 +24,9 @@ type metrics struct {
        mtx         sync.Mutex
 }
 
-func newMetrics() *metrics {
+func newMetrics(registry *prometheus.Registry) *metrics {
        return &metrics{
-               reg:         prometheus.NewRegistry(),
+               reg:         registry,
                statsGauges: map[string]setter{},
                observers:   map[string]observer{},
        }
index e2f13a425ed8dfabc729649d98aa7e4ed977899a..b6806d552a89d750d2fbb51a8dce4faa70903b3e 100644 (file)
@@ -5,8 +5,6 @@
 package main
 
 import (
-       "context"
-       "fmt"
        "net/http"
        "os"
        "os/signal"
@@ -14,57 +12,9 @@ import (
        "time"
 
        "git.curoverse.com/arvados.git/sdk/go/arvados"
-       "git.curoverse.com/arvados.git/sdk/go/auth"
-       "git.curoverse.com/arvados.git/sdk/go/ctxlog"
-       "git.curoverse.com/arvados.git/sdk/go/httpserver"
        "github.com/sirupsen/logrus"
 )
 
-var version = "dev"
-
-const (
-       defaultConfigPath = "/etc/arvados/keep-balance/keep-balance.yml"
-       rfc3339NanoFixed  = "2006-01-02T15:04:05.000000000Z07:00"
-)
-
-// Config specifies site configuration, like API credentials and the
-// choice of which servers are to be balanced.
-//
-// Config is loaded from a JSON config file (see usage()).
-type Config struct {
-       // Arvados API endpoint and credentials.
-       Client arvados.Client
-
-       // List of service types (e.g., "disk") to balance.
-       KeepServiceTypes []string
-
-       KeepServiceList arvados.KeepServiceList
-
-       // address, address:port, or :port for management interface
-       Listen string
-
-       // token for management APIs
-       ManagementToken string
-
-       // How often to check
-       RunPeriod arvados.Duration
-
-       // Number of collections to request in each API call
-       CollectionBatchSize int
-
-       // Max collections to buffer in memory (bigger values consume
-       // more memory, but can reduce store-and-forward latency when
-       // fetching pages)
-       CollectionBuffers int
-
-       // Timeout for outgoing http request/response cycle.
-       RequestTimeout arvados.Duration
-
-       // Destination filename for the list of lost block hashes, one
-       // per line. Updated atomically during each successful run.
-       LostBlocksFile string
-}
-
 // RunOptions controls runtime behavior. The flags/options that belong
 // here are the ones that are useful for interactive use. For example,
 // "CommitTrash" is a runtime option rather than a config item because
@@ -87,100 +37,70 @@ type RunOptions struct {
 }
 
 type Server struct {
-       config     Config
-       runOptions RunOptions
-       metrics    *metrics
-       listening  string // for tests
+       http.Handler
+
+       Cluster    *arvados.Cluster
+       ArvClient  *arvados.Client
+       RunOptions RunOptions
+       Metrics    *metrics
 
        Logger logrus.FieldLogger
        Dumper logrus.FieldLogger
 }
 
-// NewServer returns a new Server that runs Balancers using the given
-// config and runOptions.
-func NewServer(config Config, runOptions RunOptions) (*Server, error) {
-       if len(config.KeepServiceList.Items) > 0 && config.KeepServiceTypes != nil {
-               return nil, fmt.Errorf("cannot specify both KeepServiceList and KeepServiceTypes in config")
-       }
-       if !runOptions.Once && config.RunPeriod == arvados.Duration(0) {
-               return nil, fmt.Errorf("you must either use the -once flag, or specify RunPeriod in config")
-       }
-
-       if runOptions.Logger == nil {
-               log := logrus.New()
-               log.Formatter = &logrus.JSONFormatter{
-                       TimestampFormat: rfc3339NanoFixed,
-               }
-               log.Out = os.Stderr
-               runOptions.Logger = log
-       }
-
-       srv := &Server{
-               config:     config,
-               runOptions: runOptions,
-               metrics:    newMetrics(),
-               Logger:     runOptions.Logger,
-               Dumper:     runOptions.Dumper,
-       }
-       return srv, srv.start()
+// CheckHealth implements service.Handler.
+func (srv *Server) CheckHealth() error {
+       return nil
 }
 
-func (srv *Server) start() error {
-       if srv.config.Listen == "" {
-               return nil
-       }
-       ctx := ctxlog.Context(context.Background(), srv.Logger)
-       server := &httpserver.Server{
-               Server: http.Server{
-                       Handler: httpserver.HandlerWithContext(ctx,
-                               httpserver.LogRequests(
-                                       auth.RequireLiteralToken(srv.config.ManagementToken,
-                                               srv.metrics.Handler(srv.Logger)))),
-               },
-               Addr: srv.config.Listen,
+func (srv *Server) run() {
+       var err error
+       if srv.RunOptions.Once {
+               _, err = srv.runOnce()
+       } else {
+               err = srv.runForever(nil)
        }
-       err := server.Start()
        if err != nil {
-               return err
+               srv.Logger.Error(err)
+               os.Exit(1)
+       } else {
+               os.Exit(0)
        }
-       srv.Logger.Printf("listening at %s", server.Addr)
-       srv.listening = server.Addr
-       return nil
 }
 
-func (srv *Server) Run() (*Balancer, error) {
+func (srv *Server) runOnce() (*Balancer, error) {
        bal := &Balancer{
                Logger:         srv.Logger,
                Dumper:         srv.Dumper,
-               Metrics:        srv.metrics,
-               LostBlocksFile: srv.config.LostBlocksFile,
+               Metrics:        srv.Metrics,
+               LostBlocksFile: srv.Cluster.Collections.BlobMissingReport,
        }
        var err error
-       srv.runOptions, err = bal.Run(srv.config, srv.runOptions)
+       srv.RunOptions, err = bal.Run(srv.ArvClient, srv.Cluster, srv.RunOptions)
        return bal, err
 }
 
 // RunForever runs forever, or (for testing purposes) until the given
 // stop channel is ready to receive.
-func (srv *Server) RunForever(stop <-chan interface{}) error {
-       logger := srv.runOptions.Logger
+func (srv *Server) runForever(stop <-chan interface{}) error {
+       logger := srv.Logger
 
-       ticker := time.NewTicker(time.Duration(srv.config.RunPeriod))
+       ticker := time.NewTicker(time.Duration(srv.Cluster.Collections.BalancePeriod))
 
        // The unbuffered channel here means we only hear SIGUSR1 if
        // it arrives while we're waiting in select{}.
        sigUSR1 := make(chan os.Signal)
        signal.Notify(sigUSR1, syscall.SIGUSR1)
 
-       logger.Printf("starting up: will scan every %v and on SIGUSR1", srv.config.RunPeriod)
+       logger.Printf("starting up: will scan every %v and on SIGUSR1", srv.Cluster.Collections.BalancePeriod)
 
        for {
-               if !srv.runOptions.CommitPulls && !srv.runOptions.CommitTrash {
+               if !srv.RunOptions.CommitPulls && !srv.RunOptions.CommitTrash {
                        logger.Print("WARNING: Will scan periodically, but no changes will be committed.")
                        logger.Print("=======  Consider using -commit-pulls and -commit-trash flags.")
                }
 
-               _, err := srv.Run()
+               _, err := srv.runOnce()
                if err != nil {
                        logger.Print("run failed: ", err)
                } else {
@@ -199,7 +119,7 @@ func (srv *Server) RunForever(stop <-chan interface{}) error {
                        // run too soon after the Nth run is triggered
                        // by SIGUSR1.
                        ticker.Stop()
-                       ticker = time.NewTicker(time.Duration(srv.config.RunPeriod))
+                       ticker = time.NewTicker(time.Duration(srv.Cluster.Collections.BalancePeriod))
                }
                logger.Print("starting next run")
        }
diff --git a/services/keep-balance/usage.go b/services/keep-balance/usage.go
deleted file mode 100644 (file)
index b39e839..0000000
+++ /dev/null
@@ -1,106 +0,0 @@
-// Copyright (C) The Arvados Authors. All rights reserved.
-//
-// SPDX-License-Identifier: AGPL-3.0
-
-package main
-
-import (
-       "flag"
-       "fmt"
-       "os"
-)
-
-var exampleConfigFile = []byte(`
-Client:
-    APIHost: zzzzz.arvadosapi.com:443
-    AuthToken: xyzzy
-    Insecure: false
-KeepServiceTypes:
-    - disk
-Listen: ":9005"
-ManagementToken: xyzzy
-RunPeriod: 600s
-CollectionBatchSize: 100000
-CollectionBuffers: 1000
-RequestTimeout: 30m`)
-
-func usage() {
-       fmt.Fprintf(os.Stderr, `
-
-keep-balance rebalances a set of keepstore servers. It creates new
-copies of underreplicated blocks, deletes excess copies of
-overreplicated and unreferenced blocks, and moves blocks to better
-positions (according to the rendezvous hash algorithm) so clients find
-them faster.
-
-Usage: keep-balance [options]
-
-Options:
-`)
-       flag.PrintDefaults()
-       fmt.Fprintf(os.Stderr, `
-Example config file:
-%s
-
-    Client.AuthToken must be recognized by Arvados as an admin token,
-    and must be recognized by all Keep services as a "data manager
-    key".
-
-    Client.Insecure should be true if your Arvados API endpoint uses
-    an unverifiable SSL/TLS certificate.
-
-Periodic scanning:
-
-    By default, keep-balance operates periodically, i.e.: do a
-    scan/balance operation, sleep, repeat.
-
-    RunPeriod determines the interval between start times of
-    successive scan/balance operations. If a scan/balance operation
-    takes longer than RunPeriod, the next one will follow it
-    immediately.
-
-    If SIGUSR1 is received during an idle period between operations,
-    the next operation will start immediately.
-
-One-time scanning:
-
-    Use the -once flag to do a single operation and then exit. The
-    exit code will be zero if the operation was successful.
-
-Committing:
-
-    By default, keep-service computes and reports changes but does not
-    implement them by sending pull and trash lists to the Keep
-    services.
-
-    Use the -commit-pull and -commit-trash flags to implement the
-    computed changes.
-
-Tuning resource usage:
-
-    CollectionBatchSize limits the number of collections retrieved per
-    API transaction. If this is zero or omitted, page size is
-    determined by the API server's own page size limits (see
-    max_items_per_response and max_index_database_read configs).
-
-    CollectionBuffers sets the size of an internal queue of
-    collections. Higher values use more memory, and improve throughput
-    by allowing keep-balance to fetch the next page of collections
-    while the current page is still being processed. If this is zero
-    or omitted, pages are processed serially.
-
-    RequestTimeout is the maximum time keep-balance will spend on a
-    single HTTP request (getting a page of collections, getting the
-    block index from a keepstore server, or sending a trash or pull
-    list to a keepstore server). Defaults to 30 minutes.
-
-Limitations:
-
-    keep-balance does not attempt to discover whether committed pull
-    and trash requests ever get carried out -- only that they are
-    accepted by the Keep services. If some services are full, new
-    copies of underreplicated blocks might never get made, only
-    repeatedly requested.
-
-`, exampleConfigFile)
-}
index 1090151e1ae3fd872da91e6badc3ff277008eba8..34333d43424863c9ced8662a5f6867de6723e48b 100644 (file)
@@ -21,6 +21,7 @@ import (
        "git.curoverse.com/arvados.git/sdk/go/arvados"
        "git.curoverse.com/arvados.git/sdk/go/arvadostest"
        "git.curoverse.com/arvados.git/sdk/go/auth"
+       "git.curoverse.com/arvados.git/sdk/go/ctxlog"
        "git.curoverse.com/arvados.git/sdk/go/keepclient"
        check "gopkg.in/check.v1"
 )
@@ -32,7 +33,7 @@ type UnitSuite struct {
 }
 
 func (s *UnitSuite) SetUpTest(c *check.C) {
-       ldr := config.NewLoader(bytes.NewBufferString("Clusters: {zzzzz: {}}"), nil)
+       ldr := config.NewLoader(bytes.NewBufferString("Clusters: {zzzzz: {}}"), ctxlog.TestLogger(c))
        ldr.Path = "-"
        cfg, err := ldr.Load()
        c.Assert(err, check.IsNil)
index b856090cac4e6557814e040d74a0c87b02e38904..0d2e776f1ab7869cd5a77cbea4d61f503ae9db23 100644 (file)
@@ -22,6 +22,7 @@ import (
        "git.curoverse.com/arvados.git/sdk/go/arvados"
        "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
        "git.curoverse.com/arvados.git/sdk/go/arvadostest"
+       "git.curoverse.com/arvados.git/sdk/go/ctxlog"
        "git.curoverse.com/arvados.git/sdk/go/keepclient"
        check "gopkg.in/check.v1"
 )
@@ -429,7 +430,7 @@ func (s *IntegrationSuite) TearDownSuite(c *check.C) {
 
 func (s *IntegrationSuite) SetUpTest(c *check.C) {
        arvadostest.ResetEnv()
-       ldr := config.NewLoader(bytes.NewBufferString("Clusters: {zzzzz: {}}"), nil)
+       ldr := config.NewLoader(bytes.NewBufferString("Clusters: {zzzzz: {}}"), ctxlog.TestLogger(c))
        ldr.Path = "-"
        arvCfg, err := ldr.Load()
        c.Check(err, check.IsNil)
index d2758cc25f7ea3f92b35ec461f0d41c87958c72e..a6a73c831721e0c4489c041e2d893f5ac68599f9 100644 (file)
@@ -22,6 +22,7 @@ import (
        "git.curoverse.com/arvados.git/sdk/go/arvados"
        "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
        "git.curoverse.com/arvados.git/sdk/go/arvadostest"
+       "git.curoverse.com/arvados.git/sdk/go/ctxlog"
        "git.curoverse.com/arvados.git/sdk/go/keepclient"
        log "github.com/sirupsen/logrus"
 
@@ -99,7 +100,7 @@ func (s *NoKeepServerSuite) TearDownSuite(c *C) {
 }
 
 func runProxy(c *C, bogusClientToken bool) *keepclient.KeepClient {
-       cfg, err := config.NewLoader(nil, nil).Load()
+       cfg, err := config.NewLoader(nil, ctxlog.TestLogger(c)).Load()
        c.Assert(err, Equals, nil)
        cluster, err := cfg.GetCluster("")
        c.Assert(err, Equals, nil)
index 3c17b3bd0641e2bee23007d775b1740e2c7a14d4..50a8edcbb7090728f22cdd576267dae9133e81db 100644 (file)
@@ -7,8 +7,8 @@ package main
 import (
        "bytes"
        "context"
+       "encoding/json"
        "errors"
-       "flag"
        "fmt"
        "io"
        "io/ioutil"
@@ -22,167 +22,48 @@ import (
        "time"
 
        "git.curoverse.com/arvados.git/sdk/go/arvados"
+       "git.curoverse.com/arvados.git/sdk/go/ctxlog"
        "github.com/Azure/azure-sdk-for-go/storage"
        "github.com/prometheus/client_golang/prometheus"
+       "github.com/sirupsen/logrus"
 )
 
-const (
-       azureDefaultRequestTimeout       = arvados.Duration(10 * time.Minute)
-       azureDefaultListBlobsMaxAttempts = 12
-       azureDefaultListBlobsRetryDelay  = arvados.Duration(10 * time.Second)
-)
-
-var (
-       azureMaxGetBytes           int
-       azureStorageAccountName    string
-       azureStorageAccountKeyFile string
-       azureStorageReplication    int
-       azureWriteRaceInterval     = 15 * time.Second
-       azureWriteRacePollTime     = time.Second
-)
-
-func readKeyFromFile(file string) (string, error) {
-       buf, err := ioutil.ReadFile(file)
-       if err != nil {
-               return "", errors.New("reading key from " + file + ": " + err.Error())
-       }
-       accountKey := strings.TrimSpace(string(buf))
-       if accountKey == "" {
-               return "", errors.New("empty account key in " + file)
-       }
-       return accountKey, nil
-}
-
-type azureVolumeAdder struct {
-       *Config
-}
-
-// String implements flag.Value
-func (s *azureVolumeAdder) String() string {
-       return "-"
-}
-
-func (s *azureVolumeAdder) Set(containerName string) error {
-       s.Config.Volumes = append(s.Config.Volumes, &AzureBlobVolume{
-               ContainerName:         containerName,
-               StorageAccountName:    azureStorageAccountName,
-               StorageAccountKeyFile: azureStorageAccountKeyFile,
-               AzureReplication:      azureStorageReplication,
-               ReadOnly:              deprecated.flagReadonly,
-       })
-       return nil
-}
-
 func init() {
-       VolumeTypes = append(VolumeTypes, func() VolumeWithExamples { return &AzureBlobVolume{} })
-
-       flag.Var(&azureVolumeAdder{theConfig},
-               "azure-storage-container-volume",
-               "Use the given container as a storage volume. Can be given multiple times.")
-       flag.StringVar(
-               &azureStorageAccountName,
-               "azure-storage-account-name",
-               "",
-               "Azure storage account name used for subsequent --azure-storage-container-volume arguments.")
-       flag.StringVar(
-               &azureStorageAccountKeyFile,
-               "azure-storage-account-key-file",
-               "",
-               "`File` containing the account key used for subsequent --azure-storage-container-volume arguments.")
-       flag.IntVar(
-               &azureStorageReplication,
-               "azure-storage-replication",
-               3,
-               "Replication level to report to clients when data is stored in an Azure container.")
-       flag.IntVar(
-               &azureMaxGetBytes,
-               "azure-max-get-bytes",
-               BlockSize,
-               fmt.Sprintf("Maximum bytes to request in a single GET request. If smaller than %d, use multiple concurrent range requests to retrieve a block.", BlockSize))
-}
-
-// An AzureBlobVolume stores and retrieves blocks in an Azure Blob
-// container.
-type AzureBlobVolume struct {
-       StorageAccountName    string
-       StorageAccountKeyFile string
-       StorageBaseURL        string // "" means default, "core.windows.net"
-       ContainerName         string
-       AzureReplication      int
-       ReadOnly              bool
-       RequestTimeout        arvados.Duration
-       StorageClasses        []string
-       ListBlobsRetryDelay   arvados.Duration
-       ListBlobsMaxAttempts  int
-
-       azClient  storage.Client
-       container *azureContainer
-}
-
-// singleSender is a single-attempt storage.Sender.
-type singleSender struct{}
-
-// Send performs req exactly once.
-func (*singleSender) Send(c *storage.Client, req *http.Request) (resp *http.Response, err error) {
-       return c.HTTPClient.Do(req)
+       driver["Azure"] = newAzureBlobVolume
 }
 
-// Examples implements VolumeWithExamples.
-func (*AzureBlobVolume) Examples() []Volume {
-       return []Volume{
-               &AzureBlobVolume{
-                       StorageAccountName:    "example-account-name",
-                       StorageAccountKeyFile: "/etc/azure_storage_account_key.txt",
-                       ContainerName:         "example-container-name",
-                       AzureReplication:      3,
-                       RequestTimeout:        azureDefaultRequestTimeout,
-               },
-               &AzureBlobVolume{
-                       StorageAccountName:    "cn-account-name",
-                       StorageAccountKeyFile: "/etc/azure_cn_storage_account_key.txt",
-                       StorageBaseURL:        "core.chinacloudapi.cn",
-                       ContainerName:         "cn-container-name",
-                       AzureReplication:      3,
-                       RequestTimeout:        azureDefaultRequestTimeout,
-               },
+func newAzureBlobVolume(cluster *arvados.Cluster, volume arvados.Volume, logger logrus.FieldLogger, metrics *volumeMetricsVecs) (Volume, error) {
+       v := &AzureBlobVolume{
+               RequestTimeout:    azureDefaultRequestTimeout,
+               WriteRaceInterval: azureDefaultWriteRaceInterval,
+               WriteRacePollTime: azureDefaultWriteRacePollTime,
+               cluster:           cluster,
+               volume:            volume,
+               logger:            logger,
+               metrics:           metrics,
+       }
+       err := json.Unmarshal(volume.DriverParameters, &v)
+       if err != nil {
+               return nil, err
        }
-}
-
-// Type implements Volume.
-func (v *AzureBlobVolume) Type() string {
-       return "Azure"
-}
-
-// Start implements Volume.
-func (v *AzureBlobVolume) Start(vm *volumeMetricsVecs) error {
        if v.ListBlobsRetryDelay == 0 {
                v.ListBlobsRetryDelay = azureDefaultListBlobsRetryDelay
        }
        if v.ListBlobsMaxAttempts == 0 {
                v.ListBlobsMaxAttempts = azureDefaultListBlobsMaxAttempts
        }
-       if v.ContainerName == "" {
-               return errors.New("no container name given")
-       }
-       if v.StorageAccountName == "" || v.StorageAccountKeyFile == "" {
-               return errors.New("StorageAccountName and StorageAccountKeyFile must be given")
-       }
-       accountKey, err := readKeyFromFile(v.StorageAccountKeyFile)
-       if err != nil {
-               return err
-       }
        if v.StorageBaseURL == "" {
                v.StorageBaseURL = storage.DefaultBaseURL
        }
-       v.azClient, err = storage.NewClient(v.StorageAccountName, accountKey, v.StorageBaseURL, storage.DefaultAPIVersion, true)
+       if v.ContainerName == "" || v.StorageAccountName == "" || v.StorageAccountKey == "" {
+               return nil, errors.New("DriverParameters: ContainerName, StorageAccountName, and StorageAccountKey must be provided")
+       }
+       azc, err := storage.NewClient(v.StorageAccountName, v.StorageAccountKey, v.StorageBaseURL, storage.DefaultAPIVersion, true)
        if err != nil {
-               return fmt.Errorf("creating Azure storage client: %s", err)
+               return nil, fmt.Errorf("creating Azure storage client: %s", err)
        }
+       v.azClient = azc
        v.azClient.Sender = &singleSender{}
-
-       if v.RequestTimeout == 0 {
-               v.RequestTimeout = azureDefaultRequestTimeout
-       }
        v.azClient.HTTPClient = &http.Client{
                Timeout: time.Duration(v.RequestTimeout),
        }
@@ -192,19 +73,64 @@ func (v *AzureBlobVolume) Start(vm *volumeMetricsVecs) error {
        }
 
        if ok, err := v.container.Exists(); err != nil {
-               return err
+               return nil, err
        } else if !ok {
-               return fmt.Errorf("Azure container %q does not exist", v.ContainerName)
+               return nil, fmt.Errorf("Azure container %q does not exist: %s", v.ContainerName, err)
        }
-       // Set up prometheus metrics
-       lbls := prometheus.Labels{"device_id": v.DeviceID()}
-       v.container.stats.opsCounters, v.container.stats.errCounters, v.container.stats.ioBytes = vm.getCounterVecsFor(lbls)
+       return v, v.check()
+}
 
+func (v *AzureBlobVolume) check() error {
+       lbls := prometheus.Labels{"device_id": v.GetDeviceID()}
+       v.container.stats.opsCounters, v.container.stats.errCounters, v.container.stats.ioBytes = v.metrics.getCounterVecsFor(lbls)
        return nil
 }
 
-// DeviceID returns a globally unique ID for the storage container.
-func (v *AzureBlobVolume) DeviceID() string {
+const (
+       azureDefaultRequestTimeout       = arvados.Duration(10 * time.Minute)
+       azureDefaultListBlobsMaxAttempts = 12
+       azureDefaultListBlobsRetryDelay  = arvados.Duration(10 * time.Second)
+       azureDefaultWriteRaceInterval    = arvados.Duration(15 * time.Second)
+       azureDefaultWriteRacePollTime    = arvados.Duration(time.Second)
+)
+
+// An AzureBlobVolume stores and retrieves blocks in an Azure Blob
+// container.
+type AzureBlobVolume struct {
+       StorageAccountName   string
+       StorageAccountKey    string
+       StorageBaseURL       string // "" means default, "core.windows.net"
+       ContainerName        string
+       RequestTimeout       arvados.Duration
+       ListBlobsRetryDelay  arvados.Duration
+       ListBlobsMaxAttempts int
+       MaxGetBytes          int
+       WriteRaceInterval    arvados.Duration
+       WriteRacePollTime    arvados.Duration
+
+       cluster   *arvados.Cluster
+       volume    arvados.Volume
+       logger    logrus.FieldLogger
+       metrics   *volumeMetricsVecs
+       azClient  storage.Client
+       container *azureContainer
+}
+
+// singleSender is a single-attempt storage.Sender.
+type singleSender struct{}
+
+// Send performs req exactly once.
+func (*singleSender) Send(c *storage.Client, req *http.Request) (resp *http.Response, err error) {
+       return c.HTTPClient.Do(req)
+}
+
+// Type implements Volume.
+func (v *AzureBlobVolume) Type() string {
+       return "Azure"
+}
+
+// GetDeviceID returns a globally unique ID for the storage container.
+func (v *AzureBlobVolume) GetDeviceID() string {
        return "azure://" + v.StorageBaseURL + "/" + v.StorageAccountName + "/" + v.ContainerName
 }
 
@@ -245,14 +171,14 @@ func (v *AzureBlobVolume) Get(ctx context.Context, loc string, buf []byte) (int,
                if !haveDeadline {
                        t, err := v.Mtime(loc)
                        if err != nil {
-                               log.Print("Got empty block (possible race) but Mtime failed: ", err)
+                               ctxlog.FromContext(ctx).Print("Got empty block (possible race) but Mtime failed: ", err)
                                break
                        }
-                       deadline = t.Add(azureWriteRaceInterval)
+                       deadline = t.Add(v.WriteRaceInterval.Duration())
                        if time.Now().After(deadline) {
                                break
                        }
-                       log.Printf("Race? Block %s is 0 bytes, %s old. Polling until %s", loc, time.Since(t), deadline)
+                       ctxlog.FromContext(ctx).Printf("Race? Block %s is 0 bytes, %s old. Polling until %s", loc, time.Since(t), deadline)
                        haveDeadline = true
                } else if time.Now().After(deadline) {
                        break
@@ -260,12 +186,12 @@ func (v *AzureBlobVolume) Get(ctx context.Context, loc string, buf []byte) (int,
                select {
                case <-ctx.Done():
                        return 0, ctx.Err()
-               case <-time.After(azureWriteRacePollTime):
+               case <-time.After(v.WriteRacePollTime.Duration()):
                }
                size, err = v.get(ctx, loc, buf)
        }
        if haveDeadline {
-               log.Printf("Race ended with size==%d", size)
+               ctxlog.FromContext(ctx).Printf("Race ended with size==%d", size)
        }
        return size, err
 }
@@ -273,8 +199,15 @@ func (v *AzureBlobVolume) Get(ctx context.Context, loc string, buf []byte) (int,
 func (v *AzureBlobVolume) get(ctx context.Context, loc string, buf []byte) (int, error) {
        ctx, cancel := context.WithCancel(ctx)
        defer cancel()
+
+       pieceSize := BlockSize
+       if v.MaxGetBytes > 0 && v.MaxGetBytes < BlockSize {
+               pieceSize = v.MaxGetBytes
+       }
+
+       pieces := 1
        expectSize := len(buf)
-       if azureMaxGetBytes < BlockSize {
+       if pieceSize < BlockSize {
                // Unfortunately the handler doesn't tell us how long the blob
                // is expected to be, so we have to ask Azure.
                props, err := v.container.GetBlobProperties(loc)
@@ -285,6 +218,7 @@ func (v *AzureBlobVolume) get(ctx context.Context, loc string, buf []byte) (int,
                        return 0, fmt.Errorf("block %s invalid size %d (max %d)", loc, props.ContentLength, BlockSize)
                }
                expectSize = int(props.ContentLength)
+               pieces = (expectSize + pieceSize - 1) / pieceSize
        }
 
        if expectSize == 0 {
@@ -293,7 +227,6 @@ func (v *AzureBlobVolume) get(ctx context.Context, loc string, buf []byte) (int,
 
        // We'll update this actualSize if/when we get the last piece.
        actualSize := -1
-       pieces := (expectSize + azureMaxGetBytes - 1) / azureMaxGetBytes
        errors := make(chan error, pieces)
        var wg sync.WaitGroup
        wg.Add(pieces)
@@ -308,8 +241,8 @@ func (v *AzureBlobVolume) get(ctx context.Context, loc string, buf []byte) (int,
                // interrupted as a result.
                go func(p int) {
                        defer wg.Done()
-                       startPos := p * azureMaxGetBytes
-                       endPos := startPos + azureMaxGetBytes
+                       startPos := p * pieceSize
+                       endPos := startPos + pieceSize
                        if endPos > expectSize {
                                endPos = expectSize
                        }
@@ -412,7 +345,7 @@ func (v *AzureBlobVolume) Compare(ctx context.Context, loc string, expect []byte
 
 // Put stores a Keep block as a block blob in the container.
 func (v *AzureBlobVolume) Put(ctx context.Context, loc string, block []byte) error {
-       if v.ReadOnly {
+       if v.volume.ReadOnly {
                return MethodDisabledError
        }
        // Send the block data through a pipe, so that (if we need to)
@@ -441,7 +374,7 @@ func (v *AzureBlobVolume) Put(ctx context.Context, loc string, block []byte) err
        }()
        select {
        case <-ctx.Done():
-               theConfig.debugLogf("%s: taking CreateBlockBlobFromReader's input away: %s", v, ctx.Err())
+               ctxlog.FromContext(ctx).Debugf("%s: taking CreateBlockBlobFromReader's input away: %s", v, ctx.Err())
                // Our pipe might be stuck in Write(), waiting for
                // io.Copy() to read. If so, un-stick it. This means
                // CreateBlockBlobFromReader will get corrupt data,
@@ -450,7 +383,7 @@ func (v *AzureBlobVolume) Put(ctx context.Context, loc string, block []byte) err
                go io.Copy(ioutil.Discard, bufr)
                // CloseWithError() will return once pending I/O is done.
                bufw.CloseWithError(ctx.Err())
-               theConfig.debugLogf("%s: abandoning CreateBlockBlobFromReader goroutine", v)
+               ctxlog.FromContext(ctx).Debugf("%s: abandoning CreateBlockBlobFromReader goroutine", v)
                return ctx.Err()
        case err := <-errChan:
                return err
@@ -459,7 +392,7 @@ func (v *AzureBlobVolume) Put(ctx context.Context, loc string, block []byte) err
 
 // Touch updates the last-modified property of a block blob.
 func (v *AzureBlobVolume) Touch(loc string) error {
-       if v.ReadOnly {
+       if v.volume.ReadOnly {
                return MethodDisabledError
        }
        trashed, metadata, err := v.checkTrashed(loc)
@@ -508,7 +441,7 @@ func (v *AzureBlobVolume) IndexTo(prefix string, writer io.Writer) error {
                                continue
                        }
                        modtime := time.Time(b.Properties.LastModified)
-                       if b.Properties.ContentLength == 0 && modtime.Add(azureWriteRaceInterval).After(time.Now()) {
+                       if b.Properties.ContentLength == 0 && modtime.Add(v.WriteRaceInterval.Duration()).After(time.Now()) {
                                // A new zero-length blob is probably
                                // just a new non-empty blob that
                                // hasn't committed its data yet (see
@@ -535,7 +468,7 @@ func (v *AzureBlobVolume) listBlobs(page int, params storage.ListBlobsParameters
                resp, err = v.container.ListBlobs(params)
                err = v.translateError(err)
                if err == VolumeBusyError {
-                       log.Printf("ListBlobs: will retry page %d in %s after error: %s", page, v.ListBlobsRetryDelay, err)
+                       v.logger.Printf("ListBlobs: will retry page %d in %s after error: %s", page, v.ListBlobsRetryDelay, err)
                        time.Sleep(time.Duration(v.ListBlobsRetryDelay))
                        continue
                } else {
@@ -547,7 +480,7 @@ func (v *AzureBlobVolume) listBlobs(page int, params storage.ListBlobsParameters
 
 // Trash a Keep block.
 func (v *AzureBlobVolume) Trash(loc string) error {
-       if v.ReadOnly {
+       if v.volume.ReadOnly {
                return MethodDisabledError
        }
 
@@ -562,12 +495,12 @@ func (v *AzureBlobVolume) Trash(loc string) error {
        }
        if t, err := v.Mtime(loc); err != nil {
                return err
-       } else if time.Since(t) < theConfig.BlobSignatureTTL.Duration() {
+       } else if time.Since(t) < v.cluster.Collections.BlobSigningTTL.Duration() {
                return nil
        }
 
-       // If TrashLifetime == 0, just delete it
-       if theConfig.TrashLifetime == 0 {
+       // If BlobTrashLifetime == 0, just delete it
+       if v.cluster.Collections.BlobTrashLifetime == 0 {
                return v.container.DeleteBlob(loc, &storage.DeleteBlobOptions{
                        IfMatch: props.Etag,
                })
@@ -575,7 +508,7 @@ func (v *AzureBlobVolume) Trash(loc string) error {
 
        // Otherwise, mark as trash
        return v.container.SetBlobMetadata(loc, storage.BlobMetadata{
-               "expires_at": fmt.Sprintf("%d", time.Now().Add(theConfig.TrashLifetime.Duration()).Unix()),
+               "expires_at": fmt.Sprintf("%d", time.Now().Add(v.cluster.Collections.BlobTrashLifetime.Duration()).Unix()),
        }, &storage.SetBlobMetadataOptions{
                IfMatch: props.Etag,
        })
@@ -613,23 +546,6 @@ func (v *AzureBlobVolume) String() string {
        return fmt.Sprintf("azure-storage-container:%+q", v.ContainerName)
 }
 
-// Writable returns true, unless the -readonly flag was on when the
-// volume was added.
-func (v *AzureBlobVolume) Writable() bool {
-       return !v.ReadOnly
-}
-
-// Replication returns the replication level of the container, as
-// specified by the -azure-storage-replication argument.
-func (v *AzureBlobVolume) Replication() int {
-       return v.AzureReplication
-}
-
-// GetStorageClasses implements Volume
-func (v *AzureBlobVolume) GetStorageClasses() []string {
-       return v.StorageClasses
-}
-
 // If possible, translate an Azure SDK error to a recognizable error
 // like os.ErrNotExist.
 func (v *AzureBlobVolume) translateError(err error) error {
@@ -653,9 +569,13 @@ func (v *AzureBlobVolume) isKeepBlock(s string) bool {
        return keepBlockRegexp.MatchString(s)
 }
 
-// EmptyTrash looks for trashed blocks that exceeded TrashLifetime
+// EmptyTrash looks for trashed blocks that exceeded BlobTrashLifetime
 // and deletes them from the volume.
 func (v *AzureBlobVolume) EmptyTrash() {
+       if v.cluster.Collections.BlobDeleteConcurrency < 1 {
+               return
+       }
+
        var bytesDeleted, bytesInTrash int64
        var blocksDeleted, blocksInTrash int64
 
@@ -670,7 +590,7 @@ func (v *AzureBlobVolume) EmptyTrash() {
 
                expiresAt, err := strconv.ParseInt(b.Metadata["expires_at"], 10, 64)
                if err != nil {
-                       log.Printf("EmptyTrash: ParseInt(%v): %v", b.Metadata["expires_at"], err)
+                       v.logger.Printf("EmptyTrash: ParseInt(%v): %v", b.Metadata["expires_at"], err)
                        return
                }
 
@@ -682,7 +602,7 @@ func (v *AzureBlobVolume) EmptyTrash() {
                        IfMatch: b.Properties.Etag,
                })
                if err != nil {
-                       log.Printf("EmptyTrash: DeleteBlob(%v): %v", b.Name, err)
+                       v.logger.Printf("EmptyTrash: DeleteBlob(%v): %v", b.Name, err)
                        return
                }
                atomic.AddInt64(&blocksDeleted, 1)
@@ -690,8 +610,8 @@ func (v *AzureBlobVolume) EmptyTrash() {
        }
 
        var wg sync.WaitGroup
-       todo := make(chan storage.Blob, theConfig.EmptyTrashWorkers)
-       for i := 0; i < 1 || i < theConfig.EmptyTrashWorkers; i++ {
+       todo := make(chan storage.Blob, v.cluster.Collections.BlobDeleteConcurrency)
+       for i := 0; i < v.cluster.Collections.BlobDeleteConcurrency; i++ {
                wg.Add(1)
                go func() {
                        defer wg.Done()
@@ -705,7 +625,7 @@ func (v *AzureBlobVolume) EmptyTrash() {
        for page := 1; ; page++ {
                resp, err := v.listBlobs(page, params)
                if err != nil {
-                       log.Printf("EmptyTrash: ListBlobs: %v", err)
+                       v.logger.Printf("EmptyTrash: ListBlobs: %v", err)
                        break
                }
                for _, b := range resp.Blobs {
@@ -719,7 +639,7 @@ func (v *AzureBlobVolume) EmptyTrash() {
        close(todo)
        wg.Wait()
 
-       log.Printf("EmptyTrash stats for %v: Deleted %v bytes in %v blocks. Remaining in trash: %v bytes in %v blocks.", v.String(), bytesDeleted, blocksDeleted, bytesInTrash-bytesDeleted, blocksInTrash-blocksDeleted)
+       v.logger.Printf("EmptyTrash stats for %v: Deleted %v bytes in %v blocks. Remaining in trash: %v bytes in %v blocks.", v.String(), bytesDeleted, blocksDeleted, bytesInTrash-bytesDeleted, blocksInTrash-blocksDeleted)
 }
 
 // InternalStats returns bucket I/O and API call counters.
@@ -748,7 +668,6 @@ func (s *azureBlobStats) TickErr(err error) {
        if err, ok := err.(storage.AzureStorageServiceError); ok {
                errType = errType + fmt.Sprintf(" %d (%s)", err.StatusCode, err.Code)
        }
-       log.Printf("errType %T, err %s", err, err)
        s.statsTicker.TickErr(err, errType)
 }
 
index 8d02def1445c3f0d7f6ed5806c4c226b75e41644..3539c3067cbd775693aef61ed3b6ed0a7901b71f 100644 (file)
@@ -24,13 +24,13 @@ import (
        "strconv"
        "strings"
        "sync"
-       "testing"
        "time"
 
        "git.curoverse.com/arvados.git/sdk/go/arvados"
+       "git.curoverse.com/arvados.git/sdk/go/ctxlog"
        "github.com/Azure/azure-sdk-for-go/storage"
-       "github.com/ghodss/yaml"
        "github.com/prometheus/client_golang/prometheus"
+       "github.com/sirupsen/logrus"
        check "gopkg.in/check.v1"
 )
 
@@ -66,14 +66,16 @@ type azBlob struct {
 
 type azStubHandler struct {
        sync.Mutex
+       logger     logrus.FieldLogger
        blobs      map[string]*azBlob
        race       chan chan struct{}
        didlist503 bool
 }
 
-func newAzStubHandler() *azStubHandler {
+func newAzStubHandler(c *check.C) *azStubHandler {
        return &azStubHandler{
-               blobs: make(map[string]*azBlob),
+               blobs:  make(map[string]*azBlob),
+               logger: ctxlog.TestLogger(c),
        }
 }
 
@@ -117,7 +119,7 @@ func (h *azStubHandler) ServeHTTP(rw http.ResponseWriter, r *http.Request) {
        h.Lock()
        defer h.Unlock()
        if azureTestDebug {
-               defer log.Printf("azStubHandler: %+v", r)
+               defer h.logger.Printf("azStubHandler: %+v", r)
        }
 
        path := strings.Split(r.URL.Path, "/")
@@ -128,7 +130,7 @@ func (h *azStubHandler) ServeHTTP(rw http.ResponseWriter, r *http.Request) {
        }
 
        if err := r.ParseForm(); err != nil {
-               log.Printf("azStubHandler(%+v): %s", r, err)
+               h.logger.Printf("azStubHandler(%+v): %s", r, err)
                rw.WriteHeader(http.StatusBadRequest)
                return
        }
@@ -184,13 +186,13 @@ func (h *azStubHandler) ServeHTTP(rw http.ResponseWriter, r *http.Request) {
        case r.Method == "PUT" && r.Form.Get("comp") == "block":
                // "Put Block" API
                if !blobExists {
-                       log.Printf("Got block for nonexistent blob: %+v", r)
+                       h.logger.Printf("Got block for nonexistent blob: %+v", r)
                        rw.WriteHeader(http.StatusBadRequest)
                        return
                }
                blockID, err := base64.StdEncoding.DecodeString(r.Form.Get("blockid"))
                if err != nil || len(blockID) == 0 {
-                       log.Printf("Invalid blockid: %+q", r.Form.Get("blockid"))
+                       h.logger.Printf("Invalid blockid: %+q", r.Form.Get("blockid"))
                        rw.WriteHeader(http.StatusBadRequest)
                        return
                }
@@ -200,14 +202,14 @@ func (h *azStubHandler) ServeHTTP(rw http.ResponseWriter, r *http.Request) {
                // "Put Block List" API
                bl := &blockListRequestBody{}
                if err := xml.Unmarshal(body, bl); err != nil {
-                       log.Printf("xml Unmarshal: %s", err)
+                       h.logger.Printf("xml Unmarshal: %s", err)
                        rw.WriteHeader(http.StatusBadRequest)
                        return
                }
                for _, encBlockID := range bl.Uncommitted {
                        blockID, err := base64.StdEncoding.DecodeString(encBlockID)
                        if err != nil || len(blockID) == 0 || blob.Uncommitted[string(blockID)] == nil {
-                               log.Printf("Invalid blockid: %+q", encBlockID)
+                               h.logger.Printf("Invalid blockid: %+q", encBlockID)
                                rw.WriteHeader(http.StatusBadRequest)
                                return
                        }
@@ -223,7 +225,7 @@ func (h *azStubHandler) ServeHTTP(rw http.ResponseWriter, r *http.Request) {
                // sets metadata headers only as a way to bump Etag
                // and Last-Modified.
                if !blobExists {
-                       log.Printf("Got metadata for nonexistent blob: %+v", r)
+                       h.logger.Printf("Got metadata for nonexistent blob: %+v", r)
                        rw.WriteHeader(http.StatusBadRequest)
                        return
                }
@@ -269,7 +271,7 @@ func (h *azStubHandler) ServeHTTP(rw http.ResponseWriter, r *http.Request) {
                rw.Header().Set("Content-Length", strconv.Itoa(len(data)))
                if r.Method == "GET" {
                        if _, err := rw.Write(data); err != nil {
-                               log.Printf("write %+q: %s", data, err)
+                               h.logger.Printf("write %+q: %s", data, err)
                        }
                }
                h.unlockAndRace()
@@ -333,12 +335,12 @@ func (h *azStubHandler) ServeHTTP(rw http.ResponseWriter, r *http.Request) {
                }
                buf, err := xml.Marshal(resp)
                if err != nil {
-                       log.Print(err)
+                       h.logger.Error(err)
                        rw.WriteHeader(http.StatusInternalServerError)
                }
                rw.Write(buf)
        default:
-               log.Printf("azStubHandler: not implemented: %+v Body:%+q", r, body)
+               h.logger.Printf("azStubHandler: not implemented: %+v Body:%+q", r, body)
                rw.WriteHeader(http.StatusNotImplemented)
        }
 }
@@ -347,6 +349,7 @@ func (h *azStubHandler) ServeHTTP(rw http.ResponseWriter, r *http.Request) {
 // tries to connect to "devstoreaccount1.blob.127.0.0.1:46067", and
 // in such cases transparently dials "127.0.0.1:46067" instead.
 type azStubDialer struct {
+       logger logrus.FieldLogger
        net.Dialer
 }
 
@@ -355,7 +358,7 @@ var localHostPortRe = regexp.MustCompile(`(127\.0\.0\.1|localhost|\[::1\]):\d+`)
 func (d *azStubDialer) Dial(network, address string) (net.Conn, error) {
        if hp := localHostPortRe.FindString(address); hp != "" {
                if azureTestDebug {
-                       log.Println("azStubDialer: dial", hp, "instead of", address)
+                       d.logger.Debug("azStubDialer: dial", hp, "instead of", address)
                }
                address = hp
        }
@@ -369,29 +372,24 @@ type TestableAzureBlobVolume struct {
        t         TB
 }
 
-func NewTestableAzureBlobVolume(t TB, readonly bool, replication int) *TestableAzureBlobVolume {
-       azHandler := newAzStubHandler()
+func (s *StubbedAzureBlobSuite) newTestableAzureBlobVolume(t TB, cluster *arvados.Cluster, volume arvados.Volume, metrics *volumeMetricsVecs) *TestableAzureBlobVolume {
+       azHandler := newAzStubHandler(t.(*check.C))
        azStub := httptest.NewServer(azHandler)
 
        var azClient storage.Client
+       var err error
 
        container := azureTestContainer
        if container == "" {
                // Connect to stub instead of real Azure storage service
                stubURLBase := strings.Split(azStub.URL, "://")[1]
-               var err error
                if azClient, err = storage.NewClient(fakeAccountName, fakeAccountKey, stubURLBase, storage.DefaultAPIVersion, false); err != nil {
                        t.Fatal(err)
                }
                container = "fakecontainername"
        } else {
                // Connect to real Azure storage service
-               accountKey, err := readKeyFromFile(azureStorageAccountKeyFile)
-               if err != nil {
-                       t.Fatal(err)
-               }
-               azClient, err = storage.NewBasicClient(azureStorageAccountName, accountKey)
-               if err != nil {
+               if azClient, err = storage.NewBasicClient(os.Getenv("ARVADOS_TEST_AZURE_ACCOUNT_NAME"), os.Getenv("ARVADOS_TEST_AZURE_ACCOUNT_KEY")); err != nil {
                        t.Fatal(err)
                }
        }
@@ -400,12 +398,19 @@ func NewTestableAzureBlobVolume(t TB, readonly bool, replication int) *TestableA
        bs := azClient.GetBlobService()
        v := &AzureBlobVolume{
                ContainerName:        container,
-               ReadOnly:             readonly,
-               AzureReplication:     replication,
+               WriteRaceInterval:    arvados.Duration(time.Millisecond),
+               WriteRacePollTime:    arvados.Duration(time.Nanosecond),
                ListBlobsMaxAttempts: 2,
                ListBlobsRetryDelay:  arvados.Duration(time.Millisecond),
                azClient:             azClient,
                container:            &azureContainer{ctr: bs.GetContainerReference(container)},
+               cluster:              cluster,
+               volume:               volume,
+               logger:               ctxlog.TestLogger(t),
+               metrics:              metrics,
+       }
+       if err = v.check(); err != nil {
+               t.Fatal(err)
        }
 
        return &TestableAzureBlobVolume{
@@ -419,84 +424,45 @@ func NewTestableAzureBlobVolume(t TB, readonly bool, replication int) *TestableA
 var _ = check.Suite(&StubbedAzureBlobSuite{})
 
 type StubbedAzureBlobSuite struct {
-       volume            *TestableAzureBlobVolume
        origHTTPTransport http.RoundTripper
 }
 
 func (s *StubbedAzureBlobSuite) SetUpTest(c *check.C) {
        s.origHTTPTransport = http.DefaultTransport
        http.DefaultTransport = &http.Transport{
-               Dial: (&azStubDialer{}).Dial,
+               Dial: (&azStubDialer{logger: ctxlog.TestLogger(c)}).Dial,
        }
-       azureWriteRaceInterval = time.Millisecond
-       azureWriteRacePollTime = time.Nanosecond
-
-       s.volume = NewTestableAzureBlobVolume(c, false, 3)
 }
 
 func (s *StubbedAzureBlobSuite) TearDownTest(c *check.C) {
-       s.volume.Teardown()
        http.DefaultTransport = s.origHTTPTransport
 }
 
-func TestAzureBlobVolumeWithGeneric(t *testing.T) {
-       defer func(t http.RoundTripper) {
-               http.DefaultTransport = t
-       }(http.DefaultTransport)
-       http.DefaultTransport = &http.Transport{
-               Dial: (&azStubDialer{}).Dial,
-       }
-       azureWriteRaceInterval = time.Millisecond
-       azureWriteRacePollTime = time.Nanosecond
-       DoGenericVolumeTests(t, func(t TB) TestableVolume {
-               return NewTestableAzureBlobVolume(t, false, azureStorageReplication)
+func (s *StubbedAzureBlobSuite) TestAzureBlobVolumeWithGeneric(c *check.C) {
+       DoGenericVolumeTests(c, false, func(t TB, cluster *arvados.Cluster, volume arvados.Volume, logger logrus.FieldLogger, metrics *volumeMetricsVecs) TestableVolume {
+               return s.newTestableAzureBlobVolume(t, cluster, volume, metrics)
        })
 }
 
-func TestAzureBlobVolumeConcurrentRanges(t *testing.T) {
-       defer func(b int) {
-               azureMaxGetBytes = b
-       }(azureMaxGetBytes)
-
-       defer func(t http.RoundTripper) {
-               http.DefaultTransport = t
-       }(http.DefaultTransport)
-       http.DefaultTransport = &http.Transport{
-               Dial: (&azStubDialer{}).Dial,
-       }
-       azureWriteRaceInterval = time.Millisecond
-       azureWriteRacePollTime = time.Nanosecond
+func (s *StubbedAzureBlobSuite) TestAzureBlobVolumeConcurrentRanges(c *check.C) {
        // Test (BlockSize mod azureMaxGetBytes)==0 and !=0 cases
-       for _, azureMaxGetBytes = range []int{2 << 22, 2<<22 - 1} {
-               DoGenericVolumeTests(t, func(t TB) TestableVolume {
-                       return NewTestableAzureBlobVolume(t, false, azureStorageReplication)
+       for _, b := range []int{2 << 22, 2<<22 - 1} {
+               DoGenericVolumeTests(c, false, func(t TB, cluster *arvados.Cluster, volume arvados.Volume, logger logrus.FieldLogger, metrics *volumeMetricsVecs) TestableVolume {
+                       v := s.newTestableAzureBlobVolume(t, cluster, volume, metrics)
+                       v.MaxGetBytes = b
+                       return v
                })
        }
 }
 
-func TestReadonlyAzureBlobVolumeWithGeneric(t *testing.T) {
-       defer func(t http.RoundTripper) {
-               http.DefaultTransport = t
-       }(http.DefaultTransport)
-       http.DefaultTransport = &http.Transport{
-               Dial: (&azStubDialer{}).Dial,
-       }
-       azureWriteRaceInterval = time.Millisecond
-       azureWriteRacePollTime = time.Nanosecond
-       DoGenericVolumeTests(t, func(t TB) TestableVolume {
-               return NewTestableAzureBlobVolume(t, true, azureStorageReplication)
+func (s *StubbedAzureBlobSuite) TestReadonlyAzureBlobVolumeWithGeneric(c *check.C) {
+       DoGenericVolumeTests(c, false, func(c TB, cluster *arvados.Cluster, volume arvados.Volume, logger logrus.FieldLogger, metrics *volumeMetricsVecs) TestableVolume {
+               return s.newTestableAzureBlobVolume(c, cluster, volume, metrics)
        })
 }
 
-func TestAzureBlobVolumeRangeFenceposts(t *testing.T) {
-       defer func(t http.RoundTripper) {
-               http.DefaultTransport = t
-       }(http.DefaultTransport)
-       http.DefaultTransport = &http.Transport{
-               Dial: (&azStubDialer{}).Dial,
-       }
-
-       v := NewTestableAzureBlobVolume(t, false, 3)
+func (s *StubbedAzureBlobSuite) TestAzureBlobVolumeRangeFenceposts(c *check.C) {
+       v := s.newTestableAzureBlobVolume(c, testCluster(c), arvados.Volume{Replication: 3}, newVolumeMetricsVecs(prometheus.NewRegistry()))
        defer v.Teardown()
 
        for _, size := range []int{
@@ -514,47 +480,27 @@ func TestAzureBlobVolumeRangeFenceposts(t *testing.T) {
                hash := fmt.Sprintf("%x", md5.Sum(data))
                err := v.Put(context.Background(), hash, data)
                if err != nil {
-                       t.Error(err)
+                       c.Error(err)
                }
                gotData := make([]byte, len(data))
                gotLen, err := v.Get(context.Background(), hash, gotData)
                if err != nil {
-                       t.Error(err)
+                       c.Error(err)
                }
                gotHash := fmt.Sprintf("%x", md5.Sum(gotData))
                if gotLen != size {
-                       t.Errorf("length mismatch: got %d != %d", gotLen, size)
+                       c.Errorf("length mismatch: got %d != %d", gotLen, size)
                }
                if gotHash != hash {
-                       t.Errorf("hash mismatch: got %s != %s", gotHash, hash)
-               }
-       }
-}
-
-func TestAzureBlobVolumeReplication(t *testing.T) {
-       for r := 1; r <= 4; r++ {
-               v := NewTestableAzureBlobVolume(t, false, r)
-               defer v.Teardown()
-               if n := v.Replication(); n != r {
-                       t.Errorf("Got replication %d, expected %d", n, r)
+                       c.Errorf("hash mismatch: got %s != %s", gotHash, hash)
                }
        }
 }
 
-func TestAzureBlobVolumeCreateBlobRace(t *testing.T) {
-       defer func(t http.RoundTripper) {
-               http.DefaultTransport = t
-       }(http.DefaultTransport)
-       http.DefaultTransport = &http.Transport{
-               Dial: (&azStubDialer{}).Dial,
-       }
-
-       v := NewTestableAzureBlobVolume(t, false, 3)
+func (s *StubbedAzureBlobSuite) TestAzureBlobVolumeCreateBlobRace(c *check.C) {
+       v := s.newTestableAzureBlobVolume(c, testCluster(c), arvados.Volume{Replication: 3}, newVolumeMetricsVecs(prometheus.NewRegistry()))
        defer v.Teardown()
 
-       azureWriteRaceInterval = time.Second
-       azureWriteRacePollTime = time.Millisecond
-
        var wg sync.WaitGroup
 
        v.azHandler.race = make(chan chan struct{})
@@ -564,7 +510,7 @@ func TestAzureBlobVolumeCreateBlobRace(t *testing.T) {
                defer wg.Done()
                err := v.Put(context.Background(), TestHash, TestBlock)
                if err != nil {
-                       t.Error(err)
+                       c.Error(err)
                }
        }()
        continuePut := make(chan struct{})
@@ -576,7 +522,7 @@ func TestAzureBlobVolumeCreateBlobRace(t *testing.T) {
                buf := make([]byte, len(TestBlock))
                _, err := v.Get(context.Background(), TestHash, buf)
                if err != nil {
-                       t.Error(err)
+                       c.Error(err)
                }
        }()
        // Wait for the stub's Get to get the empty blob
@@ -588,26 +534,18 @@ func TestAzureBlobVolumeCreateBlobRace(t *testing.T) {
        wg.Wait()
 }
 
-func TestAzureBlobVolumeCreateBlobRaceDeadline(t *testing.T) {
-       defer func(t http.RoundTripper) {
-               http.DefaultTransport = t
-       }(http.DefaultTransport)
-       http.DefaultTransport = &http.Transport{
-               Dial: (&azStubDialer{}).Dial,
-       }
-
-       v := NewTestableAzureBlobVolume(t, false, 3)
+func (s *StubbedAzureBlobSuite) TestAzureBlobVolumeCreateBlobRaceDeadline(c *check.C) {
+       v := s.newTestableAzureBlobVolume(c, testCluster(c), arvados.Volume{Replication: 3}, newVolumeMetricsVecs(prometheus.NewRegistry()))
+       v.AzureBlobVolume.WriteRaceInterval.Set("2s")
+       v.AzureBlobVolume.WriteRacePollTime.Set("5ms")
        defer v.Teardown()
 
-       azureWriteRaceInterval = 2 * time.Second
-       azureWriteRacePollTime = 5 * time.Millisecond
-
        v.PutRaw(TestHash, nil)
 
        buf := new(bytes.Buffer)
        v.IndexTo("", buf)
        if buf.Len() != 0 {
-               t.Errorf("Index %+q should be empty", buf.Bytes())
+               c.Errorf("Index %+q should be empty", buf.Bytes())
        }
 
        v.TouchWithDate(TestHash, time.Now().Add(-1982*time.Millisecond))
@@ -618,56 +556,49 @@ func TestAzureBlobVolumeCreateBlobRaceDeadline(t *testing.T) {
                buf := make([]byte, BlockSize)
                n, err := v.Get(context.Background(), TestHash, buf)
                if err != nil {
-                       t.Error(err)
+                       c.Error(err)
                        return
                }
                if n != 0 {
-                       t.Errorf("Got %+q, expected empty buf", buf[:n])
+                       c.Errorf("Got %+q, expected empty buf", buf[:n])
                }
        }()
        select {
        case <-allDone:
        case <-time.After(time.Second):
-               t.Error("Get should have stopped waiting for race when block was 2s old")
+               c.Error("Get should have stopped waiting for race when block was 2s old")
        }
 
        buf.Reset()
        v.IndexTo("", buf)
        if !bytes.HasPrefix(buf.Bytes(), []byte(TestHash+"+0")) {
-               t.Errorf("Index %+q should have %+q", buf.Bytes(), TestHash+"+0")
+               c.Errorf("Index %+q should have %+q", buf.Bytes(), TestHash+"+0")
        }
 }
 
-func TestAzureBlobVolumeContextCancelGet(t *testing.T) {
-       testAzureBlobVolumeContextCancel(t, func(ctx context.Context, v *TestableAzureBlobVolume) error {
+func (s *StubbedAzureBlobSuite) TestAzureBlobVolumeContextCancelGet(c *check.C) {
+       s.testAzureBlobVolumeContextCancel(c, func(ctx context.Context, v *TestableAzureBlobVolume) error {
                v.PutRaw(TestHash, TestBlock)
                _, err := v.Get(ctx, TestHash, make([]byte, BlockSize))
                return err
        })
 }
 
-func TestAzureBlobVolumeContextCancelPut(t *testing.T) {
-       testAzureBlobVolumeContextCancel(t, func(ctx context.Context, v *TestableAzureBlobVolume) error {
+func (s *StubbedAzureBlobSuite) TestAzureBlobVolumeContextCancelPut(c *check.C) {
+       s.testAzureBlobVolumeContextCancel(c, func(ctx context.Context, v *TestableAzureBlobVolume) error {
                return v.Put(ctx, TestHash, make([]byte, BlockSize))
        })
 }
 
-func TestAzureBlobVolumeContextCancelCompare(t *testing.T) {
-       testAzureBlobVolumeContextCancel(t, func(ctx context.Context, v *TestableAzureBlobVolume) error {
+func (s *StubbedAzureBlobSuite) TestAzureBlobVolumeContextCancelCompare(c *check.C) {
+       s.testAzureBlobVolumeContextCancel(c, func(ctx context.Context, v *TestableAzureBlobVolume) error {
                v.PutRaw(TestHash, TestBlock)
                return v.Compare(ctx, TestHash, TestBlock2)
        })
 }
 
-func testAzureBlobVolumeContextCancel(t *testing.T, testFunc func(context.Context, *TestableAzureBlobVolume) error) {
-       defer func(t http.RoundTripper) {
-               http.DefaultTransport = t
-       }(http.DefaultTransport)
-       http.DefaultTransport = &http.Transport{
-               Dial: (&azStubDialer{}).Dial,
-       }
-
-       v := NewTestableAzureBlobVolume(t, false, 3)
+func (s *StubbedAzureBlobSuite) testAzureBlobVolumeContextCancel(c *check.C, testFunc func(context.Context, *TestableAzureBlobVolume) error) {
+       v := s.newTestableAzureBlobVolume(c, testCluster(c), arvados.Volume{Replication: 3}, newVolumeMetricsVecs(prometheus.NewRegistry()))
        defer v.Teardown()
        v.azHandler.race = make(chan chan struct{})
 
@@ -677,15 +608,15 @@ func testAzureBlobVolumeContextCancel(t *testing.T, testFunc func(context.Contex
                defer close(allDone)
                err := testFunc(ctx, v)
                if err != context.Canceled {
-                       t.Errorf("got %T %q, expected %q", err, err, context.Canceled)
+                       c.Errorf("got %T %q, expected %q", err, err, context.Canceled)
                }
        }()
        releaseHandler := make(chan struct{})
        select {
        case <-allDone:
-               t.Error("testFunc finished without waiting for v.azHandler.race")
+               c.Error("testFunc finished without waiting for v.azHandler.race")
        case <-time.After(10 * time.Second):
-               t.Error("timed out waiting to enter handler")
+               c.Error("timed out waiting to enter handler")
        case v.azHandler.race <- releaseHandler:
        }
 
@@ -693,7 +624,7 @@ func testAzureBlobVolumeContextCancel(t *testing.T, testFunc func(context.Contex
 
        select {
        case <-time.After(10 * time.Second):
-               t.Error("timed out waiting to cancel")
+               c.Error("timed out waiting to cancel")
        case <-allDone:
        }
 
@@ -703,8 +634,11 @@ func testAzureBlobVolumeContextCancel(t *testing.T, testFunc func(context.Contex
 }
 
 func (s *StubbedAzureBlobSuite) TestStats(c *check.C) {
+       volume := s.newTestableAzureBlobVolume(c, testCluster(c), arvados.Volume{Replication: 3}, newVolumeMetricsVecs(prometheus.NewRegistry()))
+       defer volume.Teardown()
+
        stats := func() string {
-               buf, err := json.Marshal(s.volume.InternalStats())
+               buf, err := json.Marshal(volume.InternalStats())
                c.Check(err, check.IsNil)
                return string(buf)
        }
@@ -713,37 +647,25 @@ func (s *StubbedAzureBlobSuite) TestStats(c *check.C) {
        c.Check(stats(), check.Matches, `.*"Errors":0,.*`)
 
        loc := "acbd18db4cc2f85cedef654fccc4a4d8"
-       _, err := s.volume.Get(context.Background(), loc, make([]byte, 3))
+       _, err := volume.Get(context.Background(), loc, make([]byte, 3))
        c.Check(err, check.NotNil)
        c.Check(stats(), check.Matches, `.*"Ops":[^0],.*`)
        c.Check(stats(), check.Matches, `.*"Errors":[^0],.*`)
        c.Check(stats(), check.Matches, `.*"storage\.AzureStorageServiceError 404 \(404 Not Found\)":[^0].*`)
        c.Check(stats(), check.Matches, `.*"InBytes":0,.*`)
 
-       err = s.volume.Put(context.Background(), loc, []byte("foo"))
+       err = volume.Put(context.Background(), loc, []byte("foo"))
        c.Check(err, check.IsNil)
        c.Check(stats(), check.Matches, `.*"OutBytes":3,.*`)
        c.Check(stats(), check.Matches, `.*"CreateOps":1,.*`)
 
-       _, err = s.volume.Get(context.Background(), loc, make([]byte, 3))
+       _, err = volume.Get(context.Background(), loc, make([]byte, 3))
        c.Check(err, check.IsNil)
-       _, err = s.volume.Get(context.Background(), loc, make([]byte, 3))
+       _, err = volume.Get(context.Background(), loc, make([]byte, 3))
        c.Check(err, check.IsNil)
        c.Check(stats(), check.Matches, `.*"InBytes":6,.*`)
 }
 
-func (s *StubbedAzureBlobSuite) TestConfig(c *check.C) {
-       var cfg Config
-       err := yaml.Unmarshal([]byte(`
-Volumes:
-  - Type: Azure
-    StorageClasses: ["class_a", "class_b"]
-`), &cfg)
-
-       c.Check(err, check.IsNil)
-       c.Check(cfg.Volumes[0].GetStorageClasses(), check.DeepEquals, []string{"class_a", "class_b"})
-}
-
 func (v *TestableAzureBlobVolume) PutRaw(locator string, data []byte) {
        v.azHandler.PutRaw(v.ContainerName, locator, data)
 }
@@ -760,17 +682,6 @@ func (v *TestableAzureBlobVolume) ReadWriteOperationLabelValues() (r, w string)
        return "get", "create"
 }
 
-func (v *TestableAzureBlobVolume) DeviceID() string {
-       // Dummy device id for testing purposes
-       return "azure://azure_blob_volume_test"
-}
-
-func (v *TestableAzureBlobVolume) Start(vm *volumeMetricsVecs) error {
-       // Override original Start() to be able to assign CounterVecs with a dummy DeviceID
-       v.container.stats.opsCounters, v.container.stats.errCounters, v.container.stats.ioBytes = vm.getCounterVecsFor(prometheus.Labels{"device_id": v.DeviceID()})
-       return nil
-}
-
 func makeEtag() string {
        return fmt.Sprintf("0x%x", rand.Int63())
 }
index d2e7c9ebd3460b75855ca830ab05f6f9f5ab02b9..623693cd12e507002d8ee29272af8185fe6965fb 100644 (file)
@@ -8,9 +8,12 @@ import (
        "sync"
        "sync/atomic"
        "time"
+
+       "github.com/sirupsen/logrus"
 )
 
 type bufferPool struct {
+       log logrus.FieldLogger
        // limiter has a "true" placeholder for each in-use buffer.
        limiter chan bool
        // allocated is the number of bytes currently allocated to buffers.
@@ -19,9 +22,9 @@ type bufferPool struct {
        sync.Pool
 }
 
-func newBufferPool(count int, bufSize int) *bufferPool {
-       p := bufferPool{}
-       p.New = func() interface{} {
+func newBufferPool(log logrus.FieldLogger, count int, bufSize int) *bufferPool {
+       p := bufferPool{log: log}
+       p.Pool.New = func() interface{} {
                atomic.AddUint64(&p.allocated, uint64(bufSize))
                return make([]byte, bufSize)
        }
@@ -34,13 +37,13 @@ func (p *bufferPool) Get(size int) []byte {
        case p.limiter <- true:
        default:
                t0 := time.Now()
-               log.Printf("reached max buffers (%d), waiting", cap(p.limiter))
+               p.log.Printf("reached max buffers (%d), waiting", cap(p.limiter))
                p.limiter <- true
-               log.Printf("waited %v for a buffer", time.Since(t0))
+               p.log.Printf("waited %v for a buffer", time.Since(t0))
        }
        buf := p.Pool.Get().([]byte)
        if cap(buf) < size {
-               log.Fatalf("bufferPool Get(size=%d) but max=%d", size, cap(buf))
+               p.log.Fatalf("bufferPool Get(size=%d) but max=%d", size, cap(buf))
        }
        return buf[:size]
 }
index 21b03edd49b967356c74958b1a1d762851babb60..2afa0ddb81b39bd0b48ca8f142a8f5ceccf8e159 100644 (file)
@@ -5,8 +5,10 @@
 package main
 
 import (
+       "context"
        "time"
 
+       "git.curoverse.com/arvados.git/sdk/go/ctxlog"
        . "gopkg.in/check.v1"
 )
 
@@ -17,16 +19,16 @@ type BufferPoolSuite struct{}
 // Initialize a default-sized buffer pool for the benefit of test
 // suites that don't run main().
 func init() {
-       bufs = newBufferPool(theConfig.MaxBuffers, BlockSize)
+       bufs = newBufferPool(ctxlog.FromContext(context.Background()), 12, BlockSize)
 }
 
 // Restore sane default after bufferpool's own tests
 func (s *BufferPoolSuite) TearDownTest(c *C) {
-       bufs = newBufferPool(theConfig.MaxBuffers, BlockSize)
+       bufs = newBufferPool(ctxlog.FromContext(context.Background()), 12, BlockSize)
 }
 
 func (s *BufferPoolSuite) TestBufferPoolBufSize(c *C) {
-       bufs := newBufferPool(2, 10)
+       bufs := newBufferPool(ctxlog.TestLogger(c), 2, 10)
        b1 := bufs.Get(1)
        bufs.Get(2)
        bufs.Put(b1)
@@ -35,14 +37,14 @@ func (s *BufferPoolSuite) TestBufferPoolBufSize(c *C) {
 }
 
 func (s *BufferPoolSuite) TestBufferPoolUnderLimit(c *C) {
-       bufs := newBufferPool(3, 10)
+       bufs := newBufferPool(ctxlog.TestLogger(c), 3, 10)
        b1 := bufs.Get(10)
        bufs.Get(10)
        testBufferPoolRace(c, bufs, b1, "Get")
 }
 
 func (s *BufferPoolSuite) TestBufferPoolAtLimit(c *C) {
-       bufs := newBufferPool(2, 10)
+       bufs := newBufferPool(ctxlog.TestLogger(c), 2, 10)
        b1 := bufs.Get(10)
        bufs.Get(10)
        testBufferPoolRace(c, bufs, b1, "Put")
@@ -66,7 +68,7 @@ func testBufferPoolRace(c *C, bufs *bufferPool, unused []byte, expectWin string)
 }
 
 func (s *BufferPoolSuite) TestBufferPoolReuse(c *C) {
-       bufs := newBufferPool(2, 10)
+       bufs := newBufferPool(ctxlog.TestLogger(c), 2, 10)
        bufs.Get(10)
        last := bufs.Get(10)
        // The buffer pool is allowed to throw away unused buffers
diff --git a/services/keepstore/command.go b/services/keepstore/command.go
new file mode 100644 (file)
index 0000000..c589e63
--- /dev/null
@@ -0,0 +1,219 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package main
+
+import (
+       "context"
+       "errors"
+       "flag"
+       "fmt"
+       "io"
+       "math/rand"
+       "net/http"
+       "os"
+       "sync"
+
+       "git.curoverse.com/arvados.git/lib/config"
+       "git.curoverse.com/arvados.git/lib/service"
+       "git.curoverse.com/arvados.git/sdk/go/arvados"
+       "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
+       "git.curoverse.com/arvados.git/sdk/go/ctxlog"
+       "git.curoverse.com/arvados.git/sdk/go/keepclient"
+       "github.com/prometheus/client_golang/prometheus"
+       "github.com/sirupsen/logrus"
+)
+
+var (
+       version = "dev"
+       Command = service.Command(arvados.ServiceNameKeepstore, newHandlerOrErrorHandler)
+)
+
+func main() {
+       os.Exit(runCommand(os.Args[0], os.Args[1:], os.Stdin, os.Stdout, os.Stderr))
+}
+
+func runCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int {
+       args, ok := convertKeepstoreFlagsToServiceFlags(args, ctxlog.FromContext(context.Background()))
+       if !ok {
+               return 2
+       }
+       return Command.RunCommand(prog, args, stdin, stdout, stderr)
+}
+
+// Parse keepstore command line flags, and return equivalent
+// service.Command flags. The second return value ("ok") is true if
+// all provided flags were successfully converted.
+func convertKeepstoreFlagsToServiceFlags(args []string, lgr logrus.FieldLogger) ([]string, bool) {
+       flags := flag.NewFlagSet("", flag.ContinueOnError)
+       flags.String("listen", "", "Services.Keepstore.InternalURLs")
+       flags.Int("max-buffers", 0, "API.MaxKeepBlobBuffers")
+       flags.Int("max-requests", 0, "API.MaxConcurrentRequests")
+       flags.Bool("never-delete", false, "Collections.BlobTrash")
+       flags.Bool("enforce-permissions", false, "Collections.BlobSigning")
+       flags.String("permission-key-file", "", "Collections.BlobSigningKey")
+       flags.String("blob-signing-key-file", "", "Collections.BlobSigningKey")
+       flags.String("data-manager-token-file", "", "SystemRootToken")
+       flags.Int("permission-ttl", 0, "Collections.BlobSigningTTL")
+       flags.Int("blob-signature-ttl", 0, "Collections.BlobSigningTTL")
+       flags.String("trash-lifetime", "", "Collections.BlobTrashLifetime")
+       flags.Bool("serialize", false, "Volumes.*.DriverParameters.Serialize")
+       flags.Bool("readonly", false, "Volumes.*.ReadOnly")
+       flags.String("pid", "", "-")
+       flags.String("trash-check-interval", "", "Collections.BlobTrashCheckInterval")
+
+       flags.String("azure-storage-container-volume", "", "Volumes.*.Driver")
+       flags.String("azure-storage-account-name", "", "Volumes.*.DriverParameters.StorageAccountName")
+       flags.String("azure-storage-account-key-file", "", "Volumes.*.DriverParameters.StorageAccountKey")
+       flags.String("azure-storage-replication", "", "Volumes.*.Replication")
+       flags.String("azure-max-get-bytes", "", "Volumes.*.DriverParameters.MaxDataReadSize")
+
+       flags.String("s3-bucket-volume", "", "Volumes.*.DriverParameters.Bucket")
+       flags.String("s3-region", "", "Volumes.*.DriverParameters.Region")
+       flags.String("s3-endpoint", "", "Volumes.*.DriverParameters.Endpoint")
+       flags.String("s3-access-key-file", "", "Volumes.*.DriverParameters.AccessKey")
+       flags.String("s3-secret-key-file", "", "Volumes.*.DriverParameters.SecretKey")
+       flags.String("s3-race-window", "", "Volumes.*.DriverParameters.RaceWindow")
+       flags.String("s3-replication", "", "Volumes.*.Replication")
+       flags.String("s3-unsafe-delete", "", "Volumes.*.DriverParameters.UnsafeDelete")
+
+       flags.String("volume", "", "Volumes")
+
+       flags.Bool("version", false, "")
+       flags.String("config", "", "")
+       flags.String("legacy-keepstore-config", "", "")
+
+       err := flags.Parse(args)
+       if err == flag.ErrHelp {
+               return []string{"-help"}, true
+       } else if err != nil {
+               return nil, false
+       }
+
+       args = nil
+       ok := true
+       flags.Visit(func(f *flag.Flag) {
+               if f.Name == "config" || f.Name == "legacy-keepstore-config" || f.Name == "version" {
+                       args = append(args, "-"+f.Name, f.Value.String())
+               } else if f.Usage == "-" {
+                       ok = false
+                       lgr.Errorf("command line flag -%s is no longer supported", f.Name)
+               } else {
+                       ok = false
+                       lgr.Errorf("command line flag -%s is no longer supported -- use Clusters.*.%s in cluster config file instead", f.Name, f.Usage)
+               }
+       })
+       if !ok {
+               return nil, false
+       }
+
+       flags = flag.NewFlagSet("", flag.ExitOnError)
+       loader := config.NewLoader(nil, lgr)
+       loader.SetupFlags(flags)
+       return loader.MungeLegacyConfigArgs(lgr, args, "-legacy-keepstore-config"), true
+}
+
+type handler struct {
+       http.Handler
+       Cluster *arvados.Cluster
+       Logger  logrus.FieldLogger
+
+       pullq      *WorkQueue
+       trashq     *WorkQueue
+       volmgr     *RRVolumeManager
+       keepClient *keepclient.KeepClient
+
+       err       error
+       setupOnce sync.Once
+}
+
+func (h *handler) CheckHealth() error {
+       return h.err
+}
+
+func newHandlerOrErrorHandler(ctx context.Context, cluster *arvados.Cluster, token string, reg *prometheus.Registry) service.Handler {
+       var h handler
+       serviceURL, ok := service.URLFromContext(ctx)
+       if !ok {
+               return service.ErrorHandler(ctx, cluster, errors.New("BUG: no URL from service.URLFromContext"))
+       }
+       err := h.setup(ctx, cluster, token, reg, serviceURL)
+       if err != nil {
+               return service.ErrorHandler(ctx, cluster, err)
+       }
+       return &h
+}
+
+func (h *handler) setup(ctx context.Context, cluster *arvados.Cluster, token string, reg *prometheus.Registry, serviceURL arvados.URL) error {
+       h.Cluster = cluster
+       h.Logger = ctxlog.FromContext(ctx)
+       if h.Cluster.API.MaxKeepBlobBuffers <= 0 {
+               return fmt.Errorf("API.MaxKeepBlobBuffers must be greater than zero")
+       }
+       bufs = newBufferPool(h.Logger, h.Cluster.API.MaxKeepBlobBuffers, BlockSize)
+
+       if h.Cluster.API.MaxConcurrentRequests < 1 {
+               h.Cluster.API.MaxConcurrentRequests = h.Cluster.API.MaxKeepBlobBuffers * 2
+               h.Logger.Warnf("API.MaxConcurrentRequests <1 or not specified; defaulting to MaxKeepBlobBuffers * 2 == %d", h.Cluster.API.MaxConcurrentRequests)
+       }
+
+       if h.Cluster.Collections.BlobSigningKey != "" {
+       } else if h.Cluster.Collections.BlobSigning {
+               return errors.New("cannot enable Collections.BlobSigning with no Collections.BlobSigningKey")
+       } else {
+               h.Logger.Warn("Running without a blob signing key. Block locators returned by this server will not be signed, and will be rejected by a server that enforces permissions. To fix this, configure Collections.BlobSigning and Collections.BlobSigningKey.")
+       }
+
+       if len(h.Cluster.Volumes) == 0 {
+               return errors.New("no volumes configured")
+       }
+
+       h.Logger.Printf("keepstore %s starting, pid %d", version, os.Getpid())
+
+       // Start a round-robin VolumeManager with the configured volumes.
+       vm, err := makeRRVolumeManager(h.Logger, h.Cluster, serviceURL, newVolumeMetricsVecs(reg))
+       if err != nil {
+               return err
+       }
+       if len(vm.readables) == 0 {
+               return fmt.Errorf("no volumes configured for %s", serviceURL)
+       }
+       h.volmgr = vm
+
+       // Initialize the pullq and workers
+       h.pullq = NewWorkQueue()
+       for i := 0; i < 1 || i < h.Cluster.Collections.BlobReplicateConcurrency; i++ {
+               go h.runPullWorker(h.pullq)
+       }
+
+       // Initialize the trashq and workers
+       h.trashq = NewWorkQueue()
+       for i := 0; i < 1 || i < h.Cluster.Collections.BlobTrashConcurrency; i++ {
+               go RunTrashWorker(h.volmgr, h.Cluster, h.trashq)
+       }
+
+       // Set up routes and metrics
+       h.Handler = MakeRESTRouter(ctx, cluster, reg, vm, h.pullq, h.trashq)
+
+       // Initialize keepclient for pull workers
+       c, err := arvados.NewClientFromConfig(cluster)
+       if err != nil {
+               return err
+       }
+       ac, err := arvadosclient.New(c)
+       if err != nil {
+               return err
+       }
+       h.keepClient = &keepclient.KeepClient{
+               Arvados:       ac,
+               Want_replicas: 1,
+       }
+       h.keepClient.Arvados.ApiToken = fmt.Sprintf("%x", rand.Int63())
+
+       if d := h.Cluster.Collections.BlobTrashCheckInterval.Duration(); d > 0 {
+               go emptyTrash(h.volmgr.writables, d)
+       }
+
+       return nil
+}
diff --git a/services/keepstore/command_test.go b/services/keepstore/command_test.go
new file mode 100644 (file)
index 0000000..ad2aa09
--- /dev/null
@@ -0,0 +1,29 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package main
+
+import (
+       "bytes"
+       "io/ioutil"
+       "os"
+
+       check "gopkg.in/check.v1"
+)
+
+var _ = check.Suite(&CommandSuite{})
+
+type CommandSuite struct{}
+
+func (*CommandSuite) TestLegacyConfigPath(c *check.C) {
+       var stdin, stdout, stderr bytes.Buffer
+       tmp, err := ioutil.TempFile("", "")
+       c.Assert(err, check.IsNil)
+       defer os.Remove(tmp.Name())
+       tmp.Write([]byte("Listen: \"1.2.3.4.5:invalidport\"\n"))
+       tmp.Close()
+       exited := runCommand("keepstore", []string{"-config", tmp.Name()}, &stdin, &stdout, &stderr)
+       c.Check(exited, check.Equals, 1)
+       c.Check(stderr.String(), check.Matches, `(?ms).*unable to migrate Listen value.*`)
+}
diff --git a/services/keepstore/config.go b/services/keepstore/config.go
deleted file mode 100644 (file)
index 43a2191..0000000
+++ /dev/null
@@ -1,226 +0,0 @@
-// Copyright (C) The Arvados Authors. All rights reserved.
-//
-// SPDX-License-Identifier: AGPL-3.0
-
-package main
-
-import (
-       "bytes"
-       "encoding/json"
-       "fmt"
-       "io/ioutil"
-       "strings"
-       "time"
-
-       "git.curoverse.com/arvados.git/sdk/go/arvados"
-       "github.com/prometheus/client_golang/prometheus"
-       "github.com/sirupsen/logrus"
-)
-
-type Config struct {
-       Debug  bool
-       Listen string
-
-       LogFormat string
-
-       PIDFile string
-
-       MaxBuffers  int
-       MaxRequests int
-
-       BlobSignatureTTL    arvados.Duration
-       BlobSigningKeyFile  string
-       RequireSignatures   bool
-       SystemAuthTokenFile string
-       EnableDelete        bool
-       TrashLifetime       arvados.Duration
-       TrashCheckInterval  arvados.Duration
-       PullWorkers         int
-       TrashWorkers        int
-       EmptyTrashWorkers   int
-       TLSCertificateFile  string
-       TLSKeyFile          string
-
-       Volumes VolumeList
-
-       blobSigningKey  []byte
-       systemAuthToken string
-       debugLogf       func(string, ...interface{})
-
-       ManagementToken string
-}
-
-var (
-       theConfig = DefaultConfig()
-       formatter = map[string]logrus.Formatter{
-               "text": &logrus.TextFormatter{
-                       FullTimestamp:   true,
-                       TimestampFormat: rfc3339NanoFixed,
-               },
-               "json": &logrus.JSONFormatter{
-                       TimestampFormat: rfc3339NanoFixed,
-               },
-       }
-       log = logrus.StandardLogger()
-)
-
-const rfc3339NanoFixed = "2006-01-02T15:04:05.000000000Z07:00"
-
-// DefaultConfig returns the default configuration.
-func DefaultConfig() *Config {
-       return &Config{
-               Listen:             ":25107",
-               LogFormat:          "json",
-               MaxBuffers:         128,
-               RequireSignatures:  true,
-               BlobSignatureTTL:   arvados.Duration(14 * 24 * time.Hour),
-               TrashLifetime:      arvados.Duration(14 * 24 * time.Hour),
-               TrashCheckInterval: arvados.Duration(24 * time.Hour),
-               Volumes:            []Volume{},
-       }
-}
-
-// Start should be called exactly once: after setting all public
-// fields, and before using the config.
-func (cfg *Config) Start(reg *prometheus.Registry) error {
-       if cfg.Debug {
-               log.Level = logrus.DebugLevel
-               cfg.debugLogf = log.Printf
-               cfg.debugLogf("debugging enabled")
-       } else {
-               log.Level = logrus.InfoLevel
-               cfg.debugLogf = func(string, ...interface{}) {}
-       }
-
-       f := formatter[strings.ToLower(cfg.LogFormat)]
-       if f == nil {
-               return fmt.Errorf(`unsupported log format %q (try "text" or "json")`, cfg.LogFormat)
-       }
-       log.Formatter = f
-
-       if cfg.MaxBuffers < 0 {
-               return fmt.Errorf("MaxBuffers must be greater than zero")
-       }
-       bufs = newBufferPool(cfg.MaxBuffers, BlockSize)
-
-       if cfg.MaxRequests < 1 {
-               cfg.MaxRequests = cfg.MaxBuffers * 2
-               log.Printf("MaxRequests <1 or not specified; defaulting to MaxBuffers * 2 == %d", cfg.MaxRequests)
-       }
-
-       if cfg.BlobSigningKeyFile != "" {
-               buf, err := ioutil.ReadFile(cfg.BlobSigningKeyFile)
-               if err != nil {
-                       return fmt.Errorf("reading blob signing key file: %s", err)
-               }
-               cfg.blobSigningKey = bytes.TrimSpace(buf)
-               if len(cfg.blobSigningKey) == 0 {
-                       return fmt.Errorf("blob signing key file %q is empty", cfg.BlobSigningKeyFile)
-               }
-       } else if cfg.RequireSignatures {
-               return fmt.Errorf("cannot enable RequireSignatures (-enforce-permissions) without a blob signing key")
-       } else {
-               log.Println("Running without a blob signing key. Block locators " +
-                       "returned by this server will not be signed, and will be rejected " +
-                       "by a server that enforces permissions.")
-               log.Println("To fix this, use the BlobSigningKeyFile config entry.")
-       }
-
-       if fn := cfg.SystemAuthTokenFile; fn != "" {
-               buf, err := ioutil.ReadFile(fn)
-               if err != nil {
-                       return fmt.Errorf("cannot read system auth token file %q: %s", fn, err)
-               }
-               cfg.systemAuthToken = strings.TrimSpace(string(buf))
-       }
-
-       if cfg.EnableDelete {
-               log.Print("Trash/delete features are enabled. WARNING: this has not " +
-                       "been extensively tested. You should disable this unless you can afford to lose data.")
-       }
-
-       if len(cfg.Volumes) == 0 {
-               if (&unixVolumeAdder{cfg}).Discover() == 0 {
-                       return fmt.Errorf("no volumes found")
-               }
-       }
-       vm := newVolumeMetricsVecs(reg)
-       for _, v := range cfg.Volumes {
-               if err := v.Start(vm); err != nil {
-                       return fmt.Errorf("volume %s: %s", v, err)
-               }
-               log.Printf("Using volume %v (writable=%v)", v, v.Writable())
-       }
-       return nil
-}
-
-// VolumeTypes is built up by init() funcs in the source files that
-// define the volume types.
-var VolumeTypes = []func() VolumeWithExamples{}
-
-type VolumeList []Volume
-
-// UnmarshalJSON -- given an array of objects -- deserializes each
-// object as the volume type indicated by the object's Type field.
-func (vl *VolumeList) UnmarshalJSON(data []byte) error {
-       typeMap := map[string]func() VolumeWithExamples{}
-       for _, factory := range VolumeTypes {
-               t := factory().Type()
-               if _, ok := typeMap[t]; ok {
-                       log.Fatalf("volume type %+q is claimed by multiple VolumeTypes", t)
-               }
-               typeMap[t] = factory
-       }
-
-       var mapList []map[string]interface{}
-       err := json.Unmarshal(data, &mapList)
-       if err != nil {
-               return err
-       }
-       for _, mapIn := range mapList {
-               typeIn, ok := mapIn["Type"].(string)
-               if !ok {
-                       return fmt.Errorf("invalid volume type %+v", mapIn["Type"])
-               }
-               factory, ok := typeMap[typeIn]
-               if !ok {
-                       return fmt.Errorf("unsupported volume type %+q", typeIn)
-               }
-               data, err := json.Marshal(mapIn)
-               if err != nil {
-                       return err
-               }
-               vol := factory()
-               err = json.Unmarshal(data, vol)
-               if err != nil {
-                       return err
-               }
-               *vl = append(*vl, vol)
-       }
-       return nil
-}
-
-// MarshalJSON adds a "Type" field to each volume corresponding to its
-// Type().
-func (vl *VolumeList) MarshalJSON() ([]byte, error) {
-       data := []byte{'['}
-       for _, vs := range *vl {
-               j, err := json.Marshal(vs)
-               if err != nil {
-                       return nil, err
-               }
-               if len(data) > 1 {
-                       data = append(data, byte(','))
-               }
-               t, err := json.Marshal(vs.Type())
-               if err != nil {
-                       panic(err)
-               }
-               data = append(data, j[0])
-               data = append(data, []byte(`"Type":`)...)
-               data = append(data, t...)
-               data = append(data, byte(','))
-               data = append(data, j[1:]...)
-       }
-       return append(data, byte(']')), nil
-}
diff --git a/services/keepstore/config_test.go b/services/keepstore/config_test.go
deleted file mode 100644 (file)
index e3b0ffc..0000000
+++ /dev/null
@@ -1,14 +0,0 @@
-// Copyright (C) The Arvados Authors. All rights reserved.
-//
-// SPDX-License-Identifier: AGPL-3.0
-
-package main
-
-import (
-       "github.com/sirupsen/logrus"
-)
-
-func init() {
-       log.Level = logrus.DebugLevel
-       theConfig.debugLogf = log.Printf
-}
diff --git a/services/keepstore/deprecated.go b/services/keepstore/deprecated.go
deleted file mode 100644 (file)
index d137797..0000000
+++ /dev/null
@@ -1,47 +0,0 @@
-// Copyright (C) The Arvados Authors. All rights reserved.
-//
-// SPDX-License-Identifier: AGPL-3.0
-
-package main
-
-import (
-       "flag"
-       "time"
-
-       "git.curoverse.com/arvados.git/sdk/go/arvados"
-)
-
-type deprecatedOptions struct {
-       flagSerializeIO     bool
-       flagReadonly        bool
-       neverDelete         bool
-       signatureTTLSeconds int
-}
-
-var deprecated = deprecatedOptions{
-       neverDelete:         !theConfig.EnableDelete,
-       signatureTTLSeconds: int(theConfig.BlobSignatureTTL.Duration() / time.Second),
-}
-
-func (depr *deprecatedOptions) beforeFlagParse(cfg *Config) {
-       flag.StringVar(&cfg.Listen, "listen", cfg.Listen, "see Listen configuration")
-       flag.IntVar(&cfg.MaxBuffers, "max-buffers", cfg.MaxBuffers, "see MaxBuffers configuration")
-       flag.IntVar(&cfg.MaxRequests, "max-requests", cfg.MaxRequests, "see MaxRequests configuration")
-       flag.BoolVar(&depr.neverDelete, "never-delete", depr.neverDelete, "see EnableDelete configuration")
-       flag.BoolVar(&cfg.RequireSignatures, "enforce-permissions", cfg.RequireSignatures, "see RequireSignatures configuration")
-       flag.StringVar(&cfg.BlobSigningKeyFile, "permission-key-file", cfg.BlobSigningKeyFile, "see BlobSigningKey`File` configuration")
-       flag.StringVar(&cfg.BlobSigningKeyFile, "blob-signing-key-file", cfg.BlobSigningKeyFile, "see BlobSigningKey`File` configuration")
-       flag.StringVar(&cfg.SystemAuthTokenFile, "data-manager-token-file", cfg.SystemAuthTokenFile, "see SystemAuthToken`File` configuration")
-       flag.IntVar(&depr.signatureTTLSeconds, "permission-ttl", depr.signatureTTLSeconds, "signature TTL in seconds; see BlobSignatureTTL configuration")
-       flag.IntVar(&depr.signatureTTLSeconds, "blob-signature-ttl", depr.signatureTTLSeconds, "signature TTL in seconds; see BlobSignatureTTL configuration")
-       flag.Var(&cfg.TrashLifetime, "trash-lifetime", "see TrashLifetime configuration")
-       flag.BoolVar(&depr.flagSerializeIO, "serialize", depr.flagSerializeIO, "serialize read and write operations on the following volumes.")
-       flag.BoolVar(&depr.flagReadonly, "readonly", depr.flagReadonly, "do not write, delete, or touch anything on the following volumes.")
-       flag.StringVar(&cfg.PIDFile, "pid", cfg.PIDFile, "see `PIDFile` configuration")
-       flag.Var(&cfg.TrashCheckInterval, "trash-check-interval", "see TrashCheckInterval configuration")
-}
-
-func (depr *deprecatedOptions) afterFlagParse(cfg *Config) {
-       cfg.BlobSignatureTTL = arvados.Duration(depr.signatureTTLSeconds) * arvados.Duration(time.Second)
-       cfg.EnableDelete = !depr.neverDelete
-}
index ad907ef10138f213e3831223d867fd3c114736d9..9d69b9fa47ef1b173b70e1f16617ba6dd9531351 100644 (file)
@@ -23,16 +23,49 @@ import (
        "os"
        "regexp"
        "strings"
-       "testing"
        "time"
 
+       "git.curoverse.com/arvados.git/lib/config"
        "git.curoverse.com/arvados.git/sdk/go/arvados"
        "git.curoverse.com/arvados.git/sdk/go/arvadostest"
+       "git.curoverse.com/arvados.git/sdk/go/ctxlog"
        "github.com/prometheus/client_golang/prometheus"
+       check "gopkg.in/check.v1"
 )
 
-var testCluster = &arvados.Cluster{
-       ClusterID: "zzzzz",
+var testServiceURL = func() arvados.URL {
+       return arvados.URL{Host: "localhost:12345", Scheme: "http"}
+}()
+
+func testCluster(t TB) *arvados.Cluster {
+       cfg, err := config.NewLoader(bytes.NewBufferString("Clusters: {zzzzz: {}}"), ctxlog.TestLogger(t)).Load()
+       if err != nil {
+               t.Fatal(err)
+       }
+       cluster, err := cfg.GetCluster("")
+       if err != nil {
+               t.Fatal(err)
+       }
+       cluster.SystemRootToken = arvadostest.DataManagerToken
+       cluster.ManagementToken = arvadostest.ManagementToken
+       cluster.Collections.BlobSigning = false
+       return cluster
+}
+
+var _ = check.Suite(&HandlerSuite{})
+
+type HandlerSuite struct {
+       cluster *arvados.Cluster
+       handler *handler
+}
+
+func (s *HandlerSuite) SetUpTest(c *check.C) {
+       s.cluster = testCluster(c)
+       s.cluster.Volumes = map[string]arvados.Volume{
+               "zzzzz-nyw5e-000000000000000": {Replication: 1, Driver: "mock"},
+               "zzzzz-nyw5e-111111111111111": {Replication: 1, Driver: "mock"},
+       }
+       s.handler = &handler{}
 }
 
 // A RequestTester represents the parameters for an HTTP request to
@@ -52,46 +85,41 @@ type RequestTester struct {
 //   - permissions on, authenticated request, expired locator
 //   - permissions on, authenticated request, signed locator, transient error from backend
 //
-func TestGetHandler(t *testing.T) {
-       defer teardown()
+func (s *HandlerSuite) TestGetHandler(c *check.C) {
+       c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
 
-       // Prepare two test Keep volumes. Our block is stored on the second volume.
-       KeepVM = MakeTestVolumeManager(2)
-       defer KeepVM.Close()
-
-       vols := KeepVM.AllWritable()
-       if err := vols[0].Put(context.Background(), TestHash, TestBlock); err != nil {
-               t.Error(err)
-       }
+       vols := s.handler.volmgr.AllWritable()
+       err := vols[0].Put(context.Background(), TestHash, TestBlock)
+       c.Check(err, check.IsNil)
 
        // Create locators for testing.
        // Turn on permission settings so we can generate signed locators.
-       theConfig.RequireSignatures = true
-       theConfig.blobSigningKey = []byte(knownKey)
-       theConfig.BlobSignatureTTL.Set("5m")
+       s.cluster.Collections.BlobSigning = true
+       s.cluster.Collections.BlobSigningKey = knownKey
+       s.cluster.Collections.BlobSigningTTL.Set("5m")
 
        var (
                unsignedLocator  = "/" + TestHash
-               validTimestamp   = time.Now().Add(theConfig.BlobSignatureTTL.Duration())
+               validTimestamp   = time.Now().Add(s.cluster.Collections.BlobSigningTTL.Duration())
                expiredTimestamp = time.Now().Add(-time.Hour)
-               signedLocator    = "/" + SignLocator(TestHash, knownToken, validTimestamp)
-               expiredLocator   = "/" + SignLocator(TestHash, knownToken, expiredTimestamp)
+               signedLocator    = "/" + SignLocator(s.cluster, TestHash, knownToken, validTimestamp)
+               expiredLocator   = "/" + SignLocator(s.cluster, TestHash, knownToken, expiredTimestamp)
        )
 
        // -----------------
        // Test unauthenticated request with permissions off.
-       theConfig.RequireSignatures = false
+       s.cluster.Collections.BlobSigning = false
 
        // Unauthenticated request, unsigned locator
        // => OK
-       response := IssueRequest(
+       response := IssueRequest(s.handler,
                &RequestTester{
                        method: "GET",
                        uri:    unsignedLocator,
                })
-       ExpectStatusCode(t,
+       ExpectStatusCode(c,
                "Unauthenticated request, unsigned locator", http.StatusOK, response)
-       ExpectBody(t,
+       ExpectBody(c,
                "Unauthenticated request, unsigned locator",
                string(TestBlock),
                response)
@@ -99,58 +127,58 @@ func TestGetHandler(t *testing.T) {
        receivedLen := response.Header().Get("Content-Length")
        expectedLen := fmt.Sprintf("%d", len(TestBlock))
        if receivedLen != expectedLen {
-               t.Errorf("expected Content-Length %s, got %s", expectedLen, receivedLen)
+               c.Errorf("expected Content-Length %s, got %s", expectedLen, receivedLen)
        }
 
        // ----------------
        // Permissions: on.
-       theConfig.RequireSignatures = true
+       s.cluster.Collections.BlobSigning = true
 
        // Authenticated request, signed locator
        // => OK
-       response = IssueRequest(&RequestTester{
+       response = IssueRequest(s.handler, &RequestTester{
                method:   "GET",
                uri:      signedLocator,
                apiToken: knownToken,
        })
-       ExpectStatusCode(t,
+       ExpectStatusCode(c,
                "Authenticated request, signed locator", http.StatusOK, response)
-       ExpectBody(t,
+       ExpectBody(c,
                "Authenticated request, signed locator", string(TestBlock), response)
 
        receivedLen = response.Header().Get("Content-Length")
        expectedLen = fmt.Sprintf("%d", len(TestBlock))
        if receivedLen != expectedLen {
-               t.Errorf("expected Content-Length %s, got %s", expectedLen, receivedLen)
+               c.Errorf("expected Content-Length %s, got %s", expectedLen, receivedLen)
        }
 
        // Authenticated request, unsigned locator
        // => PermissionError
-       response = IssueRequest(&RequestTester{
+       response = IssueRequest(s.handler, &RequestTester{
                method:   "GET",
                uri:      unsignedLocator,
                apiToken: knownToken,
        })
-       ExpectStatusCode(t, "unsigned locator", PermissionError.HTTPCode, response)
+       ExpectStatusCode(c, "unsigned locator", PermissionError.HTTPCode, response)
 
        // Unauthenticated request, signed locator
        // => PermissionError
-       response = IssueRequest(&RequestTester{
+       response = IssueRequest(s.handler, &RequestTester{
                method: "GET",
                uri:    signedLocator,
        })
-       ExpectStatusCode(t,
+       ExpectStatusCode(c,
                "Unauthenticated request, signed locator",
                PermissionError.HTTPCode, response)
 
        // Authenticated request, expired locator
        // => ExpiredError
-       response = IssueRequest(&RequestTester{
+       response = IssueRequest(s.handler, &RequestTester{
                method:   "GET",
                uri:      expiredLocator,
                apiToken: knownToken,
        })
-       ExpectStatusCode(t,
+       ExpectStatusCode(c,
                "Authenticated request, expired locator",
                ExpiredError.HTTPCode, response)
 
@@ -158,16 +186,16 @@ func TestGetHandler(t *testing.T) {
        // => 503 Server busy (transient error)
 
        // Set up the block owning volume to respond with errors
-       vols[0].(*MockVolume).Bad = true
-       vols[0].(*MockVolume).BadVolumeError = VolumeBusyError
-       response = IssueRequest(&RequestTester{
+       vols[0].Volume.(*MockVolume).Bad = true
+       vols[0].Volume.(*MockVolume).BadVolumeError = VolumeBusyError
+       response = IssueRequest(s.handler, &RequestTester{
                method:   "GET",
                uri:      signedLocator,
                apiToken: knownToken,
        })
        // A transient error from one volume while the other doesn't find the block
        // should make the service return a 503 so that clients can retry.
-       ExpectStatusCode(t,
+       ExpectStatusCode(c,
                "Volume backend busy",
                503, response)
 }
@@ -177,44 +205,42 @@ func TestGetHandler(t *testing.T) {
 //   - with server key, authenticated request, unsigned locator
 //   - with server key, unauthenticated request, unsigned locator
 //
-func TestPutHandler(t *testing.T) {
-       defer teardown()
-
-       // Prepare two test Keep volumes.
-       KeepVM = MakeTestVolumeManager(2)
-       defer KeepVM.Close()
+func (s *HandlerSuite) TestPutHandler(c *check.C) {
+       c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
 
        // --------------
        // No server key.
 
+       s.cluster.Collections.BlobSigningKey = ""
+
        // Unauthenticated request, no server key
        // => OK (unsigned response)
        unsignedLocator := "/" + TestHash
-       response := IssueRequest(
+       response := IssueRequest(s.handler,
                &RequestTester{
                        method:      "PUT",
                        uri:         unsignedLocator,
                        requestBody: TestBlock,
                })
 
-       ExpectStatusCode(t,
+       ExpectStatusCode(c,
                "Unauthenticated request, no server key", http.StatusOK, response)
-       ExpectBody(t,
+       ExpectBody(c,
                "Unauthenticated request, no server key",
                TestHashPutResp, response)
 
        // ------------------
        // With a server key.
 
-       theConfig.blobSigningKey = []byte(knownKey)
-       theConfig.BlobSignatureTTL.Set("5m")
+       s.cluster.Collections.BlobSigningKey = knownKey
+       s.cluster.Collections.BlobSigningTTL.Set("5m")
 
        // When a permission key is available, the locator returned
        // from an authenticated PUT request will be signed.
 
        // Authenticated PUT, signed locator
        // => OK (signed response)
-       response = IssueRequest(
+       response = IssueRequest(s.handler,
                &RequestTester{
                        method:      "PUT",
                        uri:         unsignedLocator,
@@ -222,76 +248,72 @@ func TestPutHandler(t *testing.T) {
                        apiToken:    knownToken,
                })
 
-       ExpectStatusCode(t,
+       ExpectStatusCode(c,
                "Authenticated PUT, signed locator, with server key",
                http.StatusOK, response)
        responseLocator := strings.TrimSpace(response.Body.String())
-       if VerifySignature(responseLocator, knownToken) != nil {
-               t.Errorf("Authenticated PUT, signed locator, with server key:\n"+
+       if VerifySignature(s.cluster, responseLocator, knownToken) != nil {
+               c.Errorf("Authenticated PUT, signed locator, with server key:\n"+
                        "response '%s' does not contain a valid signature",
                        responseLocator)
        }
 
        // Unauthenticated PUT, unsigned locator
        // => OK
-       response = IssueRequest(
+       response = IssueRequest(s.handler,
                &RequestTester{
                        method:      "PUT",
                        uri:         unsignedLocator,
                        requestBody: TestBlock,
                })
 
-       ExpectStatusCode(t,
+       ExpectStatusCode(c,
                "Unauthenticated PUT, unsigned locator, with server key",
                http.StatusOK, response)
-       ExpectBody(t,
+       ExpectBody(c,
                "Unauthenticated PUT, unsigned locator, with server key",
                TestHashPutResp, response)
 }
 
-func TestPutAndDeleteSkipReadonlyVolumes(t *testing.T) {
-       defer teardown()
-       theConfig.systemAuthToken = "fake-data-manager-token"
-       vols := []*MockVolume{CreateMockVolume(), CreateMockVolume()}
-       vols[0].Readonly = true
-       KeepVM = MakeRRVolumeManager([]Volume{vols[0], vols[1]})
-       defer KeepVM.Close()
-       IssueRequest(
+func (s *HandlerSuite) TestPutAndDeleteSkipReadonlyVolumes(c *check.C) {
+       s.cluster.Volumes["zzzzz-nyw5e-000000000000000"] = arvados.Volume{Driver: "mock", ReadOnly: true}
+       c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
+
+       s.cluster.SystemRootToken = "fake-data-manager-token"
+       IssueRequest(s.handler,
                &RequestTester{
                        method:      "PUT",
                        uri:         "/" + TestHash,
                        requestBody: TestBlock,
                })
-       defer func(orig bool) {
-               theConfig.EnableDelete = orig
-       }(theConfig.EnableDelete)
-       theConfig.EnableDelete = true
-       IssueRequest(
+
+       s.cluster.Collections.BlobTrash = true
+       IssueRequest(s.handler,
                &RequestTester{
                        method:      "DELETE",
                        uri:         "/" + TestHash,
                        requestBody: TestBlock,
-                       apiToken:    theConfig.systemAuthToken,
+                       apiToken:    s.cluster.SystemRootToken,
                })
        type expect struct {
-               volnum    int
+               volid     string
                method    string
                callcount int
        }
        for _, e := range []expect{
-               {0, "Get", 0},
-               {0, "Compare", 0},
-               {0, "Touch", 0},
-               {0, "Put", 0},
-               {0, "Delete", 0},
-               {1, "Get", 0},
-               {1, "Compare", 1},
-               {1, "Touch", 1},
-               {1, "Put", 1},
-               {1, "Delete", 1},
+               {"zzzzz-nyw5e-000000000000000", "Get", 0},
+               {"zzzzz-nyw5e-000000000000000", "Compare", 0},
+               {"zzzzz-nyw5e-000000000000000", "Touch", 0},
+               {"zzzzz-nyw5e-000000000000000", "Put", 0},
+               {"zzzzz-nyw5e-000000000000000", "Delete", 0},
+               {"zzzzz-nyw5e-111111111111111", "Get", 0},
+               {"zzzzz-nyw5e-111111111111111", "Compare", 1},
+               {"zzzzz-nyw5e-111111111111111", "Touch", 1},
+               {"zzzzz-nyw5e-111111111111111", "Put", 1},
+               {"zzzzz-nyw5e-111111111111111", "Delete", 1},
        } {
-               if calls := vols[e.volnum].CallCount(e.method); calls != e.callcount {
-                       t.Errorf("Got %d %s() on vol %d, expect %d", calls, e.method, e.volnum, e.callcount)
+               if calls := s.handler.volmgr.mountMap[e.volid].Volume.(*MockVolume).CallCount(e.method); calls != e.callcount {
+                       c.Errorf("Got %d %s() on vol %s, expect %d", calls, e.method, e.volid, e.callcount)
                }
        }
 }
@@ -305,24 +327,20 @@ func TestPutAndDeleteSkipReadonlyVolumes(t *testing.T) {
 //   - authenticated   /index/prefix request | superuser
 //
 // The only /index requests that should succeed are those issued by the
-// superuser. They should pass regardless of the value of RequireSignatures.
+// superuser. They should pass regardless of the value of BlobSigning.
 //
-func TestIndexHandler(t *testing.T) {
-       defer teardown()
+func (s *HandlerSuite) TestIndexHandler(c *check.C) {
+       c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
 
-       // Set up Keep volumes and populate them.
        // Include multiple blocks on different volumes, and
        // some metadata files (which should be omitted from index listings)
-       KeepVM = MakeTestVolumeManager(2)
-       defer KeepVM.Close()
-
-       vols := KeepVM.AllWritable()
+       vols := s.handler.volmgr.AllWritable()
        vols[0].Put(context.Background(), TestHash, TestBlock)
        vols[1].Put(context.Background(), TestHash2, TestBlock2)
        vols[0].Put(context.Background(), TestHash+".meta", []byte("metadata"))
        vols[1].Put(context.Background(), TestHash2+".meta", []byte("metadata"))
 
-       theConfig.systemAuthToken = "DATA MANAGER TOKEN"
+       s.cluster.SystemRootToken = "DATA MANAGER TOKEN"
 
        unauthenticatedReq := &RequestTester{
                method: "GET",
@@ -336,7 +354,7 @@ func TestIndexHandler(t *testing.T) {
        superuserReq := &RequestTester{
                method:   "GET",
                uri:      "/index",
-               apiToken: theConfig.systemAuthToken,
+               apiToken: s.cluster.SystemRootToken,
        }
        unauthPrefixReq := &RequestTester{
                method: "GET",
@@ -350,76 +368,76 @@ func TestIndexHandler(t *testing.T) {
        superuserPrefixReq := &RequestTester{
                method:   "GET",
                uri:      "/index/" + TestHash[0:3],
-               apiToken: theConfig.systemAuthToken,
+               apiToken: s.cluster.SystemRootToken,
        }
        superuserNoSuchPrefixReq := &RequestTester{
                method:   "GET",
                uri:      "/index/abcd",
-               apiToken: theConfig.systemAuthToken,
+               apiToken: s.cluster.SystemRootToken,
        }
        superuserInvalidPrefixReq := &RequestTester{
                method:   "GET",
                uri:      "/index/xyz",
-               apiToken: theConfig.systemAuthToken,
+               apiToken: s.cluster.SystemRootToken,
        }
 
        // -------------------------------------------------------------
        // Only the superuser should be allowed to issue /index requests.
 
        // ---------------------------
-       // RequireSignatures enabled
+       // BlobSigning enabled
        // This setting should not affect tests passing.
-       theConfig.RequireSignatures = true
+       s.cluster.Collections.BlobSigning = true
 
        // unauthenticated /index request
        // => UnauthorizedError
-       response := IssueRequest(unauthenticatedReq)
-       ExpectStatusCode(t,
-               "RequireSignatures on, unauthenticated request",
+       response := IssueRequest(s.handler, unauthenticatedReq)
+       ExpectStatusCode(c,
+               "permissions on, unauthenticated request",
                UnauthorizedError.HTTPCode,
                response)
 
        // unauthenticated /index/prefix request
        // => UnauthorizedError
-       response = IssueRequest(unauthPrefixReq)
-       ExpectStatusCode(t,
+       response = IssueRequest(s.handler, unauthPrefixReq)
+       ExpectStatusCode(c,
                "permissions on, unauthenticated /index/prefix request",
                UnauthorizedError.HTTPCode,
                response)
 
        // authenticated /index request, non-superuser
        // => UnauthorizedError
-       response = IssueRequest(authenticatedReq)
-       ExpectStatusCode(t,
+       response = IssueRequest(s.handler, authenticatedReq)
+       ExpectStatusCode(c,
                "permissions on, authenticated request, non-superuser",
                UnauthorizedError.HTTPCode,
                response)
 
        // authenticated /index/prefix request, non-superuser
        // => UnauthorizedError
-       response = IssueRequest(authPrefixReq)
-       ExpectStatusCode(t,
+       response = IssueRequest(s.handler, authPrefixReq)
+       ExpectStatusCode(c,
                "permissions on, authenticated /index/prefix request, non-superuser",
                UnauthorizedError.HTTPCode,
                response)
 
        // superuser /index request
        // => OK
-       response = IssueRequest(superuserReq)
-       ExpectStatusCode(t,
+       response = IssueRequest(s.handler, superuserReq)
+       ExpectStatusCode(c,
                "permissions on, superuser request",
                http.StatusOK,
                response)
 
        // ----------------------------
-       // RequireSignatures disabled
+       // BlobSigning disabled
        // Valid Request should still pass.
-       theConfig.RequireSignatures = false
+       s.cluster.Collections.BlobSigning = false
 
        // superuser /index request
        // => OK
-       response = IssueRequest(superuserReq)
-       ExpectStatusCode(t,
+       response = IssueRequest(s.handler, superuserReq)
+       ExpectStatusCode(c,
                "permissions on, superuser request",
                http.StatusOK,
                response)
@@ -428,15 +446,15 @@ func TestIndexHandler(t *testing.T) {
                TestHash2 + `\+\d+ \d+\n\n$`
        match, _ := regexp.MatchString(expected, response.Body.String())
        if !match {
-               t.Errorf(
+               c.Errorf(
                        "permissions on, superuser request: expected %s, got:\n%s",
                        expected, response.Body.String())
        }
 
        // superuser /index/prefix request
        // => OK
-       response = IssueRequest(superuserPrefixReq)
-       ExpectStatusCode(t,
+       response = IssueRequest(s.handler, superuserPrefixReq)
+       ExpectStatusCode(c,
                "permissions on, superuser request",
                http.StatusOK,
                response)
@@ -444,27 +462,27 @@ func TestIndexHandler(t *testing.T) {
        expected = `^` + TestHash + `\+\d+ \d+\n\n$`
        match, _ = regexp.MatchString(expected, response.Body.String())
        if !match {
-               t.Errorf(
+               c.Errorf(
                        "permissions on, superuser /index/prefix request: expected %s, got:\n%s",
                        expected, response.Body.String())
        }
 
        // superuser /index/{no-such-prefix} request
        // => OK
-       response = IssueRequest(superuserNoSuchPrefixReq)
-       ExpectStatusCode(t,
+       response = IssueRequest(s.handler, superuserNoSuchPrefixReq)
+       ExpectStatusCode(c,
                "permissions on, superuser request",
                http.StatusOK,
                response)
 
        if "\n" != response.Body.String() {
-               t.Errorf("Expected empty response for %s. Found %s", superuserNoSuchPrefixReq.uri, response.Body.String())
+               c.Errorf("Expected empty response for %s. Found %s", superuserNoSuchPrefixReq.uri, response.Body.String())
        }
 
        // superuser /index/{invalid-prefix} request
        // => StatusBadRequest
-       response = IssueRequest(superuserInvalidPrefixReq)
-       ExpectStatusCode(t,
+       response = IssueRequest(s.handler, superuserInvalidPrefixReq)
+       ExpectStatusCode(c,
                "permissions on, superuser request",
                http.StatusBadRequest,
                response)
@@ -496,27 +514,21 @@ func TestIndexHandler(t *testing.T) {
 //     (test for 200 OK, response with copies_deleted=0, copies_failed=1,
 //     confirm block not deleted)
 //
-func TestDeleteHandler(t *testing.T) {
-       defer teardown()
-
-       // Set up Keep volumes and populate them.
-       // Include multiple blocks on different volumes, and
-       // some metadata files (which should be omitted from index listings)
-       KeepVM = MakeTestVolumeManager(2)
-       defer KeepVM.Close()
+func (s *HandlerSuite) TestDeleteHandler(c *check.C) {
+       c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
 
-       vols := KeepVM.AllWritable()
+       vols := s.handler.volmgr.AllWritable()
        vols[0].Put(context.Background(), TestHash, TestBlock)
 
-       // Explicitly set the BlobSignatureTTL to 0 for these
+       // Explicitly set the BlobSigningTTL to 0 for these
        // tests, to ensure the MockVolume deletes the blocks
        // even though they have just been created.
-       theConfig.BlobSignatureTTL = arvados.Duration(0)
+       s.cluster.Collections.BlobSigningTTL = arvados.Duration(0)
 
        var userToken = "NOT DATA MANAGER TOKEN"
-       theConfig.systemAuthToken = "DATA MANAGER TOKEN"
+       s.cluster.SystemRootToken = "DATA MANAGER TOKEN"
 
-       theConfig.EnableDelete = true
+       s.cluster.Collections.BlobTrash = true
 
        unauthReq := &RequestTester{
                method: "DELETE",
@@ -532,26 +544,26 @@ func TestDeleteHandler(t *testing.T) {
        superuserExistingBlockReq := &RequestTester{
                method:   "DELETE",
                uri:      "/" + TestHash,
-               apiToken: theConfig.systemAuthToken,
+               apiToken: s.cluster.SystemRootToken,
        }
 
        superuserNonexistentBlockReq := &RequestTester{
                method:   "DELETE",
                uri:      "/" + TestHash2,
-               apiToken: theConfig.systemAuthToken,
+               apiToken: s.cluster.SystemRootToken,
        }
 
        // Unauthenticated request returns PermissionError.
        var response *httptest.ResponseRecorder
-       response = IssueRequest(unauthReq)
-       ExpectStatusCode(t,
+       response = IssueRequest(s.handler, unauthReq)
+       ExpectStatusCode(c,
                "unauthenticated request",
                PermissionError.HTTPCode,
                response)
 
        // Authenticated non-admin request returns PermissionError.
-       response = IssueRequest(userReq)
-       ExpectStatusCode(t,
+       response = IssueRequest(s.handler, userReq)
+       ExpectStatusCode(c,
                "authenticated non-admin request",
                PermissionError.HTTPCode,
                response)
@@ -563,24 +575,24 @@ func TestDeleteHandler(t *testing.T) {
        }
        var responseDc, expectedDc deletecounter
 
-       response = IssueRequest(superuserNonexistentBlockReq)
-       ExpectStatusCode(t,
+       response = IssueRequest(s.handler, superuserNonexistentBlockReq)
+       ExpectStatusCode(c,
                "data manager request, nonexistent block",
                http.StatusNotFound,
                response)
 
-       // Authenticated admin request for existing block while EnableDelete is false.
-       theConfig.EnableDelete = false
-       response = IssueRequest(superuserExistingBlockReq)
-       ExpectStatusCode(t,
+       // Authenticated admin request for existing block while BlobTrash is false.
+       s.cluster.Collections.BlobTrash = false
+       response = IssueRequest(s.handler, superuserExistingBlockReq)
+       ExpectStatusCode(c,
                "authenticated request, existing block, method disabled",
                MethodDisabledError.HTTPCode,
                response)
-       theConfig.EnableDelete = true
+       s.cluster.Collections.BlobTrash = true
 
        // Authenticated admin request for existing block.
-       response = IssueRequest(superuserExistingBlockReq)
-       ExpectStatusCode(t,
+       response = IssueRequest(s.handler, superuserExistingBlockReq)
+       ExpectStatusCode(c,
                "data manager request, existing block",
                http.StatusOK,
                response)
@@ -588,7 +600,7 @@ func TestDeleteHandler(t *testing.T) {
        expectedDc = deletecounter{1, 0}
        json.NewDecoder(response.Body).Decode(&responseDc)
        if responseDc != expectedDc {
-               t.Errorf("superuserExistingBlockReq\nexpected: %+v\nreceived: %+v",
+               c.Errorf("superuserExistingBlockReq\nexpected: %+v\nreceived: %+v",
                        expectedDc, responseDc)
        }
        // Confirm the block has been deleted
@@ -596,16 +608,16 @@ func TestDeleteHandler(t *testing.T) {
        _, err := vols[0].Get(context.Background(), TestHash, buf)
        var blockDeleted = os.IsNotExist(err)
        if !blockDeleted {
-               t.Error("superuserExistingBlockReq: block not deleted")
+               c.Error("superuserExistingBlockReq: block not deleted")
        }
 
-       // A DELETE request on a block newer than BlobSignatureTTL
+       // A DELETE request on a block newer than BlobSigningTTL
        // should return success but leave the block on the volume.
        vols[0].Put(context.Background(), TestHash, TestBlock)
-       theConfig.BlobSignatureTTL = arvados.Duration(time.Hour)
+       s.cluster.Collections.BlobSigningTTL = arvados.Duration(time.Hour)
 
-       response = IssueRequest(superuserExistingBlockReq)
-       ExpectStatusCode(t,
+       response = IssueRequest(s.handler, superuserExistingBlockReq)
+       ExpectStatusCode(c,
                "data manager request, existing block",
                http.StatusOK,
                response)
@@ -613,13 +625,13 @@ func TestDeleteHandler(t *testing.T) {
        expectedDc = deletecounter{1, 0}
        json.NewDecoder(response.Body).Decode(&responseDc)
        if responseDc != expectedDc {
-               t.Errorf("superuserExistingBlockReq\nexpected: %+v\nreceived: %+v",
+               c.Errorf("superuserExistingBlockReq\nexpected: %+v\nreceived: %+v",
                        expectedDc, responseDc)
        }
        // Confirm the block has NOT been deleted.
        _, err = vols[0].Get(context.Background(), TestHash, buf)
        if err != nil {
-               t.Errorf("testing delete on new block: %s\n", err)
+               c.Errorf("testing delete on new block: %s\n", err)
        }
 }
 
@@ -650,29 +662,33 @@ func TestDeleteHandler(t *testing.T) {
 // pull list simultaneously.  Make sure that none of them return 400
 // Bad Request and that pullq.GetList() returns a valid list.
 //
-func TestPullHandler(t *testing.T) {
-       defer teardown()
+func (s *HandlerSuite) TestPullHandler(c *check.C) {
+       c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
 
-       var userToken = "USER TOKEN"
-       theConfig.systemAuthToken = "DATA MANAGER TOKEN"
+       // Replace the router's pullq -- which the worker goroutines
+       // started by setup() are now receiving from -- with a new
+       // one, so we can see what the handler sends to it.
+       pullq := NewWorkQueue()
+       s.handler.Handler.(*router).pullq = pullq
 
-       pullq = NewWorkQueue()
+       var userToken = "USER TOKEN"
+       s.cluster.SystemRootToken = "DATA MANAGER TOKEN"
 
        goodJSON := []byte(`[
                {
-                       "locator":"locator_with_two_servers",
+                       "locator":"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa+12345",
                        "servers":[
-                               "server1",
-                               "server2"
+                               "http://server1",
+                               "http://server2"
                        ]
                },
                {
-                       "locator":"locator_with_no_servers",
+                       "locator":"bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb+12345",
                        "servers":[]
                },
                {
-                       "locator":"",
-                       "servers":["empty_locator"]
+                       "locator":"cccccccccccccccccccccccccccccccc+12345",
+                       "servers":["http://server1"]
                }
        ]`)
 
@@ -699,34 +715,39 @@ func TestPullHandler(t *testing.T) {
                },
                {
                        "Valid pull request from the data manager",
-                       RequestTester{"/pull", theConfig.systemAuthToken, "PUT", goodJSON},
+                       RequestTester{"/pull", s.cluster.SystemRootToken, "PUT", goodJSON},
                        http.StatusOK,
                        "Received 3 pull requests\n",
                },
                {
                        "Invalid pull request from the data manager",
-                       RequestTester{"/pull", theConfig.systemAuthToken, "PUT", badJSON},
+                       RequestTester{"/pull", s.cluster.SystemRootToken, "PUT", badJSON},
                        http.StatusBadRequest,
                        "",
                },
        }
 
        for _, tst := range testcases {
-               response := IssueRequest(&tst.req)
-               ExpectStatusCode(t, tst.name, tst.responseCode, response)
-               ExpectBody(t, tst.name, tst.responseBody, response)
+               response := IssueRequest(s.handler, &tst.req)
+               ExpectStatusCode(c, tst.name, tst.responseCode, response)
+               ExpectBody(c, tst.name, tst.responseBody, response)
        }
 
        // The Keep pull manager should have received one good list with 3
        // requests on it.
        for i := 0; i < 3; i++ {
-               item := <-pullq.NextItem
+               var item interface{}
+               select {
+               case item = <-pullq.NextItem:
+               case <-time.After(time.Second):
+                       c.Error("timed out")
+               }
                if _, ok := item.(PullRequest); !ok {
-                       t.Errorf("item %v could not be parsed as a PullRequest", item)
+                       c.Errorf("item %v could not be parsed as a PullRequest", item)
                }
        }
 
-       expectChannelEmpty(t, pullq.NextItem)
+       expectChannelEmpty(c, pullq.NextItem)
 }
 
 // TestTrashHandler
@@ -756,13 +777,16 @@ func TestPullHandler(t *testing.T) {
 // pull list simultaneously.  Make sure that none of them return 400
 // Bad Request and that replica.Dump() returns a valid list.
 //
-func TestTrashHandler(t *testing.T) {
-       defer teardown()
+func (s *HandlerSuite) TestTrashHandler(c *check.C) {
+       c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
+       // Replace the router's trashq -- which the worker goroutines
+       // started by setup() are now receiving from -- with a new
+       // one, so we can see what the handler sends to it.
+       trashq := NewWorkQueue()
+       s.handler.Handler.(*router).trashq = trashq
 
        var userToken = "USER TOKEN"
-       theConfig.systemAuthToken = "DATA MANAGER TOKEN"
-
-       trashq = NewWorkQueue()
+       s.cluster.SystemRootToken = "DATA MANAGER TOKEN"
 
        goodJSON := []byte(`[
                {
@@ -803,22 +827,22 @@ func TestTrashHandler(t *testing.T) {
                },
                {
                        "Valid trash list from the data manager",
-                       RequestTester{"/trash", theConfig.systemAuthToken, "PUT", goodJSON},
+                       RequestTester{"/trash", s.cluster.SystemRootToken, "PUT", goodJSON},
                        http.StatusOK,
                        "Received 3 trash requests\n",
                },
                {
                        "Invalid trash list from the data manager",
-                       RequestTester{"/trash", theConfig.systemAuthToken, "PUT", badJSON},
+                       RequestTester{"/trash", s.cluster.SystemRootToken, "PUT", badJSON},
                        http.StatusBadRequest,
                        "",
                },
        }
 
        for _, tst := range testcases {
-               response := IssueRequest(&tst.req)
-               ExpectStatusCode(t, tst.name, tst.responseCode, response)
-               ExpectBody(t, tst.name, tst.responseBody, response)
+               response := IssueRequest(s.handler, &tst.req)
+               ExpectStatusCode(c, tst.name, tst.responseCode, response)
+               ExpectBody(c, tst.name, tst.responseBody, response)
        }
 
        // The trash collector should have received one good list with 3
@@ -826,11 +850,11 @@ func TestTrashHandler(t *testing.T) {
        for i := 0; i < 3; i++ {
                item := <-trashq.NextItem
                if _, ok := item.(TrashRequest); !ok {
-                       t.Errorf("item %v could not be parsed as a TrashRequest", item)
+                       c.Errorf("item %v could not be parsed as a TrashRequest", item)
                }
        }
 
-       expectChannelEmpty(t, trashq.NextItem)
+       expectChannelEmpty(c, trashq.NextItem)
 }
 
 // ====================
@@ -839,75 +863,71 @@ func TestTrashHandler(t *testing.T) {
 
 // IssueTestRequest executes an HTTP request described by rt, to a
 // REST router.  It returns the HTTP response to the request.
-func IssueRequest(rt *RequestTester) *httptest.ResponseRecorder {
+func IssueRequest(handler http.Handler, rt *RequestTester) *httptest.ResponseRecorder {
        response := httptest.NewRecorder()
        body := bytes.NewReader(rt.requestBody)
        req, _ := http.NewRequest(rt.method, rt.uri, body)
        if rt.apiToken != "" {
                req.Header.Set("Authorization", "OAuth2 "+rt.apiToken)
        }
-       loggingRouter := MakeRESTRouter(testCluster, prometheus.NewRegistry())
-       loggingRouter.ServeHTTP(response, req)
+       handler.ServeHTTP(response, req)
        return response
 }
 
-func IssueHealthCheckRequest(rt *RequestTester) *httptest.ResponseRecorder {
+func IssueHealthCheckRequest(handler http.Handler, rt *RequestTester) *httptest.ResponseRecorder {
        response := httptest.NewRecorder()
        body := bytes.NewReader(rt.requestBody)
        req, _ := http.NewRequest(rt.method, rt.uri, body)
        if rt.apiToken != "" {
                req.Header.Set("Authorization", "Bearer "+rt.apiToken)
        }
-       loggingRouter := MakeRESTRouter(testCluster, prometheus.NewRegistry())
-       loggingRouter.ServeHTTP(response, req)
+       handler.ServeHTTP(response, req)
        return response
 }
 
 // ExpectStatusCode checks whether a response has the specified status code,
 // and reports a test failure if not.
 func ExpectStatusCode(
-       t *testing.T,
+       c *check.C,
        testname string,
        expectedStatus int,
        response *httptest.ResponseRecorder) {
        if response.Code != expectedStatus {
-               t.Errorf("%s: expected status %d, got %+v",
+               c.Errorf("%s: expected status %d, got %+v",
                        testname, expectedStatus, response)
        }
 }
 
 func ExpectBody(
-       t *testing.T,
+       c *check.C,
        testname string,
        expectedBody string,
        response *httptest.ResponseRecorder) {
        if expectedBody != "" && response.Body.String() != expectedBody {
-               t.Errorf("%s: expected response body '%s', got %+v",
+               c.Errorf("%s: expected response body '%s', got %+v",
                        testname, expectedBody, response)
        }
 }
 
 // See #7121
-func TestPutNeedsOnlyOneBuffer(t *testing.T) {
-       defer teardown()
-       KeepVM = MakeTestVolumeManager(1)
-       defer KeepVM.Close()
+func (s *HandlerSuite) TestPutNeedsOnlyOneBuffer(c *check.C) {
+       c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
 
        defer func(orig *bufferPool) {
                bufs = orig
        }(bufs)
-       bufs = newBufferPool(1, BlockSize)
+       bufs = newBufferPool(ctxlog.TestLogger(c), 1, BlockSize)
 
        ok := make(chan struct{})
        go func() {
                for i := 0; i < 2; i++ {
-                       response := IssueRequest(
+                       response := IssueRequest(s.handler,
                                &RequestTester{
                                        method:      "PUT",
                                        uri:         "/" + TestHash,
                                        requestBody: TestBlock,
                                })
-                       ExpectStatusCode(t,
+                       ExpectStatusCode(c,
                                "TestPutNeedsOnlyOneBuffer", http.StatusOK, response)
                }
                ok <- struct{}{}
@@ -916,34 +936,30 @@ func TestPutNeedsOnlyOneBuffer(t *testing.T) {
        select {
        case <-ok:
        case <-time.After(time.Second):
-               t.Fatal("PUT deadlocks with MaxBuffers==1")
+               c.Fatal("PUT deadlocks with MaxKeepBlobBuffers==1")
        }
 }
 
 // Invoke the PutBlockHandler a bunch of times to test for bufferpool resource
 // leak.
-func TestPutHandlerNoBufferleak(t *testing.T) {
-       defer teardown()
-
-       // Prepare two test Keep volumes.
-       KeepVM = MakeTestVolumeManager(2)
-       defer KeepVM.Close()
+func (s *HandlerSuite) TestPutHandlerNoBufferleak(c *check.C) {
+       c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
 
        ok := make(chan bool)
        go func() {
-               for i := 0; i < theConfig.MaxBuffers+1; i++ {
+               for i := 0; i < s.cluster.API.MaxKeepBlobBuffers+1; i++ {
                        // Unauthenticated request, no server key
                        // => OK (unsigned response)
                        unsignedLocator := "/" + TestHash
-                       response := IssueRequest(
+                       response := IssueRequest(s.handler,
                                &RequestTester{
                                        method:      "PUT",
                                        uri:         unsignedLocator,
                                        requestBody: TestBlock,
                                })
-                       ExpectStatusCode(t,
+                       ExpectStatusCode(c,
                                "TestPutHandlerBufferleak", http.StatusOK, response)
-                       ExpectBody(t,
+                       ExpectBody(c,
                                "TestPutHandlerBufferleak",
                                TestHashPutResp, response)
                }
@@ -952,7 +968,7 @@ func TestPutHandlerNoBufferleak(t *testing.T) {
        select {
        case <-time.After(20 * time.Second):
                // If the buffer pool leaks, the test goroutine hangs.
-               t.Fatal("test did not finish, assuming pool leaked")
+               c.Fatal("test did not finish, assuming pool leaked")
        case <-ok:
        }
 }
@@ -966,23 +982,18 @@ func (r *notifyingResponseRecorder) CloseNotify() <-chan bool {
        return r.closer
 }
 
-func TestGetHandlerClientDisconnect(t *testing.T) {
-       defer func(was bool) {
-               theConfig.RequireSignatures = was
-       }(theConfig.RequireSignatures)
-       theConfig.RequireSignatures = false
+func (s *HandlerSuite) TestGetHandlerClientDisconnect(c *check.C) {
+       s.cluster.Collections.BlobSigning = false
+       c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
 
        defer func(orig *bufferPool) {
                bufs = orig
        }(bufs)
-       bufs = newBufferPool(1, BlockSize)
+       bufs = newBufferPool(ctxlog.TestLogger(c), 1, BlockSize)
        defer bufs.Put(bufs.Get(BlockSize))
 
-       KeepVM = MakeTestVolumeManager(2)
-       defer KeepVM.Close()
-
-       if err := KeepVM.AllWritable()[0].Put(context.Background(), TestHash, TestBlock); err != nil {
-               t.Error(err)
+       if err := s.handler.volmgr.AllWritable()[0].Put(context.Background(), TestHash, TestBlock); err != nil {
+               c.Error(err)
        }
 
        resp := &notifyingResponseRecorder{
@@ -990,7 +1001,7 @@ func TestGetHandlerClientDisconnect(t *testing.T) {
                closer:           make(chan bool, 1),
        }
        if _, ok := http.ResponseWriter(resp).(http.CloseNotifier); !ok {
-               t.Fatal("notifyingResponseRecorder is broken")
+               c.Fatal("notifyingResponseRecorder is broken")
        }
        // If anyone asks, the client has disconnected.
        resp.closer <- true
@@ -998,52 +1009,48 @@ func TestGetHandlerClientDisconnect(t *testing.T) {
        ok := make(chan struct{})
        go func() {
                req, _ := http.NewRequest("GET", fmt.Sprintf("/%s+%d", TestHash, len(TestBlock)), nil)
-               MakeRESTRouter(testCluster, prometheus.NewRegistry()).ServeHTTP(resp, req)
+               s.handler.ServeHTTP(resp, req)
                ok <- struct{}{}
        }()
 
        select {
        case <-time.After(20 * time.Second):
-               t.Fatal("request took >20s, close notifier must be broken")
+               c.Fatal("request took >20s, close notifier must be broken")
        case <-ok:
        }
 
-       ExpectStatusCode(t, "client disconnect", http.StatusServiceUnavailable, resp.ResponseRecorder)
-       for i, v := range KeepVM.AllWritable() {
-               if calls := v.(*MockVolume).called["GET"]; calls != 0 {
-                       t.Errorf("volume %d got %d calls, expected 0", i, calls)
+       ExpectStatusCode(c, "client disconnect", http.StatusServiceUnavailable, resp.ResponseRecorder)
+       for i, v := range s.handler.volmgr.AllWritable() {
+               if calls := v.Volume.(*MockVolume).called["GET"]; calls != 0 {
+                       c.Errorf("volume %d got %d calls, expected 0", i, calls)
                }
        }
 }
 
 // Invoke the GetBlockHandler a bunch of times to test for bufferpool resource
 // leak.
-func TestGetHandlerNoBufferLeak(t *testing.T) {
-       defer teardown()
-
-       // Prepare two test Keep volumes. Our block is stored on the second volume.
-       KeepVM = MakeTestVolumeManager(2)
-       defer KeepVM.Close()
+func (s *HandlerSuite) TestGetHandlerNoBufferLeak(c *check.C) {
+       c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
 
-       vols := KeepVM.AllWritable()
+       vols := s.handler.volmgr.AllWritable()
        if err := vols[0].Put(context.Background(), TestHash, TestBlock); err != nil {
-               t.Error(err)
+               c.Error(err)
        }
 
        ok := make(chan bool)
        go func() {
-               for i := 0; i < theConfig.MaxBuffers+1; i++ {
+               for i := 0; i < s.cluster.API.MaxKeepBlobBuffers+1; i++ {
                        // Unauthenticated request, unsigned locator
                        // => OK
                        unsignedLocator := "/" + TestHash
-                       response := IssueRequest(
+                       response := IssueRequest(s.handler,
                                &RequestTester{
                                        method: "GET",
                                        uri:    unsignedLocator,
                                })
-                       ExpectStatusCode(t,
+                       ExpectStatusCode(c,
                                "Unauthenticated request, unsigned locator", http.StatusOK, response)
-                       ExpectBody(t,
+                       ExpectBody(c,
                                "Unauthenticated request, unsigned locator",
                                string(TestBlock),
                                response)
@@ -1053,45 +1060,41 @@ func TestGetHandlerNoBufferLeak(t *testing.T) {
        select {
        case <-time.After(20 * time.Second):
                // If the buffer pool leaks, the test goroutine hangs.
-               t.Fatal("test did not finish, assuming pool leaked")
+               c.Fatal("test did not finish, assuming pool leaked")
        case <-ok:
        }
 }
 
-func TestPutReplicationHeader(t *testing.T) {
-       defer teardown()
+func (s *HandlerSuite) TestPutReplicationHeader(c *check.C) {
+       c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
 
-       KeepVM = MakeTestVolumeManager(2)
-       defer KeepVM.Close()
-
-       resp := IssueRequest(&RequestTester{
+       resp := IssueRequest(s.handler, &RequestTester{
                method:      "PUT",
                uri:         "/" + TestHash,
                requestBody: TestBlock,
        })
        if r := resp.Header().Get("X-Keep-Replicas-Stored"); r != "1" {
-               t.Errorf("Got X-Keep-Replicas-Stored: %q, expected %q", r, "1")
+               c.Logf("%#v", resp)
+               c.Errorf("Got X-Keep-Replicas-Stored: %q, expected %q", r, "1")
        }
 }
 
-func TestUntrashHandler(t *testing.T) {
-       defer teardown()
+func (s *HandlerSuite) TestUntrashHandler(c *check.C) {
+       c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
 
        // Set up Keep volumes
-       KeepVM = MakeTestVolumeManager(2)
-       defer KeepVM.Close()
-       vols := KeepVM.AllWritable()
+       vols := s.handler.volmgr.AllWritable()
        vols[0].Put(context.Background(), TestHash, TestBlock)
 
-       theConfig.systemAuthToken = "DATA MANAGER TOKEN"
+       s.cluster.SystemRootToken = "DATA MANAGER TOKEN"
 
        // unauthenticatedReq => UnauthorizedError
        unauthenticatedReq := &RequestTester{
                method: "PUT",
                uri:    "/untrash/" + TestHash,
        }
-       response := IssueRequest(unauthenticatedReq)
-       ExpectStatusCode(t,
+       response := IssueRequest(s.handler, unauthenticatedReq)
+       ExpectStatusCode(c,
                "Unauthenticated request",
                UnauthorizedError.HTTPCode,
                response)
@@ -1103,8 +1106,8 @@ func TestUntrashHandler(t *testing.T) {
                apiToken: knownToken,
        }
 
-       response = IssueRequest(notDataManagerReq)
-       ExpectStatusCode(t,
+       response = IssueRequest(s.handler, notDataManagerReq)
+       ExpectStatusCode(c,
                "Non-datamanager token",
                UnauthorizedError.HTTPCode,
                response)
@@ -1113,10 +1116,10 @@ func TestUntrashHandler(t *testing.T) {
        datamanagerWithBadHashReq := &RequestTester{
                method:   "PUT",
                uri:      "/untrash/thisisnotalocator",
-               apiToken: theConfig.systemAuthToken,
+               apiToken: s.cluster.SystemRootToken,
        }
-       response = IssueRequest(datamanagerWithBadHashReq)
-       ExpectStatusCode(t,
+       response = IssueRequest(s.handler, datamanagerWithBadHashReq)
+       ExpectStatusCode(c,
                "Bad locator in untrash request",
                http.StatusBadRequest,
                response)
@@ -1125,10 +1128,10 @@ func TestUntrashHandler(t *testing.T) {
        datamanagerWrongMethodReq := &RequestTester{
                method:   "GET",
                uri:      "/untrash/" + TestHash,
-               apiToken: theConfig.systemAuthToken,
+               apiToken: s.cluster.SystemRootToken,
        }
-       response = IssueRequest(datamanagerWrongMethodReq)
-       ExpectStatusCode(t,
+       response = IssueRequest(s.handler, datamanagerWrongMethodReq)
+       ExpectStatusCode(c,
                "Only PUT method is supported for untrash",
                http.StatusMethodNotAllowed,
                response)
@@ -1137,60 +1140,57 @@ func TestUntrashHandler(t *testing.T) {
        datamanagerReq := &RequestTester{
                method:   "PUT",
                uri:      "/untrash/" + TestHash,
-               apiToken: theConfig.systemAuthToken,
+               apiToken: s.cluster.SystemRootToken,
        }
-       response = IssueRequest(datamanagerReq)
-       ExpectStatusCode(t,
+       response = IssueRequest(s.handler, datamanagerReq)
+       ExpectStatusCode(c,
                "",
                http.StatusOK,
                response)
        expected := "Successfully untrashed on: [MockVolume],[MockVolume]"
        if response.Body.String() != expected {
-               t.Errorf(
+               c.Errorf(
                        "Untrash response mismatched: expected %s, got:\n%s",
                        expected, response.Body.String())
        }
 }
 
-func TestUntrashHandlerWithNoWritableVolumes(t *testing.T) {
-       defer teardown()
-
-       // Set up readonly Keep volumes
-       vols := []*MockVolume{CreateMockVolume(), CreateMockVolume()}
-       vols[0].Readonly = true
-       vols[1].Readonly = true
-       KeepVM = MakeRRVolumeManager([]Volume{vols[0], vols[1]})
-       defer KeepVM.Close()
-
-       theConfig.systemAuthToken = "DATA MANAGER TOKEN"
+func (s *HandlerSuite) TestUntrashHandlerWithNoWritableVolumes(c *check.C) {
+       // Change all volumes to read-only
+       for uuid, v := range s.cluster.Volumes {
+               v.ReadOnly = true
+               s.cluster.Volumes[uuid] = v
+       }
+       c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
 
        // datamanagerReq => StatusNotFound (no writable volumes)
        datamanagerReq := &RequestTester{
                method:   "PUT",
                uri:      "/untrash/" + TestHash,
-               apiToken: theConfig.systemAuthToken,
+               apiToken: s.cluster.SystemRootToken,
        }
-       response := IssueRequest(datamanagerReq)
-       ExpectStatusCode(t,
+       response := IssueRequest(s.handler, datamanagerReq)
+       ExpectStatusCode(c,
                "No writable volumes",
                http.StatusNotFound,
                response)
 }
 
-func TestHealthCheckPing(t *testing.T) {
-       theConfig.ManagementToken = arvadostest.ManagementToken
+func (s *HandlerSuite) TestHealthCheckPing(c *check.C) {
+       s.cluster.ManagementToken = arvadostest.ManagementToken
+       c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
        pingReq := &RequestTester{
                method:   "GET",
                uri:      "/_health/ping",
                apiToken: arvadostest.ManagementToken,
        }
-       response := IssueHealthCheckRequest(pingReq)
-       ExpectStatusCode(t,
+       response := IssueHealthCheckRequest(s.handler, pingReq)
+       ExpectStatusCode(c,
                "",
                http.StatusOK,
                response)
        want := `{"health":"OK"}`
        if !strings.Contains(response.Body.String(), want) {
-               t.Errorf("expected response to include %s: got %s", want, response.Body.String())
+               c.Errorf("expected response to include %s: got %s", want, response.Body.String())
        }
 }
index 72088e2b5ead5726e02bc06c6d8f84e6b5817fa5..86504422d52f24f2659166e7cbfa975cb45772da 100644 (file)
@@ -11,6 +11,7 @@ import (
        "encoding/json"
        "fmt"
        "io"
+       "log"
        "net/http"
        "os"
        "regexp"
@@ -26,23 +27,31 @@ import (
        "git.curoverse.com/arvados.git/sdk/go/httpserver"
        "github.com/gorilla/mux"
        "github.com/prometheus/client_golang/prometheus"
+       "github.com/sirupsen/logrus"
 )
 
 type router struct {
        *mux.Router
-       limiter     httpserver.RequestCounter
        cluster     *arvados.Cluster
+       logger      logrus.FieldLogger
        remoteProxy remoteProxy
        metrics     *nodeMetrics
+       volmgr      *RRVolumeManager
+       pullq       *WorkQueue
+       trashq      *WorkQueue
 }
 
 // MakeRESTRouter returns a new router that forwards all Keep requests
 // to the appropriate handlers.
-func MakeRESTRouter(cluster *arvados.Cluster, reg *prometheus.Registry) http.Handler {
+func MakeRESTRouter(ctx context.Context, cluster *arvados.Cluster, reg *prometheus.Registry, volmgr *RRVolumeManager, pullq, trashq *WorkQueue) http.Handler {
        rtr := &router{
                Router:  mux.NewRouter(),
                cluster: cluster,
+               logger:  ctxlog.FromContext(ctx),
                metrics: &nodeMetrics{reg: reg},
+               volmgr:  volmgr,
+               pullq:   pullq,
+               trashq:  trashq,
        }
 
        rtr.HandleFunc(
@@ -52,12 +61,12 @@ func MakeRESTRouter(cluster *arvados.Cluster, reg *prometheus.Registry) http.Han
                rtr.handleGET).Methods("GET", "HEAD")
 
        rtr.HandleFunc(`/{hash:[0-9a-f]{32}}`, rtr.handlePUT).Methods("PUT")
-       rtr.HandleFunc(`/{hash:[0-9a-f]{32}}`, DeleteHandler).Methods("DELETE")
+       rtr.HandleFunc(`/{hash:[0-9a-f]{32}}`, rtr.handleDELETE).Methods("DELETE")
        // List all blocks stored here. Privileged client only.
-       rtr.HandleFunc(`/index`, rtr.IndexHandler).Methods("GET", "HEAD")
+       rtr.HandleFunc(`/index`, rtr.handleIndex).Methods("GET", "HEAD")
        // List blocks stored here whose hash has the given prefix.
        // Privileged client only.
-       rtr.HandleFunc(`/index/{prefix:[0-9a-f]{0,32}}`, rtr.IndexHandler).Methods("GET", "HEAD")
+       rtr.HandleFunc(`/index/{prefix:[0-9a-f]{0,32}}`, rtr.handleIndex).Methods("GET", "HEAD")
 
        // Internals/debugging info (runtime.MemStats)
        rtr.HandleFunc(`/debug.json`, rtr.DebugHandler).Methods("GET", "HEAD")
@@ -67,20 +76,20 @@ func MakeRESTRouter(cluster *arvados.Cluster, reg *prometheus.Registry) http.Han
 
        // List mounts: UUID, readonly, tier, device ID, ...
        rtr.HandleFunc(`/mounts`, rtr.MountsHandler).Methods("GET")
-       rtr.HandleFunc(`/mounts/{uuid}/blocks`, rtr.IndexHandler).Methods("GET")
-       rtr.HandleFunc(`/mounts/{uuid}/blocks/`, rtr.IndexHandler).Methods("GET")
+       rtr.HandleFunc(`/mounts/{uuid}/blocks`, rtr.handleIndex).Methods("GET")
+       rtr.HandleFunc(`/mounts/{uuid}/blocks/`, rtr.handleIndex).Methods("GET")
 
        // Replace the current pull queue.
-       rtr.HandleFunc(`/pull`, PullHandler).Methods("PUT")
+       rtr.HandleFunc(`/pull`, rtr.handlePull).Methods("PUT")
 
        // Replace the current trash queue.
-       rtr.HandleFunc(`/trash`, TrashHandler).Methods("PUT")
+       rtr.HandleFunc(`/trash`, rtr.handleTrash).Methods("PUT")
 
        // Untrash moves blocks from trash back into store
-       rtr.HandleFunc(`/untrash/{hash:[0-9a-f]{32}}`, UntrashHandler).Methods("PUT")
+       rtr.HandleFunc(`/untrash/{hash:[0-9a-f]{32}}`, rtr.handleUntrash).Methods("PUT")
 
        rtr.Handle("/_health/{check}", &health.Handler{
-               Token:  theConfig.ManagementToken,
+               Token:  cluster.ManagementToken,
                Prefix: "/_health/",
        }).Methods("GET")
 
@@ -88,17 +97,11 @@ func MakeRESTRouter(cluster *arvados.Cluster, reg *prometheus.Registry) http.Han
        // 400 Bad Request.
        rtr.NotFoundHandler = http.HandlerFunc(BadRequestHandler)
 
-       rtr.limiter = httpserver.NewRequestLimiter(theConfig.MaxRequests, rtr)
        rtr.metrics.setupBufferPoolMetrics(bufs)
-       rtr.metrics.setupWorkQueueMetrics(pullq, "pull")
-       rtr.metrics.setupWorkQueueMetrics(trashq, "trash")
-       rtr.metrics.setupRequestMetrics(rtr.limiter)
-
-       instrumented := httpserver.Instrument(rtr.metrics.reg, log,
-               httpserver.HandlerWithContext(
-                       ctxlog.Context(context.Background(), log),
-                       httpserver.AddRequestIDs(httpserver.LogRequests(rtr.limiter))))
-       return instrumented.ServeAPI(theConfig.ManagementToken, instrumented)
+       rtr.metrics.setupWorkQueueMetrics(rtr.pullq, "pull")
+       rtr.metrics.setupWorkQueueMetrics(rtr.trashq, "trash")
+
+       return rtr
 }
 
 // BadRequestHandler is a HandleFunc to address bad requests.
@@ -112,13 +115,13 @@ func (rtr *router) handleGET(resp http.ResponseWriter, req *http.Request) {
 
        locator := req.URL.Path[1:]
        if strings.Contains(locator, "+R") && !strings.Contains(locator, "+A") {
-               rtr.remoteProxy.Get(ctx, resp, req, rtr.cluster)
+               rtr.remoteProxy.Get(ctx, resp, req, rtr.cluster, rtr.volmgr)
                return
        }
 
-       if theConfig.RequireSignatures {
+       if rtr.cluster.Collections.BlobSigning {
                locator := req.URL.Path[1:] // strip leading slash
-               if err := VerifySignature(locator, GetAPIToken(req)); err != nil {
+               if err := VerifySignature(rtr.cluster, locator, GetAPIToken(req)); err != nil {
                        http.Error(resp, err.Error(), err.(*KeepError).HTTPCode)
                        return
                }
@@ -138,7 +141,7 @@ func (rtr *router) handleGET(resp http.ResponseWriter, req *http.Request) {
        }
        defer bufs.Put(buf)
 
-       size, err := GetBlock(ctx, mux.Vars(req)["hash"], buf, resp)
+       size, err := GetBlock(ctx, rtr.volmgr, mux.Vars(req)["hash"], buf, resp)
        if err != nil {
                code := http.StatusInternalServerError
                if err, ok := err.(*KeepError); ok {
@@ -160,7 +163,6 @@ func contextForResponse(parent context.Context, resp http.ResponseWriter) (conte
                go func(c <-chan bool) {
                        select {
                        case <-c:
-                               theConfig.debugLogf("cancel context")
                                cancel()
                        case <-ctx.Done():
                        }
@@ -210,7 +212,7 @@ func (rtr *router) handlePUT(resp http.ResponseWriter, req *http.Request) {
                return
        }
 
-       if len(KeepVM.AllWritable()) == 0 {
+       if len(rtr.volmgr.AllWritable()) == 0 {
                http.Error(resp, FullError.Error(), FullError.HTTPCode)
                return
        }
@@ -228,7 +230,7 @@ func (rtr *router) handlePUT(resp http.ResponseWriter, req *http.Request) {
                return
        }
 
-       replication, err := PutBlock(ctx, buf, hash)
+       replication, err := PutBlock(ctx, rtr.volmgr, buf, hash)
        bufs.Put(buf)
 
        if err != nil {
@@ -244,9 +246,9 @@ func (rtr *router) handlePUT(resp http.ResponseWriter, req *http.Request) {
        // return it to the client.
        returnHash := fmt.Sprintf("%s+%d", hash, req.ContentLength)
        apiToken := GetAPIToken(req)
-       if theConfig.blobSigningKey != nil && apiToken != "" {
-               expiry := time.Now().Add(theConfig.BlobSignatureTTL.Duration())
-               returnHash = SignLocator(returnHash, apiToken, expiry)
+       if rtr.cluster.Collections.BlobSigningKey != "" && apiToken != "" {
+               expiry := time.Now().Add(rtr.cluster.Collections.BlobSigningTTL.Duration())
+               returnHash = SignLocator(rtr.cluster, returnHash, apiToken, expiry)
        }
        resp.Header().Set("X-Keep-Replicas-Stored", strconv.Itoa(replication))
        resp.Write([]byte(returnHash + "\n"))
@@ -254,8 +256,8 @@ func (rtr *router) handlePUT(resp http.ResponseWriter, req *http.Request) {
 
 // handleIndex responds to "/index", "/index/{prefix}", and
 // "/mounts/{uuid}/blocks" requests.
-func (rtr *router) IndexHandler(resp http.ResponseWriter, req *http.Request) {
-       if !IsSystemAuth(GetAPIToken(req)) {
+func (rtr *router) handleIndex(resp http.ResponseWriter, req *http.Request) {
+       if !rtr.isSystemAuth(GetAPIToken(req)) {
                http.Error(resp, UnauthorizedError.Error(), UnauthorizedError.HTTPCode)
                return
        }
@@ -268,14 +270,14 @@ func (rtr *router) IndexHandler(resp http.ResponseWriter, req *http.Request) {
 
        uuid := mux.Vars(req)["uuid"]
 
-       var vols []Volume
+       var vols []*VolumeMount
        if uuid == "" {
-               vols = KeepVM.AllReadable()
-       } else if v := KeepVM.Lookup(uuid, false); v == nil {
+               vols = rtr.volmgr.AllReadable()
+       } else if mnt := rtr.volmgr.Lookup(uuid, false); mnt == nil {
                http.Error(resp, "mount not found", http.StatusNotFound)
                return
        } else {
-               vols = []Volume{v}
+               vols = []*VolumeMount{mnt}
        }
 
        for _, v := range vols {
@@ -303,9 +305,9 @@ func (rtr *router) IndexHandler(resp http.ResponseWriter, req *http.Request) {
 
 // MountsHandler responds to "GET /mounts" requests.
 func (rtr *router) MountsHandler(resp http.ResponseWriter, req *http.Request) {
-       err := json.NewEncoder(resp).Encode(KeepVM.Mounts())
+       err := json.NewEncoder(resp).Encode(rtr.volmgr.Mounts())
        if err != nil {
-               http.Error(resp, err.Error(), http.StatusInternalServerError)
+               httpserver.Error(resp, err.Error(), http.StatusInternalServerError)
        }
 }
 
@@ -368,32 +370,28 @@ func (rtr *router) StatusHandler(resp http.ResponseWriter, req *http.Request) {
 // populate the given NodeStatus struct with current values.
 func (rtr *router) readNodeStatus(st *NodeStatus) {
        st.Version = version
-       vols := KeepVM.AllReadable()
+       vols := rtr.volmgr.AllReadable()
        if cap(st.Volumes) < len(vols) {
                st.Volumes = make([]*volumeStatusEnt, len(vols))
        }
        st.Volumes = st.Volumes[:0]
        for _, vol := range vols {
                var internalStats interface{}
-               if vol, ok := vol.(InternalStatser); ok {
+               if vol, ok := vol.Volume.(InternalStatser); ok {
                        internalStats = vol.InternalStats()
                }
                st.Volumes = append(st.Volumes, &volumeStatusEnt{
                        Label:         vol.String(),
                        Status:        vol.Status(),
                        InternalStats: internalStats,
-                       //VolumeStats: KeepVM.VolumeStats(vol),
+                       //VolumeStats: rtr.volmgr.VolumeStats(vol),
                })
        }
        st.BufferPool.Alloc = bufs.Alloc()
        st.BufferPool.Cap = bufs.Cap()
        st.BufferPool.Len = bufs.Len()
-       st.PullQueue = getWorkQueueStatus(pullq)
-       st.TrashQueue = getWorkQueueStatus(trashq)
-       if rtr.limiter != nil {
-               st.RequestsCurrent = rtr.limiter.Current()
-               st.RequestsMax = rtr.limiter.Max()
-       }
+       st.PullQueue = getWorkQueueStatus(rtr.pullq)
+       st.TrashQueue = getWorkQueueStatus(rtr.trashq)
 }
 
 // return a WorkQueueStatus for the given queue. If q is nil (which
@@ -407,7 +405,7 @@ func getWorkQueueStatus(q *WorkQueue) WorkQueueStatus {
        return q.Status()
 }
 
-// DeleteHandler processes DELETE requests.
+// handleDELETE processes DELETE requests.
 //
 // DELETE /{hash:[0-9a-f]{32}} will delete the block with the specified hash
 // from all connected volumes.
@@ -418,7 +416,7 @@ func getWorkQueueStatus(q *WorkQueue) WorkQueueStatus {
 // a PermissionError.
 //
 // Upon receiving a valid request from an authorized user,
-// DeleteHandler deletes all copies of the specified block on local
+// handleDELETE deletes all copies of the specified block on local
 // writable volumes.
 //
 // Response format:
@@ -434,17 +432,17 @@ func getWorkQueueStatus(q *WorkQueue) WorkQueueStatus {
 // where d and f are integers representing the number of blocks that
 // were successfully and unsuccessfully deleted.
 //
-func DeleteHandler(resp http.ResponseWriter, req *http.Request) {
+func (rtr *router) handleDELETE(resp http.ResponseWriter, req *http.Request) {
        hash := mux.Vars(req)["hash"]
 
        // Confirm that this user is an admin and has a token with unlimited scope.
        var tok = GetAPIToken(req)
-       if tok == "" || !CanDelete(tok) {
+       if tok == "" || !rtr.canDelete(tok) {
                http.Error(resp, PermissionError.Error(), PermissionError.HTTPCode)
                return
        }
 
-       if !theConfig.EnableDelete {
+       if !rtr.cluster.Collections.BlobTrash {
                http.Error(resp, MethodDisabledError.Error(), MethodDisabledError.HTTPCode)
                return
        }
@@ -456,7 +454,7 @@ func DeleteHandler(resp http.ResponseWriter, req *http.Request) {
                Deleted int `json:"copies_deleted"`
                Failed  int `json:"copies_failed"`
        }
-       for _, vol := range KeepVM.AllWritable() {
+       for _, vol := range rtr.volmgr.AllWritable() {
                if err := vol.Trash(hash); err == nil {
                        result.Deleted++
                } else if os.IsNotExist(err) {
@@ -530,9 +528,9 @@ type PullRequest struct {
 }
 
 // handlePull processes "PUT /pull" requests for the data manager.
-func PullHandler(resp http.ResponseWriter, req *http.Request) {
+func (rtr *router) handlePull(resp http.ResponseWriter, req *http.Request) {
        // Reject unauthorized requests.
-       if !IsSystemAuth(GetAPIToken(req)) {
+       if !rtr.isSystemAuth(GetAPIToken(req)) {
                http.Error(resp, UnauthorizedError.Error(), UnauthorizedError.HTTPCode)
                return
        }
@@ -556,7 +554,7 @@ func PullHandler(resp http.ResponseWriter, req *http.Request) {
        for _, p := range pr {
                plist.PushBack(p)
        }
-       pullq.ReplaceQueue(plist)
+       rtr.pullq.ReplaceQueue(plist)
 }
 
 // TrashRequest consists of a block locator and its Mtime
@@ -569,9 +567,9 @@ type TrashRequest struct {
 }
 
 // handleTrash processes /trash requests.
-func TrashHandler(resp http.ResponseWriter, req *http.Request) {
+func (rtr *router) handleTrash(resp http.ResponseWriter, req *http.Request) {
        // Reject unauthorized requests.
-       if !IsSystemAuth(GetAPIToken(req)) {
+       if !rtr.isSystemAuth(GetAPIToken(req)) {
                http.Error(resp, UnauthorizedError.Error(), UnauthorizedError.HTTPCode)
                return
        }
@@ -595,27 +593,27 @@ func TrashHandler(resp http.ResponseWriter, req *http.Request) {
        for _, t := range trash {
                tlist.PushBack(t)
        }
-       trashq.ReplaceQueue(tlist)
+       rtr.trashq.ReplaceQueue(tlist)
 }
 
 // handleUntrash processes "PUT /untrash/{hash:[0-9a-f]{32}}" requests for the data manager.
-func UntrashHandler(resp http.ResponseWriter, req *http.Request) {
+func (rtr *router) handleUntrash(resp http.ResponseWriter, req *http.Request) {
        // Reject unauthorized requests.
-       if !IsSystemAuth(GetAPIToken(req)) {
+       if !rtr.isSystemAuth(GetAPIToken(req)) {
                http.Error(resp, UnauthorizedError.Error(), UnauthorizedError.HTTPCode)
                return
        }
 
        hash := mux.Vars(req)["hash"]
 
-       if len(KeepVM.AllWritable()) == 0 {
+       if len(rtr.volmgr.AllWritable()) == 0 {
                http.Error(resp, "No writable volumes", http.StatusNotFound)
                return
        }
 
        var untrashedOn, failedOn []string
        var numNotFound int
-       for _, vol := range KeepVM.AllWritable() {
+       for _, vol := range rtr.volmgr.AllWritable() {
                err := vol.Untrash(hash)
 
                if os.IsNotExist(err) {
@@ -629,12 +627,12 @@ func UntrashHandler(resp http.ResponseWriter, req *http.Request) {
                }
        }
 
-       if numNotFound == len(KeepVM.AllWritable()) {
+       if numNotFound == len(rtr.volmgr.AllWritable()) {
                http.Error(resp, "Block not found on any of the writable volumes", http.StatusNotFound)
                return
        }
 
-       if len(failedOn) == len(KeepVM.AllWritable()) {
+       if len(failedOn) == len(rtr.volmgr.AllWritable()) {
                http.Error(resp, "Failed to untrash on all writable volumes", http.StatusInternalServerError)
        } else {
                respBody := "Successfully untrashed on: " + strings.Join(untrashedOn, ",")
@@ -664,11 +662,11 @@ func UntrashHandler(resp http.ResponseWriter, req *http.Request) {
 // If the block found does not have the correct MD5 hash, returns
 // DiskHashError.
 //
-func GetBlock(ctx context.Context, hash string, buf []byte, resp http.ResponseWriter) (int, error) {
+func GetBlock(ctx context.Context, volmgr *RRVolumeManager, hash string, buf []byte, resp http.ResponseWriter) (int, error) {
        // Attempt to read the requested hash from a keep volume.
        errorToCaller := NotFoundError
 
-       for _, vol := range KeepVM.AllReadable() {
+       for _, vol := range volmgr.AllReadable() {
                size, err := vol.Get(ctx, hash, buf)
                select {
                case <-ctx.Done():
@@ -738,7 +736,7 @@ func GetBlock(ctx context.Context, hash string, buf []byte, resp http.ResponseWr
 //          all writes failed). The text of the error message should
 //          provide as much detail as possible.
 //
-func PutBlock(ctx context.Context, block []byte, hash string) (int, error) {
+func PutBlock(ctx context.Context, volmgr *RRVolumeManager, block []byte, hash string) (int, error) {
        // Check that BLOCK's checksum matches HASH.
        blockhash := fmt.Sprintf("%x", md5.Sum(block))
        if blockhash != hash {
@@ -749,7 +747,7 @@ func PutBlock(ctx context.Context, block []byte, hash string) (int, error) {
        // If we already have this data, it's intact on disk, and we
        // can update its timestamp, return success. If we have
        // different data with the same hash, return failure.
-       if n, err := CompareAndTouch(ctx, hash, block); err == nil || err == CollisionError {
+       if n, err := CompareAndTouch(ctx, volmgr, hash, block); err == nil || err == CollisionError {
                return n, err
        } else if ctx.Err() != nil {
                return 0, ErrClientDisconnect
@@ -757,16 +755,16 @@ func PutBlock(ctx context.Context, block []byte, hash string) (int, error) {
 
        // Choose a Keep volume to write to.
        // If this volume fails, try all of the volumes in order.
-       if vol := KeepVM.NextWritable(); vol != nil {
-               if err := vol.Put(ctx, hash, block); err == nil {
-                       return vol.Replication(), nil // success!
+       if mnt := volmgr.NextWritable(); mnt != nil {
+               if err := mnt.Put(ctx, hash, block); err == nil {
+                       return mnt.Replication, nil // success!
                }
                if ctx.Err() != nil {
                        return 0, ErrClientDisconnect
                }
        }
 
-       writables := KeepVM.AllWritable()
+       writables := volmgr.AllWritable()
        if len(writables) == 0 {
                log.Print("No writable volumes.")
                return 0, FullError
@@ -779,7 +777,7 @@ func PutBlock(ctx context.Context, block []byte, hash string) (int, error) {
                        return 0, ErrClientDisconnect
                }
                if err == nil {
-                       return vol.Replication(), nil // success!
+                       return vol.Replication, nil // success!
                }
                if err != FullError {
                        // The volume is not full but the
@@ -803,10 +801,10 @@ func PutBlock(ctx context.Context, block []byte, hash string) (int, error) {
 // the relevant block's modification time in order to protect it from
 // premature garbage collection. Otherwise, it returns a non-nil
 // error.
-func CompareAndTouch(ctx context.Context, hash string, buf []byte) (int, error) {
+func CompareAndTouch(ctx context.Context, volmgr *RRVolumeManager, hash string, buf []byte) (int, error) {
        var bestErr error = NotFoundError
-       for _, vol := range KeepVM.AllWritable() {
-               err := vol.Compare(ctx, hash, buf)
+       for _, mnt := range volmgr.AllWritable() {
+               err := mnt.Compare(ctx, hash, buf)
                if ctx.Err() != nil {
                        return 0, ctx.Err()
                } else if err == CollisionError {
@@ -815,7 +813,7 @@ func CompareAndTouch(ctx context.Context, hash string, buf []byte) (int, error)
                        // to tell which one is wanted if we have
                        // both, so there's no point writing it even
                        // on a different volume.)
-                       log.Printf("%s: Compare(%s): %s", vol, hash, err)
+                       log.Printf("%s: Compare(%s): %s", mnt.Volume, hash, err)
                        return 0, err
                } else if os.IsNotExist(err) {
                        // Block does not exist. This is the only
@@ -825,16 +823,16 @@ func CompareAndTouch(ctx context.Context, hash string, buf []byte) (int, error)
                        // Couldn't open file, data is corrupt on
                        // disk, etc.: log this abnormal condition,
                        // and try the next volume.
-                       log.Printf("%s: Compare(%s): %s", vol, hash, err)
+                       log.Printf("%s: Compare(%s): %s", mnt.Volume, hash, err)
                        continue
                }
-               if err := vol.Touch(hash); err != nil {
-                       log.Printf("%s: Touch %s failed: %s", vol, hash, err)
+               if err := mnt.Touch(hash); err != nil {
+                       log.Printf("%s: Touch %s failed: %s", mnt.Volume, hash, err)
                        bestErr = err
                        continue
                }
                // Compare and Touch both worked --> done.
-               return vol.Replication(), nil
+               return mnt.Replication, nil
        }
        return 0, bestErr
 }
@@ -875,15 +873,15 @@ func IsExpired(timestampHex string) bool {
        return time.Unix(ts, 0).Before(time.Now())
 }
 
-// CanDelete returns true if the user identified by apiToken is
+// canDelete returns true if the user identified by apiToken is
 // allowed to delete blocks.
-func CanDelete(apiToken string) bool {
+func (rtr *router) canDelete(apiToken string) bool {
        if apiToken == "" {
                return false
        }
        // Blocks may be deleted only when Keep has been configured with a
        // data manager.
-       if IsSystemAuth(apiToken) {
+       if rtr.isSystemAuth(apiToken) {
                return true
        }
        // TODO(twp): look up apiToken with the API server
@@ -892,8 +890,8 @@ func CanDelete(apiToken string) bool {
        return false
 }
 
-// IsSystemAuth returns true if the given token is allowed to perform
+// isSystemAuth returns true if the given token is allowed to perform
 // system level actions like deleting data.
-func IsSystemAuth(token string) bool {
-       return token != "" && token == theConfig.systemAuthToken
+func (rtr *router) isSystemAuth(token string) bool {
+       return token != "" && token == rtr.cluster.SystemRootToken
 }
diff --git a/services/keepstore/handlers_with_generic_volume_test.go b/services/keepstore/handlers_with_generic_volume_test.go
deleted file mode 100644 (file)
index 4ffb7f8..0000000
+++ /dev/null
@@ -1,127 +0,0 @@
-// Copyright (C) The Arvados Authors. All rights reserved.
-//
-// SPDX-License-Identifier: AGPL-3.0
-
-package main
-
-import (
-       "bytes"
-       "context"
-)
-
-// A TestableVolumeManagerFactory creates a volume manager with at least two TestableVolume instances.
-// The factory function, and the TestableVolume instances it returns, can use "t" to write
-// logs, fail the current test, etc.
-type TestableVolumeManagerFactory func(t TB) (*RRVolumeManager, []TestableVolume)
-
-// DoHandlersWithGenericVolumeTests runs a set of handler tests with a
-// Volume Manager comprised of TestableVolume instances.
-// It calls factory to create a volume manager with TestableVolume
-// instances for each test case, to avoid leaking state between tests.
-func DoHandlersWithGenericVolumeTests(t TB, factory TestableVolumeManagerFactory) {
-       testGetBlock(t, factory, TestHash, TestBlock)
-       testGetBlock(t, factory, EmptyHash, EmptyBlock)
-       testPutRawBadDataGetBlock(t, factory, TestHash, TestBlock, []byte("baddata"))
-       testPutRawBadDataGetBlock(t, factory, EmptyHash, EmptyBlock, []byte("baddata"))
-       testPutBlock(t, factory, TestHash, TestBlock)
-       testPutBlock(t, factory, EmptyHash, EmptyBlock)
-       testPutBlockCorrupt(t, factory, TestHash, TestBlock, []byte("baddata"))
-       testPutBlockCorrupt(t, factory, EmptyHash, EmptyBlock, []byte("baddata"))
-}
-
-// Setup RRVolumeManager with TestableVolumes
-func setupHandlersWithGenericVolumeTest(t TB, factory TestableVolumeManagerFactory) []TestableVolume {
-       vm, testableVolumes := factory(t)
-       KeepVM = vm
-
-       for _, v := range testableVolumes {
-               defer v.Teardown()
-       }
-       defer KeepVM.Close()
-
-       return testableVolumes
-}
-
-// Put a block using PutRaw in just one volume and Get it using GetBlock
-func testGetBlock(t TB, factory TestableVolumeManagerFactory, testHash string, testBlock []byte) {
-       testableVolumes := setupHandlersWithGenericVolumeTest(t, factory)
-
-       // Put testBlock in one volume
-       testableVolumes[1].PutRaw(testHash, testBlock)
-
-       // Get should pass
-       buf := make([]byte, len(testBlock))
-       n, err := GetBlock(context.Background(), testHash, buf, nil)
-       if err != nil {
-               t.Fatalf("Error while getting block %s", err)
-       }
-       if bytes.Compare(buf[:n], testBlock) != 0 {
-               t.Errorf("Put succeeded but Get returned %+v, expected %+v", buf[:n], testBlock)
-       }
-}
-
-// Put a bad block using PutRaw and get it.
-func testPutRawBadDataGetBlock(t TB, factory TestableVolumeManagerFactory,
-       testHash string, testBlock []byte, badData []byte) {
-       testableVolumes := setupHandlersWithGenericVolumeTest(t, factory)
-
-       // Put bad data for testHash in both volumes
-       testableVolumes[0].PutRaw(testHash, badData)
-       testableVolumes[1].PutRaw(testHash, badData)
-
-       // Get should fail
-       buf := make([]byte, BlockSize)
-       size, err := GetBlock(context.Background(), testHash, buf, nil)
-       if err == nil {
-               t.Fatalf("Got %+q, expected error while getting corrupt block %v", buf[:size], testHash)
-       }
-}
-
-// Invoke PutBlock twice to ensure CompareAndTouch path is tested.
-func testPutBlock(t TB, factory TestableVolumeManagerFactory, testHash string, testBlock []byte) {
-       setupHandlersWithGenericVolumeTest(t, factory)
-
-       // PutBlock
-       if _, err := PutBlock(context.Background(), testBlock, testHash); err != nil {
-               t.Fatalf("Error during PutBlock: %s", err)
-       }
-
-       // Check that PutBlock succeeds again even after CompareAndTouch
-       if _, err := PutBlock(context.Background(), testBlock, testHash); err != nil {
-               t.Fatalf("Error during PutBlock: %s", err)
-       }
-
-       // Check that PutBlock stored the data as expected
-       buf := make([]byte, BlockSize)
-       size, err := GetBlock(context.Background(), testHash, buf, nil)
-       if err != nil {
-               t.Fatalf("Error during GetBlock for %q: %s", testHash, err)
-       } else if bytes.Compare(buf[:size], testBlock) != 0 {
-               t.Errorf("Get response incorrect. Expected %q; found %q", testBlock, buf[:size])
-       }
-}
-
-// Put a bad block using PutRaw, overwrite it using PutBlock and get it.
-func testPutBlockCorrupt(t TB, factory TestableVolumeManagerFactory,
-       testHash string, testBlock []byte, badData []byte) {
-       testableVolumes := setupHandlersWithGenericVolumeTest(t, factory)
-
-       // Put bad data for testHash in both volumes
-       testableVolumes[0].PutRaw(testHash, badData)
-       testableVolumes[1].PutRaw(testHash, badData)
-
-       // Check that PutBlock with good data succeeds
-       if _, err := PutBlock(context.Background(), testBlock, testHash); err != nil {
-               t.Fatalf("Error during PutBlock for %q: %s", testHash, err)
-       }
-
-       // Put succeeded and overwrote the badData in one volume,
-       // and Get should return the testBlock now, ignoring the bad data.
-       buf := make([]byte, BlockSize)
-       size, err := GetBlock(context.Background(), testHash, buf, nil)
-       if err != nil {
-               t.Fatalf("Error during GetBlock for %q: %s", testHash, err)
-       } else if bytes.Compare(buf[:size], testBlock) != 0 {
-               t.Errorf("Get response incorrect. Expected %q; found %q", testBlock, buf[:size])
-       }
-}
index fcbdddacb1d585e995c8f23a0528be2ce8c1723c..f2973b586aa1a4ad83fe52d5f8d5b70410718c76 100644 (file)
@@ -5,24 +5,9 @@
 package main
 
 import (
-       "flag"
-       "fmt"
-       "net"
-       "os"
-       "os/signal"
-       "syscall"
        "time"
-
-       "git.curoverse.com/arvados.git/sdk/go/arvados"
-       "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
-       "git.curoverse.com/arvados.git/sdk/go/config"
-       "git.curoverse.com/arvados.git/sdk/go/keepclient"
-       "github.com/coreos/go-systemd/daemon"
-       "github.com/prometheus/client_golang/prometheus"
 )
 
-var version = "dev"
-
 // A Keep "block" is 64MB.
 const BlockSize = 64 * 1024 * 1024
 
@@ -30,9 +15,6 @@ const BlockSize = 64 * 1024 * 1024
 // in order to permit writes.
 const MinFreeKilobytes = BlockSize / 1024
 
-// ProcMounts /proc/mounts
-var ProcMounts = "/proc/mounts"
-
 var bufs *bufferPool
 
 // KeepError types.
@@ -65,184 +47,11 @@ func (e *KeepError) Error() string {
        return e.ErrMsg
 }
 
-// ========================
-// Internal data structures
-//
-// These global variables are used by multiple parts of the
-// program. They are good candidates for moving into their own
-// packages.
-
-// The Keep VolumeManager maintains a list of available volumes.
-// Initialized by the --volumes flag (or by FindKeepVolumes).
-var KeepVM VolumeManager
-
-// The pull list manager and trash queue are threadsafe queues which
-// support atomic update operations. The PullHandler and TrashHandler
-// store results from Data Manager /pull and /trash requests here.
-//
-// See the Keep and Data Manager design documents for more details:
-// https://arvados.org/projects/arvados/wiki/Keep_Design_Doc
-// https://arvados.org/projects/arvados/wiki/Data_Manager_Design_Doc
-//
-var pullq *WorkQueue
-var trashq *WorkQueue
-
-func main() {
-       deprecated.beforeFlagParse(theConfig)
-
-       dumpConfig := flag.Bool("dump-config", false, "write current configuration to stdout and exit (useful for migrating from command line flags to config file)")
-       getVersion := flag.Bool("version", false, "Print version information and exit.")
-
-       defaultConfigPath := "/etc/arvados/keepstore/keepstore.yml"
-       var configPath string
-       flag.StringVar(
-               &configPath,
-               "config",
-               defaultConfigPath,
-               "YAML or JSON configuration file `path`")
-       flag.Usage = usage
-       flag.Parse()
-
-       // Print version information if requested
-       if *getVersion {
-               fmt.Printf("keepstore %s\n", version)
-               return
-       }
-
-       deprecated.afterFlagParse(theConfig)
-
-       err := config.LoadFile(theConfig, configPath)
-       if err != nil && (!os.IsNotExist(err) || configPath != defaultConfigPath) {
-               log.Fatal(err)
-       }
-
-       if *dumpConfig {
-               log.Fatal(config.DumpAndExit(theConfig))
-       }
-
-       log.Printf("keepstore %s started", version)
-
-       metricsRegistry := prometheus.NewRegistry()
-
-       err = theConfig.Start(metricsRegistry)
-       if err != nil {
-               log.Fatal(err)
-       }
-
-       if pidfile := theConfig.PIDFile; pidfile != "" {
-               f, err := os.OpenFile(pidfile, os.O_RDWR|os.O_CREATE, 0777)
-               if err != nil {
-                       log.Fatalf("open pidfile (%s): %s", pidfile, err)
-               }
-               defer f.Close()
-               err = syscall.Flock(int(f.Fd()), syscall.LOCK_EX|syscall.LOCK_NB)
-               if err != nil {
-                       log.Fatalf("flock pidfile (%s): %s", pidfile, err)
-               }
-               defer os.Remove(pidfile)
-               err = f.Truncate(0)
-               if err != nil {
-                       log.Fatalf("truncate pidfile (%s): %s", pidfile, err)
-               }
-               _, err = fmt.Fprint(f, os.Getpid())
-               if err != nil {
-                       log.Fatalf("write pidfile (%s): %s", pidfile, err)
-               }
-               err = f.Sync()
-               if err != nil {
-                       log.Fatalf("sync pidfile (%s): %s", pidfile, err)
-               }
-       }
-
-       var cluster *arvados.Cluster
-       cfg, err := arvados.GetConfig(arvados.DefaultConfigFile)
-       if err != nil && os.IsNotExist(err) {
-               log.Warnf("DEPRECATED: proceeding without cluster configuration file %q (%s)", arvados.DefaultConfigFile, err)
-               cluster = &arvados.Cluster{
-                       ClusterID: "xxxxx",
-               }
-       } else if err != nil {
-               log.Fatalf("load config %q: %s", arvados.DefaultConfigFile, err)
-       } else {
-               cluster, err = cfg.GetCluster("")
-               if err != nil {
-                       log.Fatalf("config error in %q: %s", arvados.DefaultConfigFile, err)
-               }
-       }
-
-       log.Println("keepstore starting, pid", os.Getpid())
-       defer log.Println("keepstore exiting, pid", os.Getpid())
-
-       // Start a round-robin VolumeManager with the volumes we have found.
-       KeepVM = MakeRRVolumeManager(theConfig.Volumes)
-
-       // Middleware/handler stack
-       router := MakeRESTRouter(cluster, metricsRegistry)
-
-       // Set up a TCP listener.
-       listener, err := net.Listen("tcp", theConfig.Listen)
-       if err != nil {
-               log.Fatal(err)
-       }
-
-       // Initialize keepclient for pull workers
-       keepClient := &keepclient.KeepClient{
-               Arvados:       &arvadosclient.ArvadosClient{},
-               Want_replicas: 1,
-       }
-
-       // Initialize the pullq and workers
-       pullq = NewWorkQueue()
-       for i := 0; i < 1 || i < theConfig.PullWorkers; i++ {
-               go RunPullWorker(pullq, keepClient)
-       }
-
-       // Initialize the trashq and workers
-       trashq = NewWorkQueue()
-       for i := 0; i < 1 || i < theConfig.TrashWorkers; i++ {
-               go RunTrashWorker(trashq)
-       }
-
-       // Start emptyTrash goroutine
-       doneEmptyingTrash := make(chan bool)
-       go emptyTrash(doneEmptyingTrash, theConfig.TrashCheckInterval.Duration())
-
-       // Shut down the server gracefully (by closing the listener)
-       // if SIGTERM is received.
-       term := make(chan os.Signal, 1)
-       go func(sig <-chan os.Signal) {
-               s := <-sig
-               log.Println("caught signal:", s)
-               doneEmptyingTrash <- true
-               listener.Close()
-       }(term)
-       signal.Notify(term, syscall.SIGTERM)
-       signal.Notify(term, syscall.SIGINT)
-
-       if _, err := daemon.SdNotify(false, "READY=1"); err != nil {
-               log.Printf("Error notifying init daemon: %v", err)
-       }
-       log.Println("listening at", listener.Addr())
-       srv := &server{}
-       srv.Handler = router
-       srv.Serve(listener)
-}
-
 // Periodically (once per interval) invoke EmptyTrash on all volumes.
-func emptyTrash(done <-chan bool, interval time.Duration) {
-       ticker := time.NewTicker(interval)
-
-       for {
-               select {
-               case <-ticker.C:
-                       for _, v := range theConfig.Volumes {
-                               if v.Writable() {
-                                       v.EmptyTrash()
-                               }
-                       }
-               case <-done:
-                       ticker.Stop()
-                       return
+func emptyTrash(mounts []*VolumeMount, interval time.Duration) {
+       for range time.NewTicker(interval).C {
+               for _, v := range mounts {
+                       v.EmptyTrash()
                }
        }
 }
index 8b448e72c3b41687f46dc2c6a6ad8a0c21202258..728c6fded1ac6f372f30b7a4da78da7c6a2f7199 100644 (file)
@@ -6,7 +6,6 @@
 Description=Arvados Keep Storage Daemon
 Documentation=https://doc.arvados.org/
 After=network.target
-AssertPathExists=/etc/arvados/keepstore/keepstore.yml
 
 # systemd==229 (ubuntu:xenial) obeys StartLimitInterval in the [Unit] section
 StartLimitInterval=0
diff --git a/services/keepstore/keepstore_test.go b/services/keepstore/keepstore_test.go
deleted file mode 100644 (file)
index d1d3804..0000000
+++ /dev/null
@@ -1,456 +0,0 @@
-// Copyright (C) The Arvados Authors. All rights reserved.
-//
-// SPDX-License-Identifier: AGPL-3.0
-
-package main
-
-import (
-       "bytes"
-       "context"
-       "errors"
-       "fmt"
-       "io/ioutil"
-       "os"
-       "path"
-       "regexp"
-       "sort"
-       "strings"
-       "testing"
-
-       "git.curoverse.com/arvados.git/sdk/go/arvadostest"
-)
-
-var TestBlock = []byte("The quick brown fox jumps over the lazy dog.")
-var TestHash = "e4d909c290d0fb1ca068ffaddf22cbd0"
-var TestHashPutResp = "e4d909c290d0fb1ca068ffaddf22cbd0+44\n"
-
-var TestBlock2 = []byte("Pack my box with five dozen liquor jugs.")
-var TestHash2 = "f15ac516f788aec4f30932ffb6395c39"
-
-var TestBlock3 = []byte("Now is the time for all good men to come to the aid of their country.")
-var TestHash3 = "eed29bbffbc2dbe5e5ee0bb71888e61f"
-
-// BadBlock is used to test collisions and corruption.
-// It must not match any test hashes.
-var BadBlock = []byte("The magic words are squeamish ossifrage.")
-
-// Empty block
-var EmptyHash = "d41d8cd98f00b204e9800998ecf8427e"
-var EmptyBlock = []byte("")
-
-// TODO(twp): Tests still to be written
-//
-//   * TestPutBlockFull
-//       - test that PutBlock returns 503 Full if the filesystem is full.
-//         (must mock FreeDiskSpace or Statfs? use a tmpfs?)
-//
-//   * TestPutBlockWriteErr
-//       - test the behavior when Write returns an error.
-//           - Possible solutions: use a small tmpfs and a high
-//             MIN_FREE_KILOBYTES to trick PutBlock into attempting
-//             to write a block larger than the amount of space left
-//           - use an interface to mock ioutil.TempFile with a File
-//             object that always returns an error on write
-//
-// ========================================
-// GetBlock tests.
-// ========================================
-
-// TestGetBlock
-//     Test that simple block reads succeed.
-//
-func TestGetBlock(t *testing.T) {
-       defer teardown()
-
-       // Prepare two test Keep volumes. Our block is stored on the second volume.
-       KeepVM = MakeTestVolumeManager(2)
-       defer KeepVM.Close()
-
-       vols := KeepVM.AllReadable()
-       if err := vols[1].Put(context.Background(), TestHash, TestBlock); err != nil {
-               t.Error(err)
-       }
-
-       // Check that GetBlock returns success.
-       buf := make([]byte, BlockSize)
-       size, err := GetBlock(context.Background(), TestHash, buf, nil)
-       if err != nil {
-               t.Errorf("GetBlock error: %s", err)
-       }
-       if bytes.Compare(buf[:size], TestBlock) != 0 {
-               t.Errorf("got %v, expected %v", buf[:size], TestBlock)
-       }
-}
-
-// TestGetBlockMissing
-//     GetBlock must return an error when the block is not found.
-//
-func TestGetBlockMissing(t *testing.T) {
-       defer teardown()
-
-       // Create two empty test Keep volumes.
-       KeepVM = MakeTestVolumeManager(2)
-       defer KeepVM.Close()
-
-       // Check that GetBlock returns failure.
-       buf := make([]byte, BlockSize)
-       size, err := GetBlock(context.Background(), TestHash, buf, nil)
-       if err != NotFoundError {
-               t.Errorf("Expected NotFoundError, got %v, err %v", buf[:size], err)
-       }
-}
-
-// TestGetBlockCorrupt
-//     GetBlock must return an error when a corrupted block is requested
-//     (the contents of the file do not checksum to its hash).
-//
-func TestGetBlockCorrupt(t *testing.T) {
-       defer teardown()
-
-       // Create two test Keep volumes and store a corrupt block in one.
-       KeepVM = MakeTestVolumeManager(2)
-       defer KeepVM.Close()
-
-       vols := KeepVM.AllReadable()
-       vols[0].Put(context.Background(), TestHash, BadBlock)
-
-       // Check that GetBlock returns failure.
-       buf := make([]byte, BlockSize)
-       size, err := GetBlock(context.Background(), TestHash, buf, nil)
-       if err != DiskHashError {
-               t.Errorf("Expected DiskHashError, got %v (buf: %v)", err, buf[:size])
-       }
-}
-
-// ========================================
-// PutBlock tests
-// ========================================
-
-// TestPutBlockOK
-//     PutBlock can perform a simple block write and returns success.
-//
-func TestPutBlockOK(t *testing.T) {
-       defer teardown()
-
-       // Create two test Keep volumes.
-       KeepVM = MakeTestVolumeManager(2)
-       defer KeepVM.Close()
-
-       // Check that PutBlock stores the data as expected.
-       if n, err := PutBlock(context.Background(), TestBlock, TestHash); err != nil || n < 1 {
-               t.Fatalf("PutBlock: n %d err %v", n, err)
-       }
-
-       vols := KeepVM.AllReadable()
-       buf := make([]byte, BlockSize)
-       n, err := vols[1].Get(context.Background(), TestHash, buf)
-       if err != nil {
-               t.Fatalf("Volume #0 Get returned error: %v", err)
-       }
-       if string(buf[:n]) != string(TestBlock) {
-               t.Fatalf("PutBlock stored '%s', Get retrieved '%s'",
-                       string(TestBlock), string(buf[:n]))
-       }
-}
-
-// TestPutBlockOneVol
-//     PutBlock still returns success even when only one of the known
-//     volumes is online.
-//
-func TestPutBlockOneVol(t *testing.T) {
-       defer teardown()
-
-       // Create two test Keep volumes, but cripple one of them.
-       KeepVM = MakeTestVolumeManager(2)
-       defer KeepVM.Close()
-
-       vols := KeepVM.AllWritable()
-       vols[0].(*MockVolume).Bad = true
-       vols[0].(*MockVolume).BadVolumeError = errors.New("Bad volume")
-
-       // Check that PutBlock stores the data as expected.
-       if n, err := PutBlock(context.Background(), TestBlock, TestHash); err != nil || n < 1 {
-               t.Fatalf("PutBlock: n %d err %v", n, err)
-       }
-
-       buf := make([]byte, BlockSize)
-       size, err := GetBlock(context.Background(), TestHash, buf, nil)
-       if err != nil {
-               t.Fatalf("GetBlock: %v", err)
-       }
-       if bytes.Compare(buf[:size], TestBlock) != 0 {
-               t.Fatalf("PutBlock stored %+q, GetBlock retrieved %+q",
-                       TestBlock, buf[:size])
-       }
-}
-
-// TestPutBlockMD5Fail
-//     Check that PutBlock returns an error if passed a block and hash that
-//     do not match.
-//
-func TestPutBlockMD5Fail(t *testing.T) {
-       defer teardown()
-
-       // Create two test Keep volumes.
-       KeepVM = MakeTestVolumeManager(2)
-       defer KeepVM.Close()
-
-       // Check that PutBlock returns the expected error when the hash does
-       // not match the block.
-       if _, err := PutBlock(context.Background(), BadBlock, TestHash); err != RequestHashError {
-               t.Errorf("Expected RequestHashError, got %v", err)
-       }
-
-       // Confirm that GetBlock fails to return anything.
-       if result, err := GetBlock(context.Background(), TestHash, make([]byte, BlockSize), nil); err != NotFoundError {
-               t.Errorf("GetBlock succeeded after a corrupt block store (result = %s, err = %v)",
-                       string(result), err)
-       }
-}
-
-// TestPutBlockCorrupt
-//     PutBlock should overwrite corrupt blocks on disk when given
-//     a PUT request with a good block.
-//
-func TestPutBlockCorrupt(t *testing.T) {
-       defer teardown()
-
-       // Create two test Keep volumes.
-       KeepVM = MakeTestVolumeManager(2)
-       defer KeepVM.Close()
-
-       // Store a corrupted block under TestHash.
-       vols := KeepVM.AllWritable()
-       vols[0].Put(context.Background(), TestHash, BadBlock)
-       if n, err := PutBlock(context.Background(), TestBlock, TestHash); err != nil || n < 1 {
-               t.Errorf("PutBlock: n %d err %v", n, err)
-       }
-
-       // The block on disk should now match TestBlock.
-       buf := make([]byte, BlockSize)
-       if size, err := GetBlock(context.Background(), TestHash, buf, nil); err != nil {
-               t.Errorf("GetBlock: %v", err)
-       } else if bytes.Compare(buf[:size], TestBlock) != 0 {
-               t.Errorf("Got %+q, expected %+q", buf[:size], TestBlock)
-       }
-}
-
-// TestPutBlockCollision
-//     PutBlock returns a 400 Collision error when attempting to
-//     store a block that collides with another block on disk.
-//
-func TestPutBlockCollision(t *testing.T) {
-       defer teardown()
-
-       // These blocks both hash to the MD5 digest cee9a457e790cf20d4bdaa6d69f01e41.
-       b1 := arvadostest.MD5CollisionData[0]
-       b2 := arvadostest.MD5CollisionData[1]
-       locator := arvadostest.MD5CollisionMD5
-
-       // Prepare two test Keep volumes.
-       KeepVM = MakeTestVolumeManager(2)
-       defer KeepVM.Close()
-
-       // Store one block, then attempt to store the other. Confirm that
-       // PutBlock reported a CollisionError.
-       if _, err := PutBlock(context.Background(), b1, locator); err != nil {
-               t.Error(err)
-       }
-       if _, err := PutBlock(context.Background(), b2, locator); err == nil {
-               t.Error("PutBlock did not report a collision")
-       } else if err != CollisionError {
-               t.Errorf("PutBlock returned %v", err)
-       }
-}
-
-// TestPutBlockTouchFails
-//     When PutBlock is asked to PUT an existing block, but cannot
-//     modify the timestamp, it should write a second block.
-//
-func TestPutBlockTouchFails(t *testing.T) {
-       defer teardown()
-
-       // Prepare two test Keep volumes.
-       KeepVM = MakeTestVolumeManager(2)
-       defer KeepVM.Close()
-       vols := KeepVM.AllWritable()
-
-       // Store a block and then make the underlying volume bad,
-       // so a subsequent attempt to update the file timestamp
-       // will fail.
-       vols[0].Put(context.Background(), TestHash, BadBlock)
-       oldMtime, err := vols[0].Mtime(TestHash)
-       if err != nil {
-               t.Fatalf("vols[0].Mtime(%s): %s\n", TestHash, err)
-       }
-
-       // vols[0].Touch will fail on the next call, so the volume
-       // manager will store a copy on vols[1] instead.
-       vols[0].(*MockVolume).Touchable = false
-       if n, err := PutBlock(context.Background(), TestBlock, TestHash); err != nil || n < 1 {
-               t.Fatalf("PutBlock: n %d err %v", n, err)
-       }
-       vols[0].(*MockVolume).Touchable = true
-
-       // Now the mtime on the block on vols[0] should be unchanged, and
-       // there should be a copy of the block on vols[1].
-       newMtime, err := vols[0].Mtime(TestHash)
-       if err != nil {
-               t.Fatalf("vols[0].Mtime(%s): %s\n", TestHash, err)
-       }
-       if !newMtime.Equal(oldMtime) {
-               t.Errorf("mtime was changed on vols[0]:\noldMtime = %v\nnewMtime = %v\n",
-                       oldMtime, newMtime)
-       }
-       buf := make([]byte, BlockSize)
-       n, err := vols[1].Get(context.Background(), TestHash, buf)
-       if err != nil {
-               t.Fatalf("vols[1]: %v", err)
-       }
-       if bytes.Compare(buf[:n], TestBlock) != 0 {
-               t.Errorf("new block does not match test block\nnew block = %v\n", buf[:n])
-       }
-}
-
-func TestDiscoverTmpfs(t *testing.T) {
-       var tempVols [4]string
-       var err error
-
-       // Create some directories suitable for using as keep volumes.
-       for i := range tempVols {
-               if tempVols[i], err = ioutil.TempDir("", "findvol"); err != nil {
-                       t.Fatal(err)
-               }
-               defer os.RemoveAll(tempVols[i])
-               tempVols[i] = tempVols[i] + "/keep"
-               if err = os.Mkdir(tempVols[i], 0755); err != nil {
-                       t.Fatal(err)
-               }
-       }
-
-       // Set up a bogus ProcMounts file.
-       f, err := ioutil.TempFile("", "keeptest")
-       if err != nil {
-               t.Fatal(err)
-       }
-       defer os.Remove(f.Name())
-       for i, vol := range tempVols {
-               // Add readonly mount points at odd indexes.
-               var opts string
-               switch i % 2 {
-               case 0:
-                       opts = "rw,nosuid,nodev,noexec"
-               case 1:
-                       opts = "nosuid,nodev,noexec,ro"
-               }
-               fmt.Fprintf(f, "tmpfs %s tmpfs %s 0 0\n", path.Dir(vol), opts)
-       }
-       f.Close()
-       ProcMounts = f.Name()
-
-       cfg := &Config{}
-       added := (&unixVolumeAdder{cfg}).Discover()
-
-       if added != len(cfg.Volumes) {
-               t.Errorf("Discover returned %d, but added %d volumes",
-                       added, len(cfg.Volumes))
-       }
-       if added != len(tempVols) {
-               t.Errorf("Discover returned %d but we set up %d volumes",
-                       added, len(tempVols))
-       }
-       for i, tmpdir := range tempVols {
-               if tmpdir != cfg.Volumes[i].(*UnixVolume).Root {
-                       t.Errorf("Discover returned %s, expected %s\n",
-                               cfg.Volumes[i].(*UnixVolume).Root, tmpdir)
-               }
-               if expectReadonly := i%2 == 1; expectReadonly != cfg.Volumes[i].(*UnixVolume).ReadOnly {
-                       t.Errorf("Discover added %s with readonly=%v, should be %v",
-                               tmpdir, !expectReadonly, expectReadonly)
-               }
-       }
-}
-
-func TestDiscoverNone(t *testing.T) {
-       defer teardown()
-
-       // Set up a bogus ProcMounts file with no Keep vols.
-       f, err := ioutil.TempFile("", "keeptest")
-       if err != nil {
-               t.Fatal(err)
-       }
-       defer os.Remove(f.Name())
-       fmt.Fprintln(f, "rootfs / rootfs opts 0 0")
-       fmt.Fprintln(f, "sysfs /sys sysfs opts 0 0")
-       fmt.Fprintln(f, "proc /proc proc opts 0 0")
-       fmt.Fprintln(f, "udev /dev devtmpfs opts 0 0")
-       fmt.Fprintln(f, "devpts /dev/pts devpts opts 0 0")
-       f.Close()
-       ProcMounts = f.Name()
-
-       cfg := &Config{}
-       added := (&unixVolumeAdder{cfg}).Discover()
-       if added != 0 || len(cfg.Volumes) != 0 {
-               t.Fatalf("got %d, %v; expected 0, []", added, cfg.Volumes)
-       }
-}
-
-// TestIndex
-//     Test an /index request.
-func TestIndex(t *testing.T) {
-       defer teardown()
-
-       // Set up Keep volumes and populate them.
-       // Include multiple blocks on different volumes, and
-       // some metadata files.
-       KeepVM = MakeTestVolumeManager(2)
-       defer KeepVM.Close()
-
-       vols := KeepVM.AllReadable()
-       vols[0].Put(context.Background(), TestHash, TestBlock)
-       vols[1].Put(context.Background(), TestHash2, TestBlock2)
-       vols[0].Put(context.Background(), TestHash3, TestBlock3)
-       vols[0].Put(context.Background(), TestHash+".meta", []byte("metadata"))
-       vols[1].Put(context.Background(), TestHash2+".meta", []byte("metadata"))
-
-       buf := new(bytes.Buffer)
-       vols[0].IndexTo("", buf)
-       vols[1].IndexTo("", buf)
-       indexRows := strings.Split(string(buf.Bytes()), "\n")
-       sort.Strings(indexRows)
-       sortedIndex := strings.Join(indexRows, "\n")
-       expected := `^\n` + TestHash + `\+\d+ \d+\n` +
-               TestHash3 + `\+\d+ \d+\n` +
-               TestHash2 + `\+\d+ \d+$`
-
-       match, err := regexp.MatchString(expected, sortedIndex)
-       if err == nil {
-               if !match {
-                       t.Errorf("IndexLocators returned:\n%s", string(buf.Bytes()))
-               }
-       } else {
-               t.Errorf("regexp.MatchString: %s", err)
-       }
-}
-
-// ========================================
-// Helper functions for unit tests.
-// ========================================
-
-// MakeTestVolumeManager returns a RRVolumeManager with the specified
-// number of MockVolumes.
-func MakeTestVolumeManager(numVolumes int) VolumeManager {
-       vols := make([]Volume, numVolumes)
-       for i := range vols {
-               vols[i] = CreateMockVolume()
-       }
-       return MakeRRVolumeManager(vols)
-}
-
-// teardown cleans up after each test.
-func teardown() {
-       theConfig.systemAuthToken = ""
-       theConfig.RequireSignatures = false
-       theConfig.blobSigningKey = nil
-       KeepVM = nil
-}
index 235c41891312ca6592d560678cdce78543adcce1..b2f0aa663872df877c6d64a17fe3ebd3c75f335a 100644 (file)
@@ -7,7 +7,6 @@ package main
 import (
        "fmt"
 
-       "git.curoverse.com/arvados.git/sdk/go/httpserver"
        "github.com/prometheus/client_golang/prometheus"
 )
 
@@ -66,27 +65,6 @@ func (m *nodeMetrics) setupWorkQueueMetrics(q *WorkQueue, qName string) {
        ))
 }
 
-func (m *nodeMetrics) setupRequestMetrics(rc httpserver.RequestCounter) {
-       m.reg.MustRegister(prometheus.NewGaugeFunc(
-               prometheus.GaugeOpts{
-                       Namespace: "arvados",
-                       Subsystem: "keepstore",
-                       Name:      "concurrent_requests",
-                       Help:      "Number of requests in progress",
-               },
-               func() float64 { return float64(rc.Current()) },
-       ))
-       m.reg.MustRegister(prometheus.NewGaugeFunc(
-               prometheus.GaugeOpts{
-                       Namespace: "arvados",
-                       Subsystem: "keepstore",
-                       Name:      "max_concurrent_requests",
-                       Help:      "Maximum number of concurrent requests",
-               },
-               func() float64 { return float64(rc.Max()) },
-       ))
-}
-
 type volumeMetricsVecs struct {
        ioBytes     *prometheus.CounterVec
        errCounters *prometheus.CounterVec
index 7c932ee023b2a188433e34bbf773cc0eb8b64b08..9b5606b5c405fe2fe264fb2a66a7bf8701f72997 100644 (file)
@@ -12,59 +12,39 @@ import (
        "net/http/httptest"
 
        "git.curoverse.com/arvados.git/sdk/go/arvadostest"
+       "git.curoverse.com/arvados.git/sdk/go/ctxlog"
+       "git.curoverse.com/arvados.git/sdk/go/httpserver"
        "github.com/prometheus/client_golang/prometheus"
        check "gopkg.in/check.v1"
 )
 
-var _ = check.Suite(&MountsSuite{})
+func (s *HandlerSuite) TestMounts(c *check.C) {
+       c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
 
-type MountsSuite struct {
-       vm  VolumeManager
-       rtr http.Handler
-}
-
-func (s *MountsSuite) SetUpTest(c *check.C) {
-       s.vm = MakeTestVolumeManager(2)
-       KeepVM = s.vm
-       theConfig = DefaultConfig()
-       theConfig.systemAuthToken = arvadostest.DataManagerToken
-       theConfig.ManagementToken = arvadostest.ManagementToken
-       r := prometheus.NewRegistry()
-       theConfig.Start(r)
-       s.rtr = MakeRESTRouter(testCluster, r)
-}
-
-func (s *MountsSuite) TearDownTest(c *check.C) {
-       s.vm.Close()
-       KeepVM = nil
-       theConfig = DefaultConfig()
-       theConfig.Start(prometheus.NewRegistry())
-}
-
-func (s *MountsSuite) TestMounts(c *check.C) {
-       vols := s.vm.AllWritable()
+       vols := s.handler.volmgr.AllWritable()
        vols[0].Put(context.Background(), TestHash, TestBlock)
        vols[1].Put(context.Background(), TestHash2, TestBlock2)
 
        resp := s.call("GET", "/mounts", "", nil)
        c.Check(resp.Code, check.Equals, http.StatusOK)
        var mntList []struct {
-               UUID           string   `json:"uuid"`
-               DeviceID       string   `json:"device_id"`
-               ReadOnly       bool     `json:"read_only"`
-               Replication    int      `json:"replication"`
-               StorageClasses []string `json:"storage_classes"`
+               UUID           string          `json:"uuid"`
+               DeviceID       string          `json:"device_id"`
+               ReadOnly       bool            `json:"read_only"`
+               Replication    int             `json:"replication"`
+               StorageClasses map[string]bool `json:"storage_classes"`
        }
+       c.Log(resp.Body.String())
        err := json.Unmarshal(resp.Body.Bytes(), &mntList)
        c.Assert(err, check.IsNil)
        c.Assert(len(mntList), check.Equals, 2)
        for _, m := range mntList {
                c.Check(len(m.UUID), check.Equals, 27)
-               c.Check(m.UUID[:12], check.Equals, "zzzzz-ivpuk-")
+               c.Check(m.UUID[:12], check.Equals, "zzzzz-nyw5e-")
                c.Check(m.DeviceID, check.Equals, "mock-device-id")
                c.Check(m.ReadOnly, check.Equals, false)
                c.Check(m.Replication, check.Equals, 1)
-               c.Check(m.StorageClasses, check.DeepEquals, []string{"default"})
+               c.Check(m.StorageClasses, check.DeepEquals, map[string]bool{"default": true})
        }
        c.Check(mntList[0].UUID, check.Not(check.Equals), mntList[1].UUID)
 
@@ -103,7 +83,12 @@ func (s *MountsSuite) TestMounts(c *check.C) {
        c.Check(resp.Body.String(), check.Equals, "\n")
 }
 
-func (s *MountsSuite) TestMetrics(c *check.C) {
+func (s *HandlerSuite) TestMetrics(c *check.C) {
+       reg := prometheus.NewRegistry()
+       c.Assert(s.handler.setup(context.Background(), s.cluster, "", reg, testServiceURL), check.IsNil)
+       instrumented := httpserver.Instrument(reg, ctxlog.TestLogger(c), s.handler.Handler)
+       s.handler.Handler = instrumented.ServeAPI(s.cluster.ManagementToken, instrumented)
+
        s.call("PUT", "/"+TestHash, "", TestBlock)
        s.call("PUT", "/"+TestHash2, "", TestBlock2)
        resp := s.call("GET", "/metrics.json", "", nil)
@@ -145,8 +130,6 @@ func (s *MountsSuite) TestMetrics(c *check.C) {
                        }
                }
        }
-       c.Check(found["request_duration_seconds"], check.Equals, true)
-       c.Check(found["time_to_status_seconds"], check.Equals, true)
 
        metricsNames := []string{
                "arvados_keepstore_bufferpool_inuse_buffers",
@@ -154,25 +137,22 @@ func (s *MountsSuite) TestMetrics(c *check.C) {
                "arvados_keepstore_bufferpool_allocated_bytes",
                "arvados_keepstore_pull_queue_inprogress_entries",
                "arvados_keepstore_pull_queue_pending_entries",
-               "arvados_keepstore_concurrent_requests",
-               "arvados_keepstore_max_concurrent_requests",
                "arvados_keepstore_trash_queue_inprogress_entries",
                "arvados_keepstore_trash_queue_pending_entries",
                "request_duration_seconds",
-               "time_to_status_seconds",
        }
        for _, m := range metricsNames {
                _, ok := names[m]
-               c.Check(ok, check.Equals, true)
+               c.Check(ok, check.Equals, true, check.Commentf("checking metric %q", m))
        }
 }
 
-func (s *MountsSuite) call(method, path, tok string, body []byte) *httptest.ResponseRecorder {
+func (s *HandlerSuite) call(method, path, tok string, body []byte) *httptest.ResponseRecorder {
        resp := httptest.NewRecorder()
        req, _ := http.NewRequest(method, path, bytes.NewReader(body))
        if tok != "" {
                req.Header.Set("Authorization", "Bearer "+tok)
        }
-       s.rtr.ServeHTTP(resp, req)
+       s.handler.ServeHTTP(resp, req)
        return resp
 }
index 49a231685a6191558e3f3bb8a8d9b92ed3f22237..e2155f94f7ec4232dbb17846f826e72b14354ad0 100644 (file)
@@ -5,14 +5,16 @@
 package main
 
 import (
-       "git.curoverse.com/arvados.git/sdk/go/keepclient"
        "time"
+
+       "git.curoverse.com/arvados.git/sdk/go/arvados"
+       "git.curoverse.com/arvados.git/sdk/go/keepclient"
 )
 
 // SignLocator takes a blobLocator, an apiToken and an expiry time, and
 // returns a signed locator string.
-func SignLocator(blobLocator, apiToken string, expiry time.Time) string {
-       return keepclient.SignLocator(blobLocator, apiToken, expiry, theConfig.BlobSignatureTTL.Duration(), theConfig.blobSigningKey)
+func SignLocator(cluster *arvados.Cluster, blobLocator, apiToken string, expiry time.Time) string {
+       return keepclient.SignLocator(blobLocator, apiToken, expiry, cluster.Collections.BlobSigningTTL.Duration(), []byte(cluster.Collections.BlobSigningKey))
 }
 
 // VerifySignature returns nil if the signature on the signedLocator
@@ -20,8 +22,8 @@ func SignLocator(blobLocator, apiToken string, expiry time.Time) string {
 // either ExpiredError (if the timestamp has expired, which is
 // something the client could have figured out independently) or
 // PermissionError.
-func VerifySignature(signedLocator, apiToken string) error {
-       err := keepclient.VerifySignature(signedLocator, apiToken, theConfig.BlobSignatureTTL.Duration(), theConfig.blobSigningKey)
+func VerifySignature(cluster *arvados.Cluster, signedLocator, apiToken string) error {
+       err := keepclient.VerifySignature(signedLocator, apiToken, cluster.Collections.BlobSigningTTL.Duration(), []byte(cluster.Collections.BlobSigningKey))
        if err == keepclient.ErrSignatureExpired {
                return ExpiredError
        } else if err != nil {
index dd57faf2771d65a73f4a87cb1af57aea07424064..6ec4887ce164c4b3b8ecf2ebeef739ed8fc17ba1 100644 (file)
@@ -6,10 +6,10 @@ package main
 
 import (
        "strconv"
-       "testing"
        "time"
 
        "git.curoverse.com/arvados.git/sdk/go/arvados"
+       check "gopkg.in/check.v1"
 )
 
 const (
@@ -30,44 +30,34 @@ const (
        knownSignedLocator = knownLocator + knownSigHint
 )
 
-func TestSignLocator(t *testing.T) {
-       defer func(b []byte) {
-               theConfig.blobSigningKey = b
-       }(theConfig.blobSigningKey)
-
+func (s *HandlerSuite) TestSignLocator(c *check.C) {
        tsInt, err := strconv.ParseInt(knownTimestamp, 16, 0)
        if err != nil {
-               t.Fatal(err)
+               c.Fatal(err)
        }
        t0 := time.Unix(tsInt, 0)
 
-       theConfig.BlobSignatureTTL = knownSignatureTTL
-
-       theConfig.blobSigningKey = []byte(knownKey)
-       if x := SignLocator(knownLocator, knownToken, t0); x != knownSignedLocator {
-               t.Fatalf("Got %+q, expected %+q", x, knownSignedLocator)
+       s.cluster.Collections.BlobSigningTTL = knownSignatureTTL
+       s.cluster.Collections.BlobSigningKey = knownKey
+       if x := SignLocator(s.cluster, knownLocator, knownToken, t0); x != knownSignedLocator {
+               c.Fatalf("Got %+q, expected %+q", x, knownSignedLocator)
        }
 
-       theConfig.blobSigningKey = []byte("arbitrarykey")
-       if x := SignLocator(knownLocator, knownToken, t0); x == knownSignedLocator {
-               t.Fatalf("Got same signature %+q, even though blobSigningKey changed", x)
+       s.cluster.Collections.BlobSigningKey = "arbitrarykey"
+       if x := SignLocator(s.cluster, knownLocator, knownToken, t0); x == knownSignedLocator {
+               c.Fatalf("Got same signature %+q, even though blobSigningKey changed", x)
        }
 }
 
-func TestVerifyLocator(t *testing.T) {
-       defer func(b []byte) {
-               theConfig.blobSigningKey = b
-       }(theConfig.blobSigningKey)
-
-       theConfig.BlobSignatureTTL = knownSignatureTTL
-
-       theConfig.blobSigningKey = []byte(knownKey)
-       if err := VerifySignature(knownSignedLocator, knownToken); err != nil {
-               t.Fatal(err)
+func (s *HandlerSuite) TestVerifyLocator(c *check.C) {
+       s.cluster.Collections.BlobSigningTTL = knownSignatureTTL
+       s.cluster.Collections.BlobSigningKey = knownKey
+       if err := VerifySignature(s.cluster, knownSignedLocator, knownToken); err != nil {
+               c.Fatal(err)
        }
 
-       theConfig.blobSigningKey = []byte("arbitrarykey")
-       if err := VerifySignature(knownSignedLocator, knownToken); err == nil {
-               t.Fatal("Verified signature even with wrong blobSigningKey")
+       s.cluster.Collections.BlobSigningKey = "arbitrarykey"
+       if err := VerifySignature(s.cluster, knownSignedLocator, knownToken); err == nil {
+               c.Fatal("Verified signature even with wrong blobSigningKey")
        }
 }
index 1f82f3f4fc79d82c8f1a6fa1586410d94c9d76dc..fac9c542f1b279d176005968614bda042d66aaa5 100644 (file)
@@ -25,7 +25,7 @@ type remoteProxy struct {
        mtx     sync.Mutex
 }
 
-func (rp *remoteProxy) Get(ctx context.Context, w http.ResponseWriter, r *http.Request, cluster *arvados.Cluster) {
+func (rp *remoteProxy) Get(ctx context.Context, w http.ResponseWriter, r *http.Request, cluster *arvados.Cluster, volmgr *RRVolumeManager) {
        // Intervening proxies must not return a cached GET response
        // to a prior request if a X-Keep-Signature request header has
        // been added or changed.
@@ -49,6 +49,8 @@ func (rp *remoteProxy) Get(ctx context.Context, w http.ResponseWriter, r *http.R
                        Buffer:         buf[:0],
                        ResponseWriter: w,
                        Context:        ctx,
+                       Cluster:        cluster,
+                       VolumeManager:  volmgr,
                }
                defer rrc.Close()
                w = rrc
@@ -145,10 +147,12 @@ var localOrRemoteSignature = regexp.MustCompile(`\+[AR][^\+]*`)
 // local volume, adds a response header with a locally-signed locator,
 // and finally writes the data through.
 type remoteResponseCacher struct {
-       Locator string
-       Token   string
-       Buffer  []byte
-       Context context.Context
+       Locator       string
+       Token         string
+       Buffer        []byte
+       Context       context.Context
+       Cluster       *arvados.Cluster
+       VolumeManager *RRVolumeManager
        http.ResponseWriter
        statusCode int
 }
@@ -173,7 +177,7 @@ func (rrc *remoteResponseCacher) Close() error {
                rrc.ResponseWriter.Write(rrc.Buffer)
                return nil
        }
-       _, err := PutBlock(rrc.Context, rrc.Buffer, rrc.Locator[:32])
+       _, err := PutBlock(rrc.Context, rrc.VolumeManager, rrc.Buffer, rrc.Locator[:32])
        if rrc.Context.Err() != nil {
                // If caller hung up, log that instead of subsequent/misleading errors.
                http.Error(rrc.ResponseWriter, rrc.Context.Err().Error(), http.StatusGatewayTimeout)
@@ -193,7 +197,8 @@ func (rrc *remoteResponseCacher) Close() error {
        }
 
        unsigned := localOrRemoteSignature.ReplaceAllLiteralString(rrc.Locator, "")
-       signed := SignLocator(unsigned, rrc.Token, time.Now().Add(theConfig.BlobSignatureTTL.Duration()))
+       expiry := time.Now().Add(rrc.Cluster.Collections.BlobSigningTTL.Duration())
+       signed := SignLocator(rrc.Cluster, unsigned, rrc.Token, expiry)
        if signed == unsigned {
                err = errors.New("could not sign locator")
                http.Error(rrc.ResponseWriter, err.Error(), http.StatusInternalServerError)
index 6c22d1d32aa2f0745a2cc424cdfeef4d4d76ca75..6483d6cf01310af98d78b5fd1074b3301004bd83 100644 (file)
@@ -5,6 +5,7 @@
 package main
 
 import (
+       "context"
        "crypto/md5"
        "encoding/json"
        "fmt"
@@ -28,8 +29,7 @@ var _ = check.Suite(&ProxyRemoteSuite{})
 
 type ProxyRemoteSuite struct {
        cluster *arvados.Cluster
-       vm      VolumeManager
-       rtr     http.Handler
+       handler *handler
 
        remoteClusterID      string
        remoteBlobSigningKey []byte
@@ -87,7 +87,9 @@ func (s *ProxyRemoteSuite) SetUpTest(c *check.C) {
        s.remoteKeepproxy = httptest.NewServer(http.HandlerFunc(s.remoteKeepproxyHandler))
        s.remoteAPI = httptest.NewUnstartedServer(http.HandlerFunc(s.remoteAPIHandler))
        s.remoteAPI.StartTLS()
-       s.cluster = arvados.IntegrationTestCluster()
+       s.cluster = testCluster(c)
+       s.cluster.Collections.BlobSigningKey = knownKey
+       s.cluster.SystemRootToken = arvadostest.DataManagerToken
        s.cluster.RemoteClusters = map[string]arvados.RemoteCluster{
                s.remoteClusterID: arvados.RemoteCluster{
                        Host:     strings.Split(s.remoteAPI.URL, "//")[1],
@@ -96,21 +98,12 @@ func (s *ProxyRemoteSuite) SetUpTest(c *check.C) {
                        Insecure: true,
                },
        }
-       s.vm = MakeTestVolumeManager(2)
-       KeepVM = s.vm
-       theConfig = DefaultConfig()
-       theConfig.systemAuthToken = arvadostest.DataManagerToken
-       theConfig.blobSigningKey = []byte(knownKey)
-       r := prometheus.NewRegistry()
-       theConfig.Start(r)
-       s.rtr = MakeRESTRouter(s.cluster, r)
+       s.cluster.Volumes = map[string]arvados.Volume{"zzzzz-nyw5e-000000000000000": {Driver: "mock"}}
+       s.handler = &handler{}
+       c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
 }
 
 func (s *ProxyRemoteSuite) TearDownTest(c *check.C) {
-       s.vm.Close()
-       KeepVM = nil
-       theConfig = DefaultConfig()
-       theConfig.Start(prometheus.NewRegistry())
        s.remoteAPI.Close()
        s.remoteKeepproxy.Close()
 }
@@ -191,7 +184,7 @@ func (s *ProxyRemoteSuite) TestProxyRemote(c *check.C) {
                        req.Header.Set("X-Keep-Signature", trial.xKeepSignature)
                }
                resp = httptest.NewRecorder()
-               s.rtr.ServeHTTP(resp, req)
+               s.handler.ServeHTTP(resp, req)
                c.Check(s.remoteKeepRequests, check.Equals, trial.expectRemoteReqs)
                c.Check(resp.Code, check.Equals, trial.expectCode)
                if resp.Code == http.StatusOK {
@@ -210,13 +203,13 @@ func (s *ProxyRemoteSuite) TestProxyRemote(c *check.C) {
 
                c.Check(locHdr, check.Not(check.Equals), "")
                c.Check(locHdr, check.Not(check.Matches), `.*\+R.*`)
-               c.Check(VerifySignature(locHdr, trial.token), check.IsNil)
+               c.Check(VerifySignature(s.cluster, locHdr, trial.token), check.IsNil)
 
                // Ensure block can be requested using new signature
                req = httptest.NewRequest("GET", "/"+locHdr, nil)
                req.Header.Set("Authorization", "Bearer "+trial.token)
                resp = httptest.NewRecorder()
-               s.rtr.ServeHTTP(resp, req)
+               s.handler.ServeHTTP(resp, req)
                c.Check(resp.Code, check.Equals, http.StatusOK)
                c.Check(s.remoteKeepRequests, check.Equals, trial.expectRemoteReqs)
        }
index 42b5d5889d30984685d43655f66fb3857f98de92..100d08838d84f3597403d0d56374cc035d7fdddb 100644 (file)
@@ -6,7 +6,6 @@ package main
 
 import (
        "context"
-       "crypto/rand"
        "fmt"
        "io"
        "io/ioutil"
@@ -18,15 +17,15 @@ import (
 // RunPullWorker receives PullRequests from pullq, invokes
 // PullItemAndProcess on each one. After each PR, it logs a message
 // indicating whether the pull was successful.
-func RunPullWorker(pullq *WorkQueue, keepClient *keepclient.KeepClient) {
+func (h *handler) runPullWorker(pullq *WorkQueue) {
        for item := range pullq.NextItem {
                pr := item.(PullRequest)
-               err := PullItemAndProcess(pr, keepClient)
+               err := h.pullItemAndProcess(pr)
                pullq.DoneItem <- struct{}{}
                if err == nil {
-                       log.Printf("Pull %s success", pr)
+                       h.Logger.Printf("Pull %s success", pr)
                } else {
-                       log.Printf("Pull %s error: %s", pr, err)
+                       h.Logger.Printf("Pull %s error: %s", pr, err)
                }
        }
 }
@@ -39,28 +38,28 @@ func RunPullWorker(pullq *WorkQueue, keepClient *keepclient.KeepClient) {
 // only attempt to write the data to the corresponding
 // volume. Otherwise it writes to any local volume, as a PUT request
 // would.
-func PullItemAndProcess(pullRequest PullRequest, keepClient *keepclient.KeepClient) error {
-       var vol Volume
+func (h *handler) pullItemAndProcess(pullRequest PullRequest) error {
+       var vol *VolumeMount
        if uuid := pullRequest.MountUUID; uuid != "" {
-               vol = KeepVM.Lookup(pullRequest.MountUUID, true)
+               vol = h.volmgr.Lookup(pullRequest.MountUUID, true)
                if vol == nil {
                        return fmt.Errorf("pull req has nonexistent mount: %v", pullRequest)
                }
        }
 
-       keepClient.Arvados.ApiToken = randomToken
-
+       // Make a private copy of keepClient so we can set
+       // ServiceRoots to the source servers specified in the pull
+       // request.
+       keepClient := *h.keepClient
        serviceRoots := make(map[string]string)
        for _, addr := range pullRequest.Servers {
                serviceRoots[addr] = addr
        }
        keepClient.SetServiceRoots(serviceRoots, nil, nil)
 
-       // Generate signature with a random token
-       expiresAt := time.Now().Add(60 * time.Second)
-       signedLocator := SignLocator(pullRequest.Locator, randomToken, expiresAt)
+       signedLocator := SignLocator(h.Cluster, pullRequest.Locator, keepClient.Arvados.ApiToken, time.Now().Add(time.Minute))
 
-       reader, contentLen, _, err := GetContent(signedLocator, keepClient)
+       reader, contentLen, _, err := GetContent(signedLocator, &keepClient)
        if err != nil {
                return err
        }
@@ -78,8 +77,7 @@ func PullItemAndProcess(pullRequest PullRequest, keepClient *keepclient.KeepClie
                return fmt.Errorf("Content not found for: %s", signedLocator)
        }
 
-       writePulledBlock(vol, readContent, pullRequest.Locator)
-       return nil
+       return writePulledBlock(h.volmgr, vol, readContent, pullRequest.Locator)
 }
 
 // Fetch the content for the given locator using keepclient.
@@ -87,24 +85,11 @@ var GetContent = func(signedLocator string, keepClient *keepclient.KeepClient) (
        return keepClient.Get(signedLocator)
 }
 
-var writePulledBlock = func(volume Volume, data []byte, locator string) {
-       var err error
+var writePulledBlock = func(volmgr *RRVolumeManager, volume Volume, data []byte, locator string) error {
        if volume != nil {
-               err = volume.Put(context.Background(), locator, data)
+               return volume.Put(context.Background(), locator, data)
        } else {
-               _, err = PutBlock(context.Background(), data, locator)
-       }
-       if err != nil {
-               log.Printf("error writing pulled block %q: %s", locator, err)
+               _, err := PutBlock(context.Background(), volmgr, data, locator)
+               return err
        }
 }
-
-var randomToken = func() string {
-       const alphaNumeric = "0123456789abcdefghijklmnopqrstuvwxyz"
-       var bytes = make([]byte, 36)
-       rand.Read(bytes)
-       for i, b := range bytes {
-               bytes[i] = alphaNumeric[b%byte(len(alphaNumeric))]
-       }
-       return (string(bytes))
-}()
index 231a4c0ab28097340f558c6179fd14d9d50b9d3f..a35b744c5f62254051305e81914ab038b2f09183 100644 (file)
@@ -6,20 +6,18 @@ package main
 
 import (
        "bytes"
+       "context"
        "errors"
        "io"
        "io/ioutil"
-       "os"
        "strings"
-       "testing"
 
-       "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
        "git.curoverse.com/arvados.git/sdk/go/arvadostest"
        "git.curoverse.com/arvados.git/sdk/go/keepclient"
+       "github.com/prometheus/client_golang/prometheus"
+       check "gopkg.in/check.v1"
 )
 
-var keepClient *keepclient.KeepClient
-
 type PullWorkIntegrationTestData struct {
        Name     string
        Locator  string
@@ -27,55 +25,31 @@ type PullWorkIntegrationTestData struct {
        GetError string
 }
 
-func SetupPullWorkerIntegrationTest(t *testing.T, testData PullWorkIntegrationTestData, wantData bool) PullRequest {
-       os.Setenv("ARVADOS_API_HOST_INSECURE", "true")
-
-       // start api and keep servers
-       arvadostest.StartAPI()
+func (s *HandlerSuite) setupPullWorkerIntegrationTest(c *check.C, testData PullWorkIntegrationTestData, wantData bool) PullRequest {
        arvadostest.StartKeep(2, false)
-
-       // make arvadosclient
-       arv, err := arvadosclient.MakeArvadosClient()
-       if err != nil {
-               t.Fatalf("Error creating arv: %s", err)
-       }
-
-       // keep client
-       keepClient, err = keepclient.MakeKeepClient(arv)
-       if err != nil {
-               t.Fatalf("error creating KeepClient: %s", err)
-       }
-       keepClient.Want_replicas = 1
-       keepClient.RefreshServiceDiscovery()
-
-       // discover keep services
-       var servers []string
-       for _, host := range keepClient.LocalRoots() {
-               servers = append(servers, host)
-       }
-
+       c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
        // Put content if the test needs it
        if wantData {
-               locator, _, err := keepClient.PutB([]byte(testData.Content))
+               locator, _, err := s.handler.keepClient.PutB([]byte(testData.Content))
                if err != nil {
-                       t.Errorf("Error putting test data in setup for %s %s %v", testData.Content, locator, err)
+                       c.Errorf("Error putting test data in setup for %s %s %v", testData.Content, locator, err)
                }
                if locator == "" {
-                       t.Errorf("No locator found after putting test data")
+                       c.Errorf("No locator found after putting test data")
                }
        }
 
        // Create pullRequest for the test
        pullRequest := PullRequest{
                Locator: testData.Locator,
-               Servers: servers,
        }
        return pullRequest
 }
 
 // Do a get on a block that is not existing in any of the keep servers.
 // Expect "block not found" error.
-func TestPullWorkerIntegration_GetNonExistingLocator(t *testing.T) {
+func (s *HandlerSuite) TestPullWorkerIntegration_GetNonExistingLocator(c *check.C) {
+       c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
        testData := PullWorkIntegrationTestData{
                Name:     "TestPullWorkerIntegration_GetLocator",
                Locator:  "5d41402abc4b2a76b9719d911017c592",
@@ -83,16 +57,17 @@ func TestPullWorkerIntegration_GetNonExistingLocator(t *testing.T) {
                GetError: "Block not found",
        }
 
-       pullRequest := SetupPullWorkerIntegrationTest(t, testData, false)
+       pullRequest := s.setupPullWorkerIntegrationTest(c, testData, false)
        defer arvadostest.StopAPI()
        defer arvadostest.StopKeep(2)
 
-       performPullWorkerIntegrationTest(testData, pullRequest, t)
+       s.performPullWorkerIntegrationTest(testData, pullRequest, c)
 }
 
 // Do a get on a block that exists on one of the keep servers.
 // The setup method will create this block before doing the get.
-func TestPullWorkerIntegration_GetExistingLocator(t *testing.T) {
+func (s *HandlerSuite) TestPullWorkerIntegration_GetExistingLocator(c *check.C) {
+       c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
        testData := PullWorkIntegrationTestData{
                Name:     "TestPullWorkerIntegration_GetLocator",
                Locator:  "5d41402abc4b2a76b9719d911017c592",
@@ -100,24 +75,23 @@ func TestPullWorkerIntegration_GetExistingLocator(t *testing.T) {
                GetError: "",
        }
 
-       pullRequest := SetupPullWorkerIntegrationTest(t, testData, true)
+       pullRequest := s.setupPullWorkerIntegrationTest(c, testData, true)
        defer arvadostest.StopAPI()
        defer arvadostest.StopKeep(2)
 
-       performPullWorkerIntegrationTest(testData, pullRequest, t)
+       s.performPullWorkerIntegrationTest(testData, pullRequest, c)
 }
 
 // Perform the test.
 // The test directly invokes the "PullItemAndProcess" rather than
 // putting an item on the pullq so that the errors can be verified.
-func performPullWorkerIntegrationTest(testData PullWorkIntegrationTestData, pullRequest PullRequest, t *testing.T) {
+func (s *HandlerSuite) performPullWorkerIntegrationTest(testData PullWorkIntegrationTestData, pullRequest PullRequest, c *check.C) {
 
        // Override writePulledBlock to mock PutBlock functionality
-       defer func(orig func(Volume, []byte, string)) { writePulledBlock = orig }(writePulledBlock)
-       writePulledBlock = func(v Volume, content []byte, locator string) {
-               if string(content) != testData.Content {
-                       t.Errorf("writePulledBlock invoked with unexpected data. Expected: %s; Found: %s", testData.Content, content)
-               }
+       defer func(orig func(*RRVolumeManager, Volume, []byte, string) error) { writePulledBlock = orig }(writePulledBlock)
+       writePulledBlock = func(_ *RRVolumeManager, _ Volume, content []byte, _ string) error {
+               c.Check(string(content), check.Equals, testData.Content)
+               return nil
        }
 
        // Override GetContent to mock keepclient Get functionality
@@ -132,15 +106,15 @@ func performPullWorkerIntegrationTest(testData PullWorkIntegrationTestData, pull
                return rdr, int64(len(testData.Content)), "", nil
        }
 
-       err := PullItemAndProcess(pullRequest, keepClient)
+       err := s.handler.pullItemAndProcess(pullRequest)
 
        if len(testData.GetError) > 0 {
                if (err == nil) || (!strings.Contains(err.Error(), testData.GetError)) {
-                       t.Errorf("Got error %v, expected %v", err, testData.GetError)
+                       c.Errorf("Got error %v, expected %v", err, testData.GetError)
                }
        } else {
                if err != nil {
-                       t.Errorf("Got error %v, expected nil", err)
+                       c.Errorf("Got error %v, expected nil", err)
                }
        }
 }
index 8e667e048f47ff3f3ac91df65c960ac94511e8b3..6a7a0b7a883468f4cc1c00441b80fd35740fce5b 100644 (file)
@@ -6,21 +6,26 @@ package main
 
 import (
        "bytes"
+       "context"
        "errors"
        "io"
        "io/ioutil"
        "net/http"
        "time"
 
-       "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
+       "git.curoverse.com/arvados.git/sdk/go/arvados"
        "git.curoverse.com/arvados.git/sdk/go/keepclient"
        "github.com/prometheus/client_golang/prometheus"
        . "gopkg.in/check.v1"
+       check "gopkg.in/check.v1"
 )
 
 var _ = Suite(&PullWorkerTestSuite{})
 
 type PullWorkerTestSuite struct {
+       cluster *arvados.Cluster
+       handler *handler
+
        testPullLists map[string]string
        readContent   string
        readError     error
@@ -29,7 +34,16 @@ type PullWorkerTestSuite struct {
 }
 
 func (s *PullWorkerTestSuite) SetUpTest(c *C) {
-       theConfig.systemAuthToken = "arbitrary data manager token"
+       s.cluster = testCluster(c)
+       s.cluster.Volumes = map[string]arvados.Volume{
+               "zzzzz-nyw5e-000000000000000": {Driver: "mock"},
+               "zzzzz-nyw5e-111111111111111": {Driver: "mock"},
+       }
+       s.cluster.Collections.BlobReplicateConcurrency = 1
+
+       s.handler = &handler{}
+       c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
+
        s.readContent = ""
        s.readError = nil
        s.putContent = []byte{}
@@ -39,27 +53,6 @@ func (s *PullWorkerTestSuite) SetUpTest(c *C) {
        // This behavior is verified using these two maps in the
        // "TestPullWorkerPullList_with_two_items_latest_replacing_old"
        s.testPullLists = make(map[string]string)
-
-       KeepVM = MakeTestVolumeManager(2)
-
-       // Normally the pull queue and workers are started by main()
-       // -- tests need to set up their own.
-       arv, err := arvadosclient.MakeArvadosClient()
-       c.Assert(err, IsNil)
-       keepClient, err := keepclient.MakeKeepClient(arv)
-       c.Assert(err, IsNil)
-       pullq = NewWorkQueue()
-       go RunPullWorker(pullq, keepClient)
-}
-
-func (s *PullWorkerTestSuite) TearDownTest(c *C) {
-       KeepVM.Close()
-       KeepVM = nil
-       pullq.Close()
-       pullq = nil
-       teardown()
-       theConfig = DefaultConfig()
-       theConfig.Start(prometheus.NewRegistry())
 }
 
 var firstPullList = []byte(`[
@@ -100,9 +93,10 @@ type PullWorkerTestData struct {
 // Ensure MountUUID in a pull list is correctly translated to a Volume
 // argument passed to writePulledBlock().
 func (s *PullWorkerTestSuite) TestSpecifyMountUUID(c *C) {
-       defer func(f func(Volume, []byte, string)) {
+       defer func(f func(*RRVolumeManager, Volume, []byte, string) error) {
                writePulledBlock = f
        }(writePulledBlock)
+       pullq := s.handler.Handler.(*router).pullq
 
        for _, spec := range []struct {
                sendUUID     string
@@ -113,17 +107,18 @@ func (s *PullWorkerTestSuite) TestSpecifyMountUUID(c *C) {
                        expectVolume: nil,
                },
                {
-                       sendUUID:     KeepVM.Mounts()[0].UUID,
-                       expectVolume: KeepVM.Mounts()[0].volume,
+                       sendUUID:     s.handler.volmgr.Mounts()[0].UUID,
+                       expectVolume: s.handler.volmgr.Mounts()[0].Volume,
                },
        } {
-               writePulledBlock = func(v Volume, _ []byte, _ string) {
+               writePulledBlock = func(_ *RRVolumeManager, v Volume, _ []byte, _ string) error {
                        c.Check(v, Equals, spec.expectVolume)
+                       return nil
                }
 
-               resp := IssueRequest(&RequestTester{
+               resp := IssueRequest(s.handler, &RequestTester{
                        uri:      "/pull",
-                       apiToken: theConfig.systemAuthToken,
+                       apiToken: s.cluster.SystemRootToken,
                        method:   "PUT",
                        requestBody: []byte(`[{
                                "locator":"acbd18db4cc2f85cedef654fccc4a4d8+3",
@@ -141,7 +136,7 @@ func (s *PullWorkerTestSuite) TestSpecifyMountUUID(c *C) {
 func (s *PullWorkerTestSuite) TestPullWorkerPullList_with_two_locators(c *C) {
        testData := PullWorkerTestData{
                name:         "TestPullWorkerPullList_with_two_locators",
-               req:          RequestTester{"/pull", theConfig.systemAuthToken, "PUT", firstPullList},
+               req:          RequestTester{"/pull", s.cluster.SystemRootToken, "PUT", firstPullList},
                responseCode: http.StatusOK,
                responseBody: "Received 2 pull requests\n",
                readContent:  "hello",
@@ -155,7 +150,7 @@ func (s *PullWorkerTestSuite) TestPullWorkerPullList_with_two_locators(c *C) {
 func (s *PullWorkerTestSuite) TestPullWorkerPullList_with_one_locator(c *C) {
        testData := PullWorkerTestData{
                name:         "TestPullWorkerPullList_with_one_locator",
-               req:          RequestTester{"/pull", theConfig.systemAuthToken, "PUT", secondPullList},
+               req:          RequestTester{"/pull", s.cluster.SystemRootToken, "PUT", secondPullList},
                responseCode: http.StatusOK,
                responseBody: "Received 1 pull requests\n",
                readContent:  "hola",
@@ -169,7 +164,7 @@ func (s *PullWorkerTestSuite) TestPullWorkerPullList_with_one_locator(c *C) {
 func (s *PullWorkerTestSuite) TestPullWorker_error_on_get_one_locator(c *C) {
        testData := PullWorkerTestData{
                name:         "TestPullWorker_error_on_get_one_locator",
-               req:          RequestTester{"/pull", theConfig.systemAuthToken, "PUT", secondPullList},
+               req:          RequestTester{"/pull", s.cluster.SystemRootToken, "PUT", secondPullList},
                responseCode: http.StatusOK,
                responseBody: "Received 1 pull requests\n",
                readContent:  "unused",
@@ -183,7 +178,7 @@ func (s *PullWorkerTestSuite) TestPullWorker_error_on_get_one_locator(c *C) {
 func (s *PullWorkerTestSuite) TestPullWorker_error_on_get_two_locators(c *C) {
        testData := PullWorkerTestData{
                name:         "TestPullWorker_error_on_get_two_locators",
-               req:          RequestTester{"/pull", theConfig.systemAuthToken, "PUT", firstPullList},
+               req:          RequestTester{"/pull", s.cluster.SystemRootToken, "PUT", firstPullList},
                responseCode: http.StatusOK,
                responseBody: "Received 2 pull requests\n",
                readContent:  "unused",
@@ -197,7 +192,7 @@ func (s *PullWorkerTestSuite) TestPullWorker_error_on_get_two_locators(c *C) {
 func (s *PullWorkerTestSuite) TestPullWorker_error_on_put_one_locator(c *C) {
        testData := PullWorkerTestData{
                name:         "TestPullWorker_error_on_put_one_locator",
-               req:          RequestTester{"/pull", theConfig.systemAuthToken, "PUT", secondPullList},
+               req:          RequestTester{"/pull", s.cluster.SystemRootToken, "PUT", secondPullList},
                responseCode: http.StatusOK,
                responseBody: "Received 1 pull requests\n",
                readContent:  "hello hello",
@@ -211,7 +206,7 @@ func (s *PullWorkerTestSuite) TestPullWorker_error_on_put_one_locator(c *C) {
 func (s *PullWorkerTestSuite) TestPullWorker_error_on_put_two_locators(c *C) {
        testData := PullWorkerTestData{
                name:         "TestPullWorker_error_on_put_two_locators",
-               req:          RequestTester{"/pull", theConfig.systemAuthToken, "PUT", firstPullList},
+               req:          RequestTester{"/pull", s.cluster.SystemRootToken, "PUT", firstPullList},
                responseCode: http.StatusOK,
                responseBody: "Received 2 pull requests\n",
                readContent:  "hello again",
@@ -238,6 +233,8 @@ func (s *PullWorkerTestSuite) TestPullWorker_invalidToken(c *C) {
 }
 
 func (s *PullWorkerTestSuite) performTest(testData PullWorkerTestData, c *C) {
+       pullq := s.handler.Handler.(*router).pullq
+
        s.testPullLists[testData.name] = testData.responseBody
 
        processedPullLists := make(map[string]string)
@@ -247,7 +244,7 @@ func (s *PullWorkerTestSuite) performTest(testData PullWorkerTestData, c *C) {
                GetContent = orig
        }(GetContent)
        GetContent = func(signedLocator string, keepClient *keepclient.KeepClient) (reader io.ReadCloser, contentLength int64, url string, err error) {
-               c.Assert(getStatusItem("PullQueue", "InProgress"), Equals, float64(1))
+               c.Assert(getStatusItem(s.handler, "PullQueue", "InProgress"), Equals, float64(1))
                processedPullLists[testData.name] = testData.responseBody
                if testData.readError {
                        err = errors.New("Error getting data")
@@ -261,20 +258,21 @@ func (s *PullWorkerTestSuite) performTest(testData PullWorkerTestData, c *C) {
        }
 
        // Override writePulledBlock to mock PutBlock functionality
-       defer func(orig func(Volume, []byte, string)) { writePulledBlock = orig }(writePulledBlock)
-       writePulledBlock = func(v Volume, content []byte, locator string) {
+       defer func(orig func(*RRVolumeManager, Volume, []byte, string) error) { writePulledBlock = orig }(writePulledBlock)
+       writePulledBlock = func(_ *RRVolumeManager, v Volume, content []byte, locator string) error {
                if testData.putError {
                        s.putError = errors.New("Error putting data")
-                       return
+                       return s.putError
                }
                s.putContent = content
+               return nil
        }
 
-       c.Check(getStatusItem("PullQueue", "InProgress"), Equals, float64(0))
-       c.Check(getStatusItem("PullQueue", "Queued"), Equals, float64(0))
-       c.Check(getStatusItem("Version"), Not(Equals), "")
+       c.Check(getStatusItem(s.handler, "PullQueue", "InProgress"), Equals, float64(0))
+       c.Check(getStatusItem(s.handler, "PullQueue", "Queued"), Equals, float64(0))
+       c.Check(getStatusItem(s.handler, "Version"), Not(Equals), "")
 
-       response := IssueRequest(&testData.req)
+       response := IssueRequest(s.handler, &testData.req)
        c.Assert(response.Code, Equals, testData.responseCode)
        c.Assert(response.Body.String(), Equals, testData.responseBody)
 
index 4c39dcd5c4f12fc9a8b8ad36d165545af952fb7a..220377af280f2d64c682624ee69a67cfd6f3b636 100644 (file)
@@ -5,15 +5,18 @@
 package main
 
 import (
+       "bufio"
        "bytes"
        "context"
        "crypto/sha256"
        "encoding/base64"
        "encoding/hex"
-       "flag"
+       "encoding/json"
+       "errors"
        "fmt"
        "io"
        "io/ioutil"
+       "log"
        "net/http"
        "os"
        "regexp"
@@ -26,8 +29,77 @@ import (
        "github.com/AdRoll/goamz/aws"
        "github.com/AdRoll/goamz/s3"
        "github.com/prometheus/client_golang/prometheus"
+       "github.com/sirupsen/logrus"
 )
 
+// init registers newS3Volume under the "S3" key of the package-level
+// driver map (declared elsewhere in this package; presumably the
+// table consulted to construct a volume from its configured Driver
+// name -- confirm against its definition).
+func init() {
+       driver["S3"] = newS3Volume
+}
+
+// newS3Volume builds an S3Volume for the given cluster and volume
+// configuration. The exported fields of the S3Volume are populated
+// from the JSON in volume.DriverParameters, then check() validates
+// them, applies defaults, and sets up the S3 client and credentials.
+//
+// It returns an error if DriverParameters cannot be decoded or if
+// check() rejects the resulting configuration.
+func newS3Volume(cluster *arvados.Cluster, volume arvados.Volume, logger logrus.FieldLogger, metrics *volumeMetricsVecs) (Volume, error) {
+       v := &S3Volume{cluster: cluster, volume: volume, logger: logger, metrics: metrics}
+       // Decode into v itself, not &v: given a pointer-to-pointer,
+       // encoding/json handles a JSON null by setting v to nil, and
+       // the v.check() call below would then dereference a nil
+       // receiver. Decoding into v leaves the struct untouched on
+       // null, so check() reports the missing Bucket instead.
+       err := json.Unmarshal(volume.DriverParameters, v)
+       if err != nil {
+               return nil, err
+       }
+       return v, v.check()
+}
+
+// check validates the configuration decoded from DriverParameters,
+// fills in defaults, and prepares the volume for use.
+//
+// It returns an error if Bucket is empty, RaceWindow is negative,
+// the Region/Endpoint combination is unusable, or
+// bootstrapIAMCredentials fails. On success v.region and v.bucket
+// are populated and the bucket's stats counters are obtained from
+// v.metrics.
+func (v *S3Volume) check() error {
+       if v.Bucket == "" {
+               return errors.New("DriverParameters: Bucket must be provided")
+       }
+       if v.IndexPageSize == 0 {
+               v.IndexPageSize = 1000
+       }
+       if v.RaceWindow < 0 {
+               return errors.New("DriverParameters: RaceWindow must not be negative")
+       }
+
+       // Resolve the region. Three cases:
+       // - no Endpoint: Region must be a name known to aws.Regions;
+       // - Endpoint plus a known AWS region name: rejected;
+       // - Endpoint plus any other Region: build a custom aws.Region
+       //   from Region, Endpoint and LocationConstraint.
+       var ok bool
+       v.region, ok = aws.Regions[v.Region]
+       if v.Endpoint == "" {
+               if !ok {
+                       return fmt.Errorf("unrecognized region %+q; try specifying endpoint instead", v.Region)
+               }
+       } else if ok {
+               return fmt.Errorf("refusing to use AWS region name %+q with endpoint %+q; "+
+                       "specify empty endpoint or use a different region name", v.Region, v.Endpoint)
+       } else {
+               v.region = aws.Region{
+                       Name:                 v.Region,
+                       S3Endpoint:           v.Endpoint,
+                       S3LocationConstraint: v.LocationConstraint,
+               }
+       }
+
+       // Zero timeouts mean "wait forever", which is a bad
+       // default. Default to long timeouts instead.
+       if v.ConnectTimeout == 0 {
+               v.ConnectTimeout = s3DefaultConnectTimeout
+       }
+       if v.ReadTimeout == 0 {
+               v.ReadTimeout = s3DefaultReadTimeout
+       }
+
+       // The bucket wrapper must exist before bootstrapIAMCredentials
+       // runs, because updateIAMCredentials installs refreshed
+       // clients through v.bucket.SetBucket. The client built here
+       // uses whatever AccessKey/SecretKey are set at this point.
+       v.bucket = &s3bucket{
+               bucket: &s3.Bucket{
+                       S3:   v.newS3Client(),
+                       Name: v.Bucket,
+               },
+       }
+       // Set up prometheus metrics
+       lbls := prometheus.Labels{"device_id": v.GetDeviceID()}
+       v.bucket.stats.opsCounters, v.bucket.stats.errCounters, v.bucket.stats.ioBytes = v.metrics.getCounterVecsFor(lbls)
+
+       err := v.bootstrapIAMCredentials()
+       if err != nil {
+               return fmt.Errorf("error getting IAM credentials: %s", err)
+       }
+
+       return nil
+}
+
 const (
        s3DefaultReadTimeout    = arvados.Duration(10 * time.Minute)
        s3DefaultConnectTimeout = arvados.Duration(time.Minute)
@@ -36,15 +108,7 @@ const (
 var (
        // ErrS3TrashDisabled is returned by Trash if that operation
        // is impossible with the current config.
-       ErrS3TrashDisabled = fmt.Errorf("trash function is disabled because -trash-lifetime=0 and -s3-unsafe-delete=false")
-
-       s3AccessKeyFile string
-       s3SecretKeyFile string
-       s3RegionName    string
-       s3Endpoint      string
-       s3Replication   int
-       s3UnsafeDelete  bool
-       s3RaceWindow    time.Duration
+       ErrS3TrashDisabled = fmt.Errorf("trash function is disabled because Collections.BlobTrashLifetime=0 and DriverParameters.UnsafeDelete=false")
 
        s3ACL = s3.Private
 
@@ -56,40 +120,6 @@ const (
        nearlyRFC1123 = "Mon, 2 Jan 2006 15:04:05 GMT"
 )
 
-type s3VolumeAdder struct {
-       *Config
-}
-
-// String implements flag.Value
-func (s *s3VolumeAdder) String() string {
-       return "-"
-}
-
-func (s *s3VolumeAdder) Set(bucketName string) error {
-       if bucketName == "" {
-               return fmt.Errorf("no container name given")
-       }
-       if s3AccessKeyFile == "" || s3SecretKeyFile == "" {
-               return fmt.Errorf("-s3-access-key-file and -s3-secret-key-file arguments must given before -s3-bucket-volume")
-       }
-       if deprecated.flagSerializeIO {
-               log.Print("Notice: -serialize is not supported by s3-bucket volumes.")
-       }
-       s.Config.Volumes = append(s.Config.Volumes, &S3Volume{
-               Bucket:        bucketName,
-               AccessKeyFile: s3AccessKeyFile,
-               SecretKeyFile: s3SecretKeyFile,
-               Endpoint:      s3Endpoint,
-               Region:        s3RegionName,
-               RaceWindow:    arvados.Duration(s3RaceWindow),
-               S3Replication: s3Replication,
-               UnsafeDelete:  s3UnsafeDelete,
-               ReadOnly:      deprecated.flagReadonly,
-               IndexPageSize: 1000,
-       })
-       return nil
-}
-
 func s3regions() (okList []string) {
        for r := range aws.Regions {
                okList = append(okList, r)
@@ -97,168 +127,170 @@ func s3regions() (okList []string) {
        return
 }
 
-func init() {
-       VolumeTypes = append(VolumeTypes, func() VolumeWithExamples { return &S3Volume{} })
-
-       flag.Var(&s3VolumeAdder{theConfig},
-               "s3-bucket-volume",
-               "Use the given bucket as a storage volume. Can be given multiple times.")
-       flag.StringVar(
-               &s3RegionName,
-               "s3-region",
-               "",
-               fmt.Sprintf("AWS region used for subsequent -s3-bucket-volume arguments. Allowed values are %+q.", s3regions()))
-       flag.StringVar(
-               &s3Endpoint,
-               "s3-endpoint",
-               "",
-               "Endpoint URL used for subsequent -s3-bucket-volume arguments. If blank, use the AWS endpoint corresponding to the -s3-region argument. For Google Storage, use \"https://storage.googleapis.com\".")
-       flag.StringVar(
-               &s3AccessKeyFile,
-               "s3-access-key-file",
-               "",
-               "`File` containing the access key used for subsequent -s3-bucket-volume arguments.")
-       flag.StringVar(
-               &s3SecretKeyFile,
-               "s3-secret-key-file",
-               "",
-               "`File` containing the secret key used for subsequent -s3-bucket-volume arguments.")
-       flag.DurationVar(
-               &s3RaceWindow,
-               "s3-race-window",
-               24*time.Hour,
-               "Maximum eventual consistency latency for subsequent -s3-bucket-volume arguments.")
-       flag.IntVar(
-               &s3Replication,
-               "s3-replication",
-               2,
-               "Replication level reported to clients for subsequent -s3-bucket-volume arguments.")
-       flag.BoolVar(
-               &s3UnsafeDelete,
-               "s3-unsafe-delete",
-               false,
-               "EXPERIMENTAL. Enable deletion (garbage collection) even when trash lifetime is zero, even though there are known race conditions that can cause data loss.")
-}
-
 // S3Volume implements Volume using an S3 bucket.
+//
+// The exported fields are filled in by newS3Volume, which decodes
+// the volume's DriverParameters JSON into this struct. The
+// unexported fields hold the construction arguments and runtime
+// state set up by newS3Volume and check.
 type S3Volume struct {
-       AccessKeyFile      string
-       SecretKeyFile      string
+       AccessKey          string
+       SecretKey          string
+       AuthToken          string    // populated automatically when IAMRole is used
+       AuthExpiration     time.Time // populated automatically when IAMRole is used
+       IAMRole            string
         Endpoint           string
         Region             string
         Bucket             string
         LocationConstraint bool
         IndexPageSize      int
-       S3Replication      int
         ConnectTimeout     arvados.Duration
         ReadTimeout        arvados.Duration
         RaceWindow         arvados.Duration
-       ReadOnly           bool
         UnsafeDelete       bool
-       StorageClasses     []string
-
-       bucket *s3bucket
 
+       // cluster, volume, logger and metrics are the arguments given
+       // to newS3Volume; bucket and region are set by check.
+       cluster   *arvados.Cluster
+       volume    arvados.Volume
+       logger    logrus.FieldLogger
+       metrics   *volumeMetricsVecs
+       bucket    *s3bucket
+       region    aws.Region
         startOnce sync.Once
 }
 
-// Examples implements VolumeWithExamples.
-func (*S3Volume) Examples() []Volume {
-       return []Volume{
-               &S3Volume{
-                       AccessKeyFile:  "/etc/aws_s3_access_key.txt",
-                       SecretKeyFile:  "/etc/aws_s3_secret_key.txt",
-                       Endpoint:       "",
-                       Region:         "us-east-1",
-                       Bucket:         "example-bucket-name",
-                       IndexPageSize:  1000,
-                       S3Replication:  2,
-                       RaceWindow:     arvados.Duration(24 * time.Hour),
-                       ConnectTimeout: arvados.Duration(time.Minute),
-                       ReadTimeout:    arvados.Duration(5 * time.Minute),
-               },
-               &S3Volume{
-                       AccessKeyFile:  "/etc/gce_s3_access_key.txt",
-                       SecretKeyFile:  "/etc/gce_s3_secret_key.txt",
-                       Endpoint:       "https://storage.googleapis.com",
-                       Region:         "",
-                       Bucket:         "example-bucket-name",
-                       IndexPageSize:  1000,
-                       S3Replication:  2,
-                       RaceWindow:     arvados.Duration(24 * time.Hour),
-                       ConnectTimeout: arvados.Duration(time.Minute),
-                       ReadTimeout:    arvados.Duration(5 * time.Minute),
-               },
-       }
-}
-
-// Type implements Volume.
-func (*S3Volume) Type() string {
-       return "S3"
+// GetDeviceID returns a globally unique ID for the storage bucket.
+//
+// The ID has the form "s3://<Endpoint>/<Bucket>"; when Endpoint is
+// empty this yields "s3:///<Bucket>". check uses the result as the
+// "device_id" label for the volume's prometheus counters.
+func (v *S3Volume) GetDeviceID() string {
+       return "s3://" + v.Endpoint + "/" + v.Bucket
 }
 
-// Start populates private fields and verifies the configuration is
-// valid.
-func (v *S3Volume) Start(vm *volumeMetricsVecs) error {
-       region, ok := aws.Regions[v.Region]
-       if v.Endpoint == "" {
-               if !ok {
-                       return fmt.Errorf("unrecognized region %+q; try specifying -s3-endpoint instead", v.Region)
-               }
-       } else if ok {
-               return fmt.Errorf("refusing to use AWS region name %+q with endpoint %+q; "+
-                       "specify empty endpoint (\"-s3-endpoint=\") or use a different region name", v.Region, v.Endpoint)
-       } else {
-               region = aws.Region{
-                       Name:                 v.Region,
-                       S3Endpoint:           v.Endpoint,
-                       S3LocationConstraint: v.LocationConstraint,
+// bootstrapIAMCredentials decides how the volume authenticates.
+//
+// If AccessKey or SecretKey is set, the configured credentials are
+// kept and nothing is fetched; setting IAMRole as well is an error.
+// Otherwise it fetches IAM credentials once synchronously, returning
+// any error, and then starts a goroutine that refreshes them in a
+// loop, sleeping for the TTL reported by updateIAMCredentials
+// between attempts. The loop has no exit condition.
+func (v *S3Volume) bootstrapIAMCredentials() error {
+       if v.AccessKey != "" || v.SecretKey != "" {
+               if v.IAMRole != "" {
+                       return errors.New("invalid DriverParameters: AccessKey and SecretKey must be blank if IAMRole is specified")
                 }
+               return nil
         }
-
-       var err error
-       var auth aws.Auth
-       auth.AccessKey, err = readKeyFromFile(v.AccessKeyFile)
-       if err != nil {
-               return err
-       }
-       auth.SecretKey, err = readKeyFromFile(v.SecretKeyFile)
+       ttl, err := v.updateIAMCredentials()
         if err != nil {
                 return err
         }
+       go func() {
+               for {
+                       time.Sleep(ttl)
+                       ttl, err = v.updateIAMCredentials()
+                       // On failure, or when the new credentials are
+                       // already within a second of needing renewal,
+                       // log a warning and retry after one second.
+                       if err != nil {
+                               v.logger.WithError(err).Warnf("failed to update credentials for IAM role %q", v.IAMRole)
+                               ttl = time.Second
+                       } else if ttl < time.Second {
+                               v.logger.WithField("TTL", ttl).Warnf("received stale credentials for IAM role %q", v.IAMRole)
+                               ttl = time.Second
+                       }
+               }
+       }()
+       return nil
+}
 
-       // Zero timeouts mean "wait forever", which is a bad
-       // default. Default to long timeouts instead.
-       if v.ConnectTimeout == 0 {
-               v.ConnectTimeout = s3DefaultConnectTimeout
-       }
-       if v.ReadTimeout == 0 {
-               v.ReadTimeout = s3DefaultReadTimeout
-       }
-
-       client := s3.New(auth, region)
-       if region.EC2Endpoint.Signer == aws.V4Signature {
+// newS3Client returns an S3 client for v.region that authenticates
+// with the volume's current AccessKey, SecretKey, AuthToken and
+// AuthExpiration, and uses the configured ConnectTimeout and
+// ReadTimeout. The auth value is passed to s3.New by value, and
+// updateIAMCredentials builds a fresh client after each credential
+// refresh.
+func (v *S3Volume) newS3Client() *s3.S3 {
+       auth := aws.NewAuth(v.AccessKey, v.SecretKey, v.AuthToken, v.AuthExpiration)
+       client := s3.New(*auth, v.region)
+       if v.region.EC2Endpoint.Signer == aws.V4Signature {
                 // Currently affects only eu-central-1
                 client.Signature = aws.V4Signature
         }
         client.ConnectTimeout = time.Duration(v.ConnectTimeout)
         client.ReadTimeout = time.Duration(v.ReadTimeout)
-       v.bucket = &s3bucket{
-               Bucket: &s3.Bucket{
-                       S3:   client,
-                       Name: v.Bucket,
-               },
-       }
-       // Set up prometheus metrics
-       lbls := prometheus.Labels{"device_id": v.DeviceID()}
-       v.bucket.stats.opsCounters, v.bucket.stats.errCounters, v.bucket.stats.ioBytes = vm.getCounterVecsFor(lbls)
+       return client
+}
 
-       return nil
+// iamCredentials holds the JSON document returned by the AWS
+// metadata endpoint .../security-credentials/${rolename}, as decoded
+// by updateIAMCredentials.
+//
+// NOTE(review): the struct has no json tags, so decoding relies on
+// encoding/json's case-insensitive field-name matching (e.g. a
+// response key spelled "AccessKeyId" would populate AccessKeyID) --
+// confirm against the actual endpoint output.
+type iamCredentials struct {
+       Code            string
+       LastUpdated     time.Time
+       Type            string
+       AccessKeyID     string
+       SecretAccessKey string
+       Token           string
+       Expiration      time.Time
 }
 
-// DeviceID returns a globally unique ID for the storage bucket.
-func (v *S3Volume) DeviceID() string {
-       return "s3://" + v.Endpoint + "/" + v.Bucket
+// updateIAMCredentials fetches credentials over HTTP, stores them in
+// v.AccessKey, v.SecretKey, v.AuthToken and v.AuthExpiration, and
+// installs a new S3 client built from them via v.bucket.SetBucket
+// (so v.bucket must already be set).
+//
+// The credentials URL is v.IAMRole itself if it contains "://", the
+// metadata base URL plus v.IAMRole if IAMRole is a plain name, or
+// the metadata base URL plus a role name looked up from that base
+// URL if IAMRole is empty. All requests share a one-minute deadline.
+//
+// Returns TTL of updated credentials, i.e., time to sleep until next
+// update.
+func (v *S3Volume) updateIAMCredentials() (time.Duration, error) {
+       ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(time.Minute))
+       defer cancel()
+
+       metadataBaseURL := "http://169.254.169.254/latest/meta-data/iam/security-credentials/"
+
+       var url string
+       if strings.Contains(v.IAMRole, "://") {
+               // Configuration provides complete URL (used by tests)
+               url = v.IAMRole
+       } else if v.IAMRole != "" {
+               // Configuration provides IAM role name and we use the
+               // AWS metadata endpoint
+               url = metadataBaseURL + v.IAMRole
+       } else {
+               // No role configured: ask the metadata endpoint for
+               // the role name, then append it to the base URL.
+               url = metadataBaseURL
+               v.logger.WithField("URL", url).Debug("looking up IAM role name")
+               req, err := http.NewRequest("GET", url, nil)
+               if err != nil {
+                       return 0, fmt.Errorf("error setting up request %s: %s", url, err)
+               }
+               resp, err := http.DefaultClient.Do(req.WithContext(ctx))
+               if err != nil {
+                       return 0, fmt.Errorf("error getting %s: %s", url, err)
+               }
+               defer resp.Body.Close()
+               if resp.StatusCode == http.StatusNotFound {
+                       return 0, fmt.Errorf("this instance does not have an IAM role assigned -- either assign a role, or configure AccessKey and SecretKey explicitly in DriverParameters (error getting %s: HTTP status %s)", url, resp.Status)
+               } else if resp.StatusCode != http.StatusOK {
+                       return 0, fmt.Errorf("error getting %s: HTTP status %s", url, resp.Status)
+               }
+               body := bufio.NewReader(resp.Body)
+               var role string
+               // Read the first whitespace-delimited token as the
+               // role name.
+               _, err = fmt.Fscanf(body, "%s\n", &role)
+               if err != nil {
+                       return 0, fmt.Errorf("error reading response from %s: %s", url, err)
+               }
+               // Probe for leftover data; anything after the first
+               // role name is ignored with a warning.
+               if n, _ := body.Read(make([]byte, 64)); n > 0 {
+                       v.logger.Warnf("ignoring additional data returned by metadata endpoint %s after the single role name that we expected", url)
+               }
+               v.logger.WithField("Role", role).Debug("looked up IAM role name")
+               url = url + role
+       }
+
+       v.logger.WithField("URL", url).Debug("getting credentials")
+       req, err := http.NewRequest("GET", url, nil)
+       if err != nil {
+               return 0, fmt.Errorf("error setting up request %s: %s", url, err)
+       }
+       resp, err := http.DefaultClient.Do(req.WithContext(ctx))
+       if err != nil {
+               return 0, fmt.Errorf("error getting %s: %s", url, err)
+       }
+       defer resp.Body.Close()
+       if resp.StatusCode != http.StatusOK {
+               return 0, fmt.Errorf("error getting %s: HTTP status %s", url, resp.Status)
+       }
+       var cred iamCredentials
+       err = json.NewDecoder(resp.Body).Decode(&cred)
+       if err != nil {
+               return 0, fmt.Errorf("error decoding credentials from %s: %s", url, err)
+       }
+       // Store the new credentials first: newS3Client reads them
+       // from v when building the replacement client below.
+       v.AccessKey, v.SecretKey, v.AuthToken, v.AuthExpiration = cred.AccessKeyID, cred.SecretAccessKey, cred.Token, cred.Expiration
+       v.bucket.SetBucket(&s3.Bucket{
+               S3:   v.newS3Client(),
+               Name: v.Bucket,
+       })
+       // TTL is time from now to expiration, minus 5m.  "We make new
+       // credentials available at least five minutes before the
+       // expiration of the old credentials."  --
+       // https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/iam-roles-for-amazon-ec2.html#instance-metadata-security-credentials
+       // (If that's not true, the returned ttl might be zero or
+       // negative, which the caller can handle.)
+       ttl := cred.Expiration.Sub(time.Now()) - 5*time.Minute
+       v.logger.WithFields(logrus.Fields{
+               "AccessKeyID": cred.AccessKeyID,
+               "LastUpdated": cred.LastUpdated,
+               "Expiration":  cred.Expiration,
+               "TTL":         arvados.Duration(ttl),
+       }).Debug("updated credentials")
+       return ttl, nil
 }
 
 func (v *S3Volume) getReaderWithContext(ctx context.Context, loc string) (rdr io.ReadCloser, err error) {
@@ -271,7 +303,7 @@ func (v *S3Volume) getReaderWithContext(ctx context.Context, loc string) (rdr io
        case <-ready:
                return
        case <-ctx.Done():
-               theConfig.debugLogf("s3: abandoning getReader(): %s", ctx.Err())
+               v.logger.Debugf("s3: abandoning getReader(): %s", ctx.Err())
                go func() {
                        <-ready
                        if err == nil {
@@ -339,11 +371,11 @@ func (v *S3Volume) Get(ctx context.Context, loc string, buf []byte) (int, error)
        }()
        select {
        case <-ctx.Done():
-               theConfig.debugLogf("s3: interrupting ReadFull() with Close() because %s", ctx.Err())
+               v.logger.Debugf("s3: interrupting ReadFull() with Close() because %s", ctx.Err())
                rdr.Close()
                // Must wait for ReadFull to return, to ensure it
                // doesn't write to buf after we return.
-               theConfig.debugLogf("s3: waiting for ReadFull() to fail")
+               v.logger.Debug("s3: waiting for ReadFull() to fail")
                <-ready
                return 0, ctx.Err()
        case <-ready:
@@ -397,7 +429,7 @@ func (v *S3Volume) Compare(ctx context.Context, loc string, expect []byte) error
 
 // Put writes a block.
 func (v *S3Volume) Put(ctx context.Context, loc string, block []byte) error {
-       if v.ReadOnly {
+       if v.volume.ReadOnly {
                return MethodDisabledError
        }
        var opts s3.Options
@@ -433,7 +465,7 @@ func (v *S3Volume) Put(ctx context.Context, loc string, block []byte) error {
        go func() {
                defer func() {
                        if ctx.Err() != nil {
-                               theConfig.debugLogf("%s: abandoned PutReader goroutine finished with err: %s", v, err)
+                               v.logger.Debugf("%s: abandoned PutReader goroutine finished with err: %s", v, err)
                        }
                }()
                defer close(ready)
@@ -445,7 +477,7 @@ func (v *S3Volume) Put(ctx context.Context, loc string, block []byte) error {
        }()
        select {
        case <-ctx.Done():
-               theConfig.debugLogf("%s: taking PutReader's input away: %s", v, ctx.Err())
+               v.logger.Debugf("%s: taking PutReader's input away: %s", v, ctx.Err())
                // Our pipe might be stuck in Write(), waiting for
                // PutReader() to read. If so, un-stick it. This means
                // PutReader will get corrupt data, but that's OK: the
@@ -453,7 +485,7 @@ func (v *S3Volume) Put(ctx context.Context, loc string, block []byte) error {
                go io.Copy(ioutil.Discard, bufr)
                // CloseWithError() will return once pending I/O is done.
                bufw.CloseWithError(ctx.Err())
-               theConfig.debugLogf("%s: abandoning PutReader goroutine", v)
+               v.logger.Debugf("%s: abandoning PutReader goroutine", v)
                return ctx.Err()
        case <-ready:
                // Unblock pipe in case PutReader did not consume it.
@@ -464,7 +496,7 @@ func (v *S3Volume) Put(ctx context.Context, loc string, block []byte) error {
 
 // Touch sets the timestamp for the given locator to the current time.
 func (v *S3Volume) Touch(loc string) error {
-       if v.ReadOnly {
+       if v.volume.ReadOnly {
                return MethodDisabledError
        }
        _, err := v.bucket.Head(loc, nil)
@@ -512,13 +544,13 @@ func (v *S3Volume) Mtime(loc string) (time.Time, error) {
 func (v *S3Volume) IndexTo(prefix string, writer io.Writer) error {
        // Use a merge sort to find matching sets of X and recent/X.
        dataL := s3Lister{
-               Bucket:   v.bucket.Bucket,
+               Bucket:   v.bucket.Bucket(),
                Prefix:   prefix,
                PageSize: v.IndexPageSize,
                Stats:    &v.bucket.stats,
        }
        recentL := s3Lister{
-               Bucket:   v.bucket.Bucket,
+               Bucket:   v.bucket.Bucket(),
                Prefix:   "recent/" + prefix,
                PageSize: v.IndexPageSize,
                Stats:    &v.bucket.stats,
@@ -571,16 +603,16 @@ func (v *S3Volume) IndexTo(prefix string, writer io.Writer) error {
 
 // Trash a Keep block.
 func (v *S3Volume) Trash(loc string) error {
-       if v.ReadOnly {
+       if v.volume.ReadOnly {
                return MethodDisabledError
        }
        if t, err := v.Mtime(loc); err != nil {
                return err
-       } else if time.Since(t) < theConfig.BlobSignatureTTL.Duration() {
+       } else if time.Since(t) < v.cluster.Collections.BlobSigningTTL.Duration() {
                return nil
        }
-       if theConfig.TrashLifetime == 0 {
-               if !s3UnsafeDelete {
+       if v.cluster.Collections.BlobTrashLifetime == 0 {
+               if !v.UnsafeDelete {
                        return ErrS3TrashDisabled
                }
                return v.translateError(v.bucket.Del(loc))
@@ -615,7 +647,7 @@ func (v *S3Volume) checkRaceWindow(loc string) error {
                // Can't parse timestamp
                return err
        }
-       safeWindow := t.Add(theConfig.TrashLifetime.Duration()).Sub(time.Now().Add(time.Duration(v.RaceWindow)))
+       safeWindow := t.Add(v.cluster.Collections.BlobTrashLifetime.Duration()).Sub(time.Now().Add(time.Duration(v.RaceWindow)))
        if safeWindow <= 0 {
                // We can't count on "touch trash/X" to prolong
                // trash/X's lifetime. The new timestamp might not
@@ -633,15 +665,15 @@ func (v *S3Volume) checkRaceWindow(loc string) error {
 // (PutCopy returns 200 OK if the request was received, even if the
 // copy failed).
 func (v *S3Volume) safeCopy(dst, src string) error {
-       resp, err := v.bucket.PutCopy(dst, s3ACL, s3.CopyOptions{
+       resp, err := v.bucket.Bucket().PutCopy(dst, s3ACL, s3.CopyOptions{
                ContentType:       "application/octet-stream",
                MetadataDirective: "REPLACE",
-       }, v.bucket.Name+"/"+src)
+       }, v.bucket.Bucket().Name+"/"+src)
        err = v.translateError(err)
        if os.IsNotExist(err) {
                return err
        } else if err != nil {
-               return fmt.Errorf("PutCopy(%q ← %q): %s", dst, v.bucket.Name+"/"+src, err)
+               return fmt.Errorf("PutCopy(%q ← %q): %s", dst, v.bucket.Bucket().Name+"/"+src, err)
        }
        if t, err := time.Parse(time.RFC3339Nano, resp.LastModified); err != nil {
                return fmt.Errorf("PutCopy succeeded but did not return a timestamp: %q: %s", resp.LastModified, err)
@@ -698,23 +730,6 @@ func (v *S3Volume) String() string {
        return fmt.Sprintf("s3-bucket:%+q", v.Bucket)
 }
 
-// Writable returns false if all future Put, Mtime, and Delete calls
-// are expected to fail.
-func (v *S3Volume) Writable() bool {
-       return !v.ReadOnly
-}
-
-// Replication returns the storage redundancy of the underlying
-// device. Configured via command line flag.
-func (v *S3Volume) Replication() int {
-       return v.S3Replication
-}
-
-// GetStorageClasses implements Volume
-func (v *S3Volume) GetStorageClasses() []string {
-       return v.StorageClasses
-}
-
 var s3KeepBlockRegexp = regexp.MustCompile(`^[0-9a-f]{32}$`)
 
 func (v *S3Volume) isKeepBlock(s string) bool {
@@ -751,13 +766,13 @@ func (v *S3Volume) fixRace(loc string) bool {
        }
 
        ageWhenTrashed := trashTime.Sub(recentTime)
-       if ageWhenTrashed >= theConfig.BlobSignatureTTL.Duration() {
+       if ageWhenTrashed >= v.cluster.Collections.BlobSigningTTL.Duration() {
                // No evidence of a race: block hasn't been written
                // since it became eligible for Trash. No fix needed.
                return false
        }
 
-       log.Printf("notice: fixRace: %q: trashed at %s but touched at %s (age when trashed = %s < %s)", loc, trashTime, recentTime, ageWhenTrashed, theConfig.BlobSignatureTTL)
+       log.Printf("notice: fixRace: %q: trashed at %s but touched at %s (age when trashed = %s < %s)", loc, trashTime, recentTime, ageWhenTrashed, v.cluster.Collections.BlobSigningTTL)
        log.Printf("notice: fixRace: copying %q to %q to recover from race between Put/Touch and Trash", "recent/"+loc, loc)
        err = v.safeCopy(loc, "trash/"+loc)
        if err != nil {
@@ -782,9 +797,13 @@ func (v *S3Volume) translateError(err error) error {
        return err
 }
 
-// EmptyTrash looks for trashed blocks that exceeded TrashLifetime
+// EmptyTrash looks for trashed blocks that exceeded BlobTrashLifetime
 // and deletes them from the volume.
 func (v *S3Volume) EmptyTrash() {
+       if v.cluster.Collections.BlobDeleteConcurrency < 1 {
+               return
+       }
+
        var bytesInTrash, blocksInTrash, bytesDeleted, blocksDeleted int64
 
        // Define "ready to delete" as "...when EmptyTrash started".
@@ -820,15 +839,15 @@ func (v *S3Volume) EmptyTrash() {
                        log.Printf("warning: %s: EmptyTrash: %q: parse %q: %s", v, "recent/"+loc, recent.Header.Get("Last-Modified"), err)
                        return
                }
-               if trashT.Sub(recentT) < theConfig.BlobSignatureTTL.Duration() {
-                       if age := startT.Sub(recentT); age >= theConfig.BlobSignatureTTL.Duration()-time.Duration(v.RaceWindow) {
+               if trashT.Sub(recentT) < v.cluster.Collections.BlobSigningTTL.Duration() {
+                       if age := startT.Sub(recentT); age >= v.cluster.Collections.BlobSigningTTL.Duration()-time.Duration(v.RaceWindow) {
                                // recent/loc is too old to protect
                                // loc from being Trashed again during
                                // the raceWindow that starts if we
                                // delete trash/X now.
                                //
-                               // Note this means (TrashCheckInterval
-                               // < BlobSignatureTTL - raceWindow) is
+                               // Note this means (TrashSweepInterval
+                               // < BlobSigningTTL - raceWindow) is
                                // necessary to avoid starvation.
                                log.Printf("notice: %s: EmptyTrash: detected old race for %q, calling fixRace + Touch", v, loc)
                                v.fixRace(loc)
@@ -845,7 +864,7 @@ func (v *S3Volume) EmptyTrash() {
                                return
                        }
                }
-               if startT.Sub(trashT) < theConfig.TrashLifetime.Duration() {
+               if startT.Sub(trashT) < v.cluster.Collections.BlobTrashLifetime.Duration() {
                        return
                }
                err = v.bucket.Del(trash.Key)
@@ -872,8 +891,8 @@ func (v *S3Volume) EmptyTrash() {
        }
 
        var wg sync.WaitGroup
-       todo := make(chan *s3.Key, theConfig.EmptyTrashWorkers)
-       for i := 0; i < 1 || i < theConfig.EmptyTrashWorkers; i++ {
+       todo := make(chan *s3.Key, v.cluster.Collections.BlobDeleteConcurrency)
+       for i := 0; i < v.cluster.Collections.BlobDeleteConcurrency; i++ {
                wg.Add(1)
                go func() {
                        defer wg.Done()
@@ -884,7 +903,7 @@ func (v *S3Volume) EmptyTrash() {
        }
 
        trashL := s3Lister{
-               Bucket:   v.bucket.Bucket,
+               Bucket:   v.bucket.Bucket(),
                Prefix:   "trash/",
                PageSize: v.IndexPageSize,
                Stats:    &v.bucket.stats,
@@ -963,14 +982,29 @@ func (lister *s3Lister) pop() (k *s3.Key) {
        return
 }
 
-// s3bucket wraps s3.bucket and counts I/O and API usage stats.
+// s3bucket wraps s3.bucket and counts I/O and API usage stats. The
+// wrapped bucket can be replaced atomically with SetBucket in order
+// to update credentials.
 type s3bucket struct {
-       *s3.Bucket
-       stats s3bucketStats
+       bucket *s3.Bucket
+       stats  s3bucketStats
+       mu     sync.Mutex
+}
+
+func (b *s3bucket) Bucket() *s3.Bucket {
+       b.mu.Lock() // same mutex SetBucket holds while replacing b.bucket
+       defer b.mu.Unlock()
+       return b.bucket // the lock is released on return; the caller then uses this pointer unlocked
+}
+
+func (b *s3bucket) SetBucket(bucket *s3.Bucket) {
+       b.mu.Lock() // same mutex Bucket() holds while reading b.bucket
+       defer b.mu.Unlock()
+       b.bucket = bucket // only affects later Bucket() calls; pointers already handed out are unchanged
+}
 
 func (b *s3bucket) GetReader(path string) (io.ReadCloser, error) {
-       rdr, err := b.Bucket.GetReader(path)
+       rdr, err := b.Bucket().GetReader(path)
        b.stats.TickOps("get")
        b.stats.Tick(&b.stats.Ops, &b.stats.GetOps)
        b.stats.TickErr(err)
@@ -978,7 +1012,7 @@ func (b *s3bucket) GetReader(path string) (io.ReadCloser, error) {
 }
 
 func (b *s3bucket) Head(path string, headers map[string][]string) (*http.Response, error) {
-       resp, err := b.Bucket.Head(path, headers)
+       resp, err := b.Bucket().Head(path, headers)
        b.stats.TickOps("head")
        b.stats.Tick(&b.stats.Ops, &b.stats.HeadOps)
        b.stats.TickErr(err)
@@ -997,7 +1031,7 @@ func (b *s3bucket) PutReader(path string, r io.Reader, length int64, contType st
        } else {
                r = NewCountingReader(r, b.stats.TickOutBytes)
        }
-       err := b.Bucket.PutReader(path, r, length, contType, perm, options)
+       err := b.Bucket().PutReader(path, r, length, contType, perm, options)
        b.stats.TickOps("put")
        b.stats.Tick(&b.stats.Ops, &b.stats.PutOps)
        b.stats.TickErr(err)
@@ -1005,7 +1039,7 @@ func (b *s3bucket) PutReader(path string, r io.Reader, length int64, contType st
 }
 
 func (b *s3bucket) Del(path string) error {
-       err := b.Bucket.Del(path)
+       err := b.Bucket().Del(path)
        b.stats.TickOps("delete")
        b.stats.Tick(&b.stats.Ops, &b.stats.DelOps)
        b.stats.TickErr(err)
index 6377420ff4b381cba49b07d2813fb4803f03aa62..49ea24aa03b8cee1903a2de01010b061085b4528 100644 (file)
@@ -10,17 +10,20 @@ import (
        "crypto/md5"
        "encoding/json"
        "fmt"
-       "io/ioutil"
+       "io"
+       "log"
        "net/http"
        "net/http/httptest"
        "os"
+       "strings"
        "time"
 
        "git.curoverse.com/arvados.git/sdk/go/arvados"
+       "git.curoverse.com/arvados.git/sdk/go/ctxlog"
        "github.com/AdRoll/goamz/s3"
        "github.com/AdRoll/goamz/s3/s3test"
-       "github.com/ghodss/yaml"
        "github.com/prometheus/client_golang/prometheus"
+       "github.com/sirupsen/logrus"
        check "gopkg.in/check.v1"
 )
 
@@ -39,34 +42,43 @@ func (c *fakeClock) Now() time.Time {
        return *c.now
 }
 
-func init() {
-       // Deleting isn't safe from races, but if it's turned on
-       // anyway we do expect it to pass the generic volume tests.
-       s3UnsafeDelete = true
-}
-
 var _ = check.Suite(&StubbedS3Suite{})
 
 type StubbedS3Suite struct {
-       volumes []*TestableS3Volume
+       s3server *httptest.Server
+       metadata *httptest.Server
+       cluster  *arvados.Cluster
+       handler  *handler
+       volumes  []*TestableS3Volume
+}
+
+func (s *StubbedS3Suite) SetUpTest(c *check.C) {
+       s.s3server = nil // nil: newTestableVolume points the volume at its own s3test server
+       s.metadata = nil // nil: newTestableVolume uses static "xxx" keys instead of an IAM role URL
+       s.cluster = testCluster(c)
+       s.cluster.Volumes = map[string]arvados.Volume{ // two S3-driver volume entries
+               "zzzzz-nyw5e-000000000000000": {Driver: "S3"},
+               "zzzzz-nyw5e-111111111111111": {Driver: "S3"},
+       }
+       s.handler = &handler{}
+}
 
 func (s *StubbedS3Suite) TestGeneric(c *check.C) {
-       DoGenericVolumeTests(c, func(t TB) TestableVolume {
+       DoGenericVolumeTests(c, false, func(t TB, cluster *arvados.Cluster, volume arvados.Volume, logger logrus.FieldLogger, metrics *volumeMetricsVecs) TestableVolume {
                // Use a negative raceWindow so s3test's 1-second
                // timestamp precision doesn't confuse fixRace.
-               return s.newTestableVolume(c, -2*time.Second, false, 2)
+               return s.newTestableVolume(c, cluster, volume, metrics, -2*time.Second)
        })
 }
 
 func (s *StubbedS3Suite) TestGenericReadOnly(c *check.C) {
-       DoGenericVolumeTests(c, func(t TB) TestableVolume {
-               return s.newTestableVolume(c, -2*time.Second, true, 2)
+       DoGenericVolumeTests(c, true, func(t TB, cluster *arvados.Cluster, volume arvados.Volume, logger logrus.FieldLogger, metrics *volumeMetricsVecs) TestableVolume {
+               return s.newTestableVolume(c, cluster, volume, metrics, -2*time.Second)
        })
 }
 
 func (s *StubbedS3Suite) TestIndex(c *check.C) {
-       v := s.newTestableVolume(c, 0, false, 2)
+       v := s.newTestableVolume(c, s.cluster, arvados.Volume{Replication: 2}, newVolumeMetricsVecs(prometheus.NewRegistry()), 0)
        v.IndexPageSize = 3
        for i := 0; i < 256; i++ {
                v.PutRaw(fmt.Sprintf("%02x%030x", i, i), []byte{102, 111, 111})
@@ -90,8 +102,42 @@ func (s *StubbedS3Suite) TestIndex(c *check.C) {
        }
 }
 
+func (s *StubbedS3Suite) TestIAMRoleCredentials(c *check.C) {
+       s.metadata = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+               upd := time.Now().UTC().Add(-time.Hour).Format(time.RFC3339)
+               exp := time.Now().UTC().Add(time.Hour).Format(time.RFC3339)
+               // Literal example from
+               // https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/iam-roles-for-amazon-ec2.html#instance-metadata-security-credentials
+               // but with updated timestamps
+               io.WriteString(w, `{"Code":"Success","LastUpdated":"`+upd+`","Type":"AWS-HMAC","AccessKeyId":"ASIAIOSFODNN7EXAMPLE","SecretAccessKey":"wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY","Token":"token","Expiration":"`+exp+`"}`)
+       }))
+       defer s.metadata.Close()
+
+       v := s.newTestableVolume(c, s.cluster, arvados.Volume{Replication: 2}, newVolumeMetricsVecs(prometheus.NewRegistry()), 5*time.Minute)
+       c.Check(v.AccessKey, check.Equals, "ASIAIOSFODNN7EXAMPLE")
+       c.Check(v.SecretKey, check.Equals, "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY")
+       c.Check(v.bucket.bucket.S3.Auth.AccessKey, check.Equals, "ASIAIOSFODNN7EXAMPLE")
+       c.Check(v.bucket.bucket.S3.Auth.SecretKey, check.Equals, "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY")
+
+       s.metadata = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+               w.WriteHeader(http.StatusNotFound)
+       }))
+       deadv := &S3Volume{
+               IAMRole:  s.metadata.URL + "/fake-metadata/test-role",
+               Endpoint: "http://localhost:12345",
+               Region:   "test-region-1",
+               Bucket:   "test-bucket-name",
+               cluster:  s.cluster,
+               logger:   ctxlog.TestLogger(c),
+               metrics:  newVolumeMetricsVecs(prometheus.NewRegistry()),
+       }
+       err := deadv.check()
+       c.Check(err, check.ErrorMatches, `.*/fake-metadata/test-role.*`)
+       c.Check(err, check.ErrorMatches, `.*404.*`)
+}
+
 func (s *StubbedS3Suite) TestStats(c *check.C) {
-       v := s.newTestableVolume(c, 5*time.Minute, false, 2)
+       v := s.newTestableVolume(c, s.cluster, arvados.Volume{Replication: 2}, newVolumeMetricsVecs(prometheus.NewRegistry()), 5*time.Minute)
        stats := func() string {
                buf, err := json.Marshal(v.InternalStats())
                c.Check(err, check.IsNil)
@@ -125,6 +171,11 @@ type blockingHandler struct {
 }
 
 func (h *blockingHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
+       if r.Method == "PUT" && !strings.Contains(strings.Trim(r.URL.Path, "/"), "/") {
+               // Accept PutBucket ("PUT /bucketname/"), called by
+               // newTestableVolume
+               return
+       }
        if h.requested != nil {
                h.requested <- r
        }
@@ -164,15 +215,10 @@ func (s *StubbedS3Suite) TestPutContextCancel(c *check.C) {
 
 func (s *StubbedS3Suite) testContextCancel(c *check.C, testFunc func(context.Context, *TestableS3Volume) error) {
        handler := &blockingHandler{}
-       srv := httptest.NewServer(handler)
-       defer srv.Close()
+       s.s3server = httptest.NewServer(handler)
+       defer s.s3server.Close()
 
-       v := s.newTestableVolume(c, 5*time.Minute, false, 2)
-       vol := *v.S3Volume
-       vol.Endpoint = srv.URL
-       v = &TestableS3Volume{S3Volume: &vol}
-       metrics := newVolumeMetricsVecs(prometheus.NewRegistry())
-       v.Start(metrics)
+       v := s.newTestableVolume(c, s.cluster, arvados.Volume{Replication: 2}, newVolumeMetricsVecs(prometheus.NewRegistry()), 5*time.Minute)
 
        ctx, cancel := context.WithCancel(context.Background())
 
@@ -209,14 +255,10 @@ func (s *StubbedS3Suite) testContextCancel(c *check.C, testFunc func(context.Con
 }
 
 func (s *StubbedS3Suite) TestBackendStates(c *check.C) {
-       defer func(tl, bs arvados.Duration) {
-               theConfig.TrashLifetime = tl
-               theConfig.BlobSignatureTTL = bs
-       }(theConfig.TrashLifetime, theConfig.BlobSignatureTTL)
-       theConfig.TrashLifetime.Set("1h")
-       theConfig.BlobSignatureTTL.Set("1h")
-
-       v := s.newTestableVolume(c, 5*time.Minute, false, 2)
+       s.cluster.Collections.BlobTrashLifetime.Set("1h")
+       s.cluster.Collections.BlobSigningTTL.Set("1h")
+
+       v := s.newTestableVolume(c, s.cluster, arvados.Volume{Replication: 2}, newVolumeMetricsVecs(prometheus.NewRegistry()), 5*time.Minute)
        var none time.Time
 
        putS3Obj := func(t time.Time, key string, data []byte) {
@@ -224,7 +266,7 @@ func (s *StubbedS3Suite) TestBackendStates(c *check.C) {
                        return
                }
                v.serverClock.now = &t
-               v.bucket.Put(key, data, "application/octet-stream", s3ACL, s3.Options{})
+               v.bucket.Bucket().Put(key, data, "application/octet-stream", s3ACL, s3.Options{})
        }
 
        t0 := time.Now()
@@ -311,12 +353,12 @@ func (s *StubbedS3Suite) TestBackendStates(c *check.C) {
                        false, false, false, true, false, false,
                },
                {
-                       "Erroneously trashed during a race, detected before TrashLifetime",
+                       "Erroneously trashed during a race, detected before BlobTrashLifetime",
                        none, t0.Add(-30 * time.Minute), t0.Add(-29 * time.Minute),
                        true, false, true, true, true, false,
                },
                {
-                       "Erroneously trashed during a race, rescue during EmptyTrash despite reaching TrashLifetime",
+                       "Erroneously trashed during a race, rescue during EmptyTrash despite reaching BlobTrashLifetime",
                        none, t0.Add(-90 * time.Minute), t0.Add(-89 * time.Minute),
                        true, false, true, true, true, false,
                },
@@ -411,68 +453,55 @@ type TestableS3Volume struct {
        serverClock *fakeClock
 }
 
-func (s *StubbedS3Suite) newTestableVolume(c *check.C, raceWindow time.Duration, readonly bool, replication int) *TestableS3Volume {
+func (s *StubbedS3Suite) newTestableVolume(c *check.C, cluster *arvados.Cluster, volume arvados.Volume, metrics *volumeMetricsVecs, raceWindow time.Duration) *TestableS3Volume {
        clock := &fakeClock{}
        srv, err := s3test.NewServer(&s3test.Config{Clock: clock})
        c.Assert(err, check.IsNil)
+       endpoint := srv.URL()
+       if s.s3server != nil {
+               endpoint = s.s3server.URL
+       }
+
+       iamRole, accessKey, secretKey := "", "xxx", "xxx"
+       if s.metadata != nil {
+               iamRole, accessKey, secretKey = s.metadata.URL+"/fake-metadata/test-role", "", ""
+       }
 
        v := &TestableS3Volume{
                S3Volume: &S3Volume{
+                       AccessKey:          accessKey,
+                       SecretKey:          secretKey,
+                       IAMRole:            iamRole,
                        Bucket:             TestBucketName,
-                       Endpoint:           srv.URL(),
+                       Endpoint:           endpoint,
                        Region:             "test-region-1",
                        LocationConstraint: true,
-                       RaceWindow:         arvados.Duration(raceWindow),
-                       S3Replication:      replication,
-                       UnsafeDelete:       s3UnsafeDelete,
-                       ReadOnly:           readonly,
+                       UnsafeDelete:       true,
                        IndexPageSize:      1000,
+                       cluster:            cluster,
+                       volume:             volume,
+                       logger:             ctxlog.TestLogger(c),
+                       metrics:            metrics,
                },
                c:           c,
                server:      srv,
                serverClock: clock,
        }
-       metrics := newVolumeMetricsVecs(prometheus.NewRegistry())
-       v.Start(metrics)
-       err = v.bucket.PutBucket(s3.ACL("private"))
-       c.Assert(err, check.IsNil)
+       c.Assert(v.S3Volume.check(), check.IsNil)
+       c.Assert(v.bucket.Bucket().PutBucket(s3.ACL("private")), check.IsNil)
+       // We couldn't set RaceWindow until now because check()
+       // rejects negative values.
+       v.S3Volume.RaceWindow = arvados.Duration(raceWindow)
        return v
 }
 
-func (s *StubbedS3Suite) TestConfig(c *check.C) {
-       var cfg Config
-       err := yaml.Unmarshal([]byte(`
-Volumes:
-  - Type: S3
-    StorageClasses: ["class_a", "class_b"]
-`), &cfg)
-
-       c.Check(err, check.IsNil)
-       c.Check(cfg.Volumes[0].GetStorageClasses(), check.DeepEquals, []string{"class_a", "class_b"})
-}
-
-func (v *TestableS3Volume) Start(vm *volumeMetricsVecs) error {
-       tmp, err := ioutil.TempFile("", "keepstore")
-       v.c.Assert(err, check.IsNil)
-       defer os.Remove(tmp.Name())
-       _, err = tmp.Write([]byte("xxx\n"))
-       v.c.Assert(err, check.IsNil)
-       v.c.Assert(tmp.Close(), check.IsNil)
-
-       v.S3Volume.AccessKeyFile = tmp.Name()
-       v.S3Volume.SecretKeyFile = tmp.Name()
-
-       v.c.Assert(v.S3Volume.Start(vm), check.IsNil)
-       return nil
-}
-
 // PutRaw skips the ContentMD5 test
 func (v *TestableS3Volume) PutRaw(loc string, block []byte) {
-       err := v.bucket.Put(loc, block, "application/octet-stream", s3ACL, s3.Options{})
+       err := v.bucket.Bucket().Put(loc, block, "application/octet-stream", s3ACL, s3.Options{})
        if err != nil {
                log.Printf("PutRaw: %s: %+v", loc, err)
        }
-       err = v.bucket.Put("recent/"+loc, nil, "application/octet-stream", s3ACL, s3.Options{})
+       err = v.bucket.Bucket().Put("recent/"+loc, nil, "application/octet-stream", s3ACL, s3.Options{})
        if err != nil {
                log.Printf("PutRaw: recent/%s: %+v", loc, err)
        }
@@ -483,7 +512,7 @@ func (v *TestableS3Volume) PutRaw(loc string, block []byte) {
 // while we do this.
 func (v *TestableS3Volume) TouchWithDate(locator string, lastPut time.Time) {
        v.serverClock.now = &lastPut
-       err := v.bucket.Put("recent/"+locator, nil, "application/octet-stream", s3ACL, s3.Options{})
+       err := v.bucket.Bucket().Put("recent/"+locator, nil, "application/octet-stream", s3ACL, s3.Options{})
        if err != nil {
                panic(err)
        }
diff --git a/services/keepstore/server.go b/services/keepstore/server.go
deleted file mode 100644 (file)
index 3f67277..0000000
+++ /dev/null
@@ -1,78 +0,0 @@
-// Copyright (C) The Arvados Authors. All rights reserved.
-//
-// SPDX-License-Identifier: AGPL-3.0
-
-package main
-
-import (
-       "crypto/tls"
-       "net"
-       "net/http"
-       "os"
-       "os/signal"
-       "syscall"
-)
-
-type server struct {
-       http.Server
-
-       // channel (size=1) with the current keypair
-       currentCert chan *tls.Certificate
-}
-
-func (srv *server) Serve(l net.Listener) error {
-       if theConfig.TLSCertificateFile == "" && theConfig.TLSKeyFile == "" {
-               return srv.Server.Serve(l)
-       }
-       // https://blog.gopheracademy.com/advent-2016/exposing-go-on-the-internet/
-       srv.TLSConfig = &tls.Config{
-               GetCertificate:           srv.getCertificate,
-               PreferServerCipherSuites: true,
-               CurvePreferences: []tls.CurveID{
-                       tls.CurveP256,
-                       tls.X25519,
-               },
-               MinVersion: tls.VersionTLS12,
-               CipherSuites: []uint16{
-                       tls.TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,
-                       tls.TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,
-                       tls.TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305,
-                       tls.TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,
-                       tls.TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,
-                       tls.TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,
-               },
-       }
-       srv.currentCert = make(chan *tls.Certificate, 1)
-       go srv.refreshCertificate(theConfig.TLSCertificateFile, theConfig.TLSKeyFile)
-       return srv.Server.ServeTLS(l, "", "")
-}
-
-func (srv *server) refreshCertificate(certfile, keyfile string) {
-       cert, err := tls.LoadX509KeyPair(certfile, keyfile)
-       if err != nil {
-               log.WithError(err).Fatal("error loading X509 key pair")
-       }
-       srv.currentCert <- &cert
-
-       reload := make(chan os.Signal, 1)
-       signal.Notify(reload, syscall.SIGHUP)
-       for range reload {
-               cert, err := tls.LoadX509KeyPair(certfile, keyfile)
-               if err != nil {
-                       log.WithError(err).Warn("error loading X509 key pair")
-                       continue
-               }
-               // Throw away old cert and start using new one
-               <-srv.currentCert
-               srv.currentCert <- &cert
-       }
-}
-
-func (srv *server) getCertificate(*tls.ClientHelloInfo) (*tls.Certificate, error) {
-       if srv.currentCert == nil {
-               panic("srv.currentCert not initialized")
-       }
-       cert := <-srv.currentCert
-       srv.currentCert <- cert
-       return cert, nil
-}
diff --git a/services/keepstore/server_test.go b/services/keepstore/server_test.go
deleted file mode 100644 (file)
index 84adf36..0000000
+++ /dev/null
@@ -1,47 +0,0 @@
-// Copyright (C) The Arvados Authors. All rights reserved.
-//
-// SPDX-License-Identifier: AGPL-3.0
-
-package main
-
-import (
-       "bytes"
-       "context"
-       "crypto/tls"
-       "io/ioutil"
-       "net"
-       "net/http"
-       "testing"
-)
-
-func TestTLS(t *testing.T) {
-       defer func() {
-               theConfig.TLSKeyFile = ""
-               theConfig.TLSCertificateFile = ""
-       }()
-       theConfig.TLSKeyFile = "../api/tmp/self-signed.key"
-       theConfig.TLSCertificateFile = "../api/tmp/self-signed.pem"
-       srv := &server{}
-       srv.Handler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-               w.Write([]byte("OK"))
-       })
-       l, err := net.Listen("tcp", ":")
-       if err != nil {
-               t.Fatal(err)
-       }
-       defer l.Close()
-       go srv.Serve(l)
-       defer srv.Shutdown(context.Background())
-       c := &http.Client{Transport: &http.Transport{TLSClientConfig: &tls.Config{InsecureSkipVerify: true}}}
-       resp, err := c.Get("https://" + l.Addr().String() + "/")
-       if err != nil {
-               t.Fatal(err)
-       }
-       body, err := ioutil.ReadAll(resp.Body)
-       if err != nil {
-               t.Error(err)
-       }
-       if !bytes.Equal(body, []byte("OK")) {
-               t.Errorf("expected OK, got %q", body)
-       }
-}
index dc6efb083dc6ed9f1fd597f7eb532fdc4f848c1d..7bff2584e5ae30529c7fb79dcccc6c95156521e2 100644 (file)
@@ -14,8 +14,8 @@ import (
 
 // getStatusItem("foo","bar","baz") retrieves /status.json, decodes
 // the response body into resp, and returns resp["foo"]["bar"]["baz"].
-func getStatusItem(keys ...string) interface{} {
-       resp := IssueRequest(&RequestTester{"/status.json", "", "GET", nil})
+func getStatusItem(h *handler, keys ...string) interface{} {
+       resp := IssueRequest(h, &RequestTester{"/status.json", "", "GET", nil})
        var s interface{}
        json.NewDecoder(resp.Body).Decode(&s)
        for _, k := range keys {
index 8a9fedfb7007ca21ef1d5d2e482ce66464fafe1a..ba1455ac657bca2f05dd808a23936af83a425e20 100644 (file)
@@ -6,6 +6,7 @@ package main
 
 import (
        "errors"
+       "log"
        "time"
 
        "git.curoverse.com/arvados.git/sdk/go/arvados"
@@ -17,35 +18,35 @@ import (
 //      Delete the block indicated by the trash request Locator
 //             Repeat
 //
-func RunTrashWorker(trashq *WorkQueue) {
+func RunTrashWorker(volmgr *RRVolumeManager, cluster *arvados.Cluster, trashq *WorkQueue) {
        for item := range trashq.NextItem {
                trashRequest := item.(TrashRequest)
-               TrashItem(trashRequest)
+               TrashItem(volmgr, cluster, trashRequest)
                trashq.DoneItem <- struct{}{}
        }
 }
 
 // TrashItem deletes the indicated block from every writable volume.
-func TrashItem(trashRequest TrashRequest) {
+func TrashItem(volmgr *RRVolumeManager, cluster *arvados.Cluster, trashRequest TrashRequest) {
        reqMtime := time.Unix(0, trashRequest.BlockMtime)
-       if time.Since(reqMtime) < theConfig.BlobSignatureTTL.Duration() {
+       if time.Since(reqMtime) < cluster.Collections.BlobSigningTTL.Duration() {
                log.Printf("WARNING: data manager asked to delete a %v old block %v (BlockMtime %d = %v), but my blobSignatureTTL is %v! Skipping.",
                        arvados.Duration(time.Since(reqMtime)),
                        trashRequest.Locator,
                        trashRequest.BlockMtime,
                        reqMtime,
-                       theConfig.BlobSignatureTTL)
+                       cluster.Collections.BlobSigningTTL)
                return
        }
 
-       var volumes []Volume
+       var volumes []*VolumeMount
        if uuid := trashRequest.MountUUID; uuid == "" {
-               volumes = KeepVM.AllWritable()
-       } else if v := KeepVM.Lookup(uuid, true); v == nil {
+               volumes = volmgr.AllWritable()
+       } else if mnt := volmgr.Lookup(uuid, true); mnt == nil {
                log.Printf("warning: trash request for nonexistent mount: %v", trashRequest)
                return
        } else {
-               volumes = []Volume{v}
+               volumes = []*VolumeMount{mnt}
        }
 
        for _, volume := range volumes {
@@ -59,8 +60,8 @@ func TrashItem(trashRequest TrashRequest) {
                        continue
                }
 
-               if !theConfig.EnableDelete {
-                       err = errors.New("skipping because EnableDelete is false")
+               if !cluster.Collections.BlobTrash {
+                       err = errors.New("skipping because Collections.BlobTrash is false")
                } else {
                        err = volume.Trash(trashRequest.Locator)
                }
index c5a410b06f05c151bb401c31f9377316c573678a..bd3743090ab90f1c4dfd6434136cd1776a94ea22 100644 (file)
@@ -7,8 +7,10 @@ package main
 import (
        "container/list"
        "context"
-       "testing"
        "time"
+
+       "github.com/prometheus/client_golang/prometheus"
+       check "gopkg.in/check.v1"
 )
 
 type TrashWorkerTestData struct {
@@ -36,8 +38,8 @@ type TrashWorkerTestData struct {
 /* Delete block that does not exist in any of the keep volumes.
    Expect no errors.
 */
-func TestTrashWorkerIntegration_GetNonExistingLocator(t *testing.T) {
-       theConfig.EnableDelete = true
+func (s *HandlerSuite) TestTrashWorkerIntegration_GetNonExistingLocator(c *check.C) {
+       s.cluster.Collections.BlobTrash = true
        testData := TrashWorkerTestData{
                Locator1: "5d41402abc4b2a76b9719d911017c592",
                Block1:   []byte("hello"),
@@ -52,14 +54,14 @@ func TestTrashWorkerIntegration_GetNonExistingLocator(t *testing.T) {
                ExpectLocator1: false,
                ExpectLocator2: false,
        }
-       performTrashWorkerTest(testData, t)
+       s.performTrashWorkerTest(c, testData)
 }
 
 /* Delete a block that exists on volume 1 of the keep servers.
    Expect the second locator in volume 2 to be unaffected.
 */
-func TestTrashWorkerIntegration_LocatorInVolume1(t *testing.T) {
-       theConfig.EnableDelete = true
+func (s *HandlerSuite) TestTrashWorkerIntegration_LocatorInVolume1(c *check.C) {
+       s.cluster.Collections.BlobTrash = true
        testData := TrashWorkerTestData{
                Locator1: TestHash,
                Block1:   TestBlock,
@@ -74,14 +76,14 @@ func TestTrashWorkerIntegration_LocatorInVolume1(t *testing.T) {
                ExpectLocator1: false,
                ExpectLocator2: true,
        }
-       performTrashWorkerTest(testData, t)
+       s.performTrashWorkerTest(c, testData)
 }
 
 /* Delete a block that exists on volume 2 of the keep servers.
    Expect the first locator in volume 1 to be unaffected.
 */
-func TestTrashWorkerIntegration_LocatorInVolume2(t *testing.T) {
-       theConfig.EnableDelete = true
+func (s *HandlerSuite) TestTrashWorkerIntegration_LocatorInVolume2(c *check.C) {
+       s.cluster.Collections.BlobTrash = true
        testData := TrashWorkerTestData{
                Locator1: TestHash,
                Block1:   TestBlock,
@@ -96,14 +98,14 @@ func TestTrashWorkerIntegration_LocatorInVolume2(t *testing.T) {
                ExpectLocator1: true,
                ExpectLocator2: false,
        }
-       performTrashWorkerTest(testData, t)
+       s.performTrashWorkerTest(c, testData)
 }
 
 /* Delete a block with matching mtime for locator in both volumes.
    Expect locator to be deleted from both volumes.
 */
-func TestTrashWorkerIntegration_LocatorInBothVolumes(t *testing.T) {
-       theConfig.EnableDelete = true
+func (s *HandlerSuite) TestTrashWorkerIntegration_LocatorInBothVolumes(c *check.C) {
+       s.cluster.Collections.BlobTrash = true
        testData := TrashWorkerTestData{
                Locator1: TestHash,
                Block1:   TestBlock,
@@ -118,14 +120,14 @@ func TestTrashWorkerIntegration_LocatorInBothVolumes(t *testing.T) {
                ExpectLocator1: false,
                ExpectLocator2: false,
        }
-       performTrashWorkerTest(testData, t)
+       s.performTrashWorkerTest(c, testData)
 }
 
 /* Same locator with different Mtimes exists in both volumes.
    Delete the second and expect the first to be still around.
 */
-func TestTrashWorkerIntegration_MtimeMatchesForLocator1ButNotForLocator2(t *testing.T) {
-       theConfig.EnableDelete = true
+func (s *HandlerSuite) TestTrashWorkerIntegration_MtimeMatchesForLocator1ButNotForLocator2(c *check.C) {
+       s.cluster.Collections.BlobTrash = true
        testData := TrashWorkerTestData{
                Locator1: TestHash,
                Block1:   TestBlock,
@@ -141,14 +143,14 @@ func TestTrashWorkerIntegration_MtimeMatchesForLocator1ButNotForLocator2(t *test
                ExpectLocator1: true,
                ExpectLocator2: false,
        }
-       performTrashWorkerTest(testData, t)
+       s.performTrashWorkerTest(c, testData)
 }
 
 // Delete a block that exists on both volumes with matching mtimes,
 // but specify a MountUUID in the request so it only gets deleted from
 // the first volume.
-func TestTrashWorkerIntegration_SpecifyMountUUID(t *testing.T) {
-       theConfig.EnableDelete = true
+func (s *HandlerSuite) TestTrashWorkerIntegration_SpecifyMountUUID(c *check.C) {
+       s.cluster.Collections.BlobTrash = true
        testData := TrashWorkerTestData{
                Locator1: TestHash,
                Block1:   TestBlock,
@@ -164,15 +166,15 @@ func TestTrashWorkerIntegration_SpecifyMountUUID(t *testing.T) {
                ExpectLocator1: true,
                ExpectLocator2: true,
        }
-       performTrashWorkerTest(testData, t)
+       s.performTrashWorkerTest(c, testData)
 }
 
 /* Two different locators in volume 1.
    Delete one of them.
    Expect the other unaffected.
 */
-func TestTrashWorkerIntegration_TwoDifferentLocatorsInVolume1(t *testing.T) {
-       theConfig.EnableDelete = true
+func (s *HandlerSuite) TestTrashWorkerIntegration_TwoDifferentLocatorsInVolume1(c *check.C) {
+       s.cluster.Collections.BlobTrash = true
        testData := TrashWorkerTestData{
                Locator1: TestHash,
                Block1:   TestBlock,
@@ -188,14 +190,14 @@ func TestTrashWorkerIntegration_TwoDifferentLocatorsInVolume1(t *testing.T) {
                ExpectLocator1: false,
                ExpectLocator2: true,
        }
-       performTrashWorkerTest(testData, t)
+       s.performTrashWorkerTest(c, testData)
 }
 
 /* Allow default Trash Life time to be used. Thus, the newly created block
    will not be deleted because its Mtime is within the trash life time.
 */
-func TestTrashWorkerIntegration_SameLocatorInTwoVolumesWithDefaultTrashLifeTime(t *testing.T) {
-       theConfig.EnableDelete = true
+func (s *HandlerSuite) TestTrashWorkerIntegration_SameLocatorInTwoVolumesWithDefaultTrashLifeTime(c *check.C) {
+       s.cluster.Collections.BlobTrash = true
        testData := TrashWorkerTestData{
                Locator1: TestHash,
                Block1:   TestBlock,
@@ -214,14 +216,14 @@ func TestTrashWorkerIntegration_SameLocatorInTwoVolumesWithDefaultTrashLifeTime(
                ExpectLocator1: true,
                ExpectLocator2: true,
        }
-       performTrashWorkerTest(testData, t)
+       s.performTrashWorkerTest(c, testData)
 }
 
 /* Delete a block with matching mtime for locator in both volumes, but EnableDelete is false,
    so block won't be deleted.
 */
-func TestTrashWorkerIntegration_DisabledDelete(t *testing.T) {
-       theConfig.EnableDelete = false
+func (s *HandlerSuite) TestTrashWorkerIntegration_DisabledDelete(c *check.C) {
+       s.cluster.Collections.BlobTrash = false
        testData := TrashWorkerTestData{
                Locator1: TestHash,
                Block1:   TestBlock,
@@ -236,31 +238,34 @@ func TestTrashWorkerIntegration_DisabledDelete(t *testing.T) {
                ExpectLocator1: true,
                ExpectLocator2: true,
        }
-       performTrashWorkerTest(testData, t)
+       s.performTrashWorkerTest(c, testData)
 }
 
 /* Perform the test */
-func performTrashWorkerTest(testData TrashWorkerTestData, t *testing.T) {
-       // Create Keep Volumes
-       KeepVM = MakeTestVolumeManager(2)
-       defer KeepVM.Close()
+func (s *HandlerSuite) performTrashWorkerTest(c *check.C, testData TrashWorkerTestData) {
+       c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
+       // Replace the router's trashq -- which the worker goroutines
+       // started by setup() are now receiving from -- with a new
+       // one, so we can see what the handler sends to it.
+       trashq := NewWorkQueue()
+       s.handler.Handler.(*router).trashq = trashq
 
        // Put test content
-       vols := KeepVM.AllWritable()
+       mounts := s.handler.volmgr.AllWritable()
        if testData.CreateData {
-               vols[0].Put(context.Background(), testData.Locator1, testData.Block1)
-               vols[0].Put(context.Background(), testData.Locator1+".meta", []byte("metadata"))
+               mounts[0].Put(context.Background(), testData.Locator1, testData.Block1)
+               mounts[0].Put(context.Background(), testData.Locator1+".meta", []byte("metadata"))
 
                if testData.CreateInVolume1 {
-                       vols[0].Put(context.Background(), testData.Locator2, testData.Block2)
-                       vols[0].Put(context.Background(), testData.Locator2+".meta", []byte("metadata"))
+                       mounts[0].Put(context.Background(), testData.Locator2, testData.Block2)
+                       mounts[0].Put(context.Background(), testData.Locator2+".meta", []byte("metadata"))
                } else {
-                       vols[1].Put(context.Background(), testData.Locator2, testData.Block2)
-                       vols[1].Put(context.Background(), testData.Locator2+".meta", []byte("metadata"))
+                       mounts[1].Put(context.Background(), testData.Locator2, testData.Block2)
+                       mounts[1].Put(context.Background(), testData.Locator2+".meta", []byte("metadata"))
                }
        }
 
-       oldBlockTime := time.Now().Add(-theConfig.BlobSignatureTTL.Duration() - time.Minute)
+       oldBlockTime := time.Now().Add(-s.cluster.Collections.BlobSigningTTL.Duration() - time.Minute)
 
        // Create TrashRequest for the test
        trashRequest := TrashRequest{
@@ -268,37 +273,35 @@ func performTrashWorkerTest(testData TrashWorkerTestData, t *testing.T) {
                BlockMtime: oldBlockTime.UnixNano(),
        }
        if testData.SpecifyMountUUID {
-               trashRequest.MountUUID = KeepVM.Mounts()[0].UUID
+               trashRequest.MountUUID = s.handler.volmgr.Mounts()[0].UUID
        }
 
        // Run trash worker and put the trashRequest on trashq
        trashList := list.New()
        trashList.PushBack(trashRequest)
-       trashq = NewWorkQueue()
-       defer trashq.Close()
 
        if !testData.UseTrashLifeTime {
                // Trash worker would not delete block if its Mtime is
                // within trash life time. Back-date the block to
                // allow the deletion to succeed.
-               for _, v := range vols {
-                       v.(*MockVolume).Timestamps[testData.DeleteLocator] = oldBlockTime
+               for _, mnt := range mounts {
+                       mnt.Volume.(*MockVolume).Timestamps[testData.DeleteLocator] = oldBlockTime
                        if testData.DifferentMtimes {
                                oldBlockTime = oldBlockTime.Add(time.Second)
                        }
                }
        }
-       go RunTrashWorker(trashq)
+       go RunTrashWorker(s.handler.volmgr, s.cluster, trashq)
 
        // Install gate so all local operations block until we say go
        gate := make(chan struct{})
-       for _, v := range vols {
-               v.(*MockVolume).Gate = gate
+       for _, mnt := range mounts {
+               mnt.Volume.(*MockVolume).Gate = gate
        }
 
        assertStatusItem := func(k string, expect float64) {
-               if v := getStatusItem("TrashQueue", k); v != expect {
-                       t.Errorf("Got %s %v, expected %v", k, v, expect)
+               if v := getStatusItem(s.handler, "TrashQueue", k); v != expect {
+                       c.Errorf("Got %s %v, expected %v", k, v, expect)
                }
        }
 
@@ -309,7 +312,7 @@ func performTrashWorkerTest(testData TrashWorkerTestData, t *testing.T) {
        trashq.ReplaceQueue(trashList)
 
        // Wait for worker to take request(s)
-       expectEqualWithin(t, time.Second, listLen, func() interface{} { return trashq.Status().InProgress })
+       expectEqualWithin(c, time.Second, listLen, func() interface{} { return trashq.Status().InProgress })
 
        // Ensure status.json also reports work is happening
        assertStatusItem("InProgress", float64(1))
@@ -319,31 +322,31 @@ func performTrashWorkerTest(testData TrashWorkerTestData, t *testing.T) {
        close(gate)
 
        // Wait for worker to finish
-       expectEqualWithin(t, time.Second, 0, func() interface{} { return trashq.Status().InProgress })
+       expectEqualWithin(c, time.Second, 0, func() interface{} { return trashq.Status().InProgress })
 
        // Verify Locator1 to be un/deleted as expected
        buf := make([]byte, BlockSize)
-       size, err := GetBlock(context.Background(), testData.Locator1, buf, nil)
+       size, err := GetBlock(context.Background(), s.handler.volmgr, testData.Locator1, buf, nil)
        if testData.ExpectLocator1 {
                if size == 0 || err != nil {
-                       t.Errorf("Expected Locator1 to be still present: %s", testData.Locator1)
+                       c.Errorf("Expected Locator1 to be still present: %s", testData.Locator1)
                }
        } else {
                if size > 0 || err == nil {
-                       t.Errorf("Expected Locator1 to be deleted: %s", testData.Locator1)
+                       c.Errorf("Expected Locator1 to be deleted: %s", testData.Locator1)
                }
        }
 
        // Verify Locator2 to be un/deleted as expected
        if testData.Locator1 != testData.Locator2 {
-               size, err = GetBlock(context.Background(), testData.Locator2, buf, nil)
+               size, err = GetBlock(context.Background(), s.handler.volmgr, testData.Locator2, buf, nil)
                if testData.ExpectLocator2 {
                        if size == 0 || err != nil {
-                               t.Errorf("Expected Locator2 to be still present: %s", testData.Locator2)
+                               c.Errorf("Expected Locator2 to be still present: %s", testData.Locator2)
                        }
                } else {
                        if size > 0 || err == nil {
-                               t.Errorf("Expected Locator2 to be deleted: %s", testData.Locator2)
+                               c.Errorf("Expected Locator2 to be deleted: %s", testData.Locator2)
                        }
                }
        }
@@ -353,14 +356,12 @@ func performTrashWorkerTest(testData TrashWorkerTestData, t *testing.T) {
        // the trash request.
        if testData.DifferentMtimes {
                locatorFoundIn := 0
-               for _, volume := range KeepVM.AllReadable() {
+               for _, volume := range s.handler.volmgr.AllReadable() {
                        buf := make([]byte, BlockSize)
                        if _, err := volume.Get(context.Background(), testData.Locator1, buf); err == nil {
                                locatorFoundIn = locatorFoundIn + 1
                        }
                }
-               if locatorFoundIn != 1 {
-                       t.Errorf("Found %d copies of %s, expected 1", locatorFoundIn, testData.Locator1)
-               }
+               c.Check(locatorFoundIn, check.Equals, 1)
        }
 }
index 4d9e798ac67c71c2a81f51abeb2128b340a6cda6..6504f9c16b166cf7d5222f59988939beff878802 100644 (file)
@@ -5,12 +5,13 @@
 package main
 
 import (
-       "bufio"
        "context"
-       "flag"
+       "encoding/json"
+       "errors"
        "fmt"
        "io"
        "io/ioutil"
+       "log"
        "os"
        "os/exec"
        "path/filepath"
@@ -22,98 +23,52 @@ import (
        "syscall"
        "time"
 
+       "git.curoverse.com/arvados.git/sdk/go/arvados"
        "github.com/prometheus/client_golang/prometheus"
+       "github.com/sirupsen/logrus"
 )
 
-type unixVolumeAdder struct {
-       *Config
-}
-
-// String implements flag.Value
-func (vs *unixVolumeAdder) String() string {
-       return "-"
+func init() {
+       driver["Directory"] = newDirectoryVolume
 }
 
-func (vs *unixVolumeAdder) Set(path string) error {
-       if dirs := strings.Split(path, ","); len(dirs) > 1 {
-               log.Print("DEPRECATED: using comma-separated volume list.")
-               for _, dir := range dirs {
-                       if err := vs.Set(dir); err != nil {
-                               return err
-                       }
-               }
-               return nil
+func newDirectoryVolume(cluster *arvados.Cluster, volume arvados.Volume, logger logrus.FieldLogger, metrics *volumeMetricsVecs) (Volume, error) {
+       v := &UnixVolume{cluster: cluster, volume: volume, logger: logger, metrics: metrics}
+       err := json.Unmarshal(volume.DriverParameters, &v)
+       if err != nil {
+               return nil, err
        }
-       vs.Config.Volumes = append(vs.Config.Volumes, &UnixVolume{
-               Root:      path,
-               ReadOnly:  deprecated.flagReadonly,
-               Serialize: deprecated.flagSerializeIO,
-       })
-       return nil
-}
-
-func init() {
-       VolumeTypes = append(VolumeTypes, func() VolumeWithExamples { return &UnixVolume{} })
-
-       flag.Var(&unixVolumeAdder{theConfig}, "volumes", "see Volumes configuration")
-       flag.Var(&unixVolumeAdder{theConfig}, "volume", "see Volumes configuration")
+       return v, v.check()
 }
 
-// Discover adds a UnixVolume for every directory named "keep" that is
-// located at the top level of a device- or tmpfs-backed mount point
-// other than "/". It returns the number of volumes added.
-func (vs *unixVolumeAdder) Discover() int {
-       added := 0
-       f, err := os.Open(ProcMounts)
-       if err != nil {
-               log.Fatalf("opening %s: %s", ProcMounts, err)
+func (v *UnixVolume) check() error {
+       if v.Root == "" {
+               return errors.New("DriverParameters.Root was not provided")
        }
-       scanner := bufio.NewScanner(f)
-       for scanner.Scan() {
-               args := strings.Fields(scanner.Text())
-               if err := scanner.Err(); err != nil {
-                       log.Fatalf("reading %s: %s", ProcMounts, err)
-               }
-               dev, mount := args[0], args[1]
-               if mount == "/" {
-                       continue
-               }
-               if dev != "tmpfs" && !strings.HasPrefix(dev, "/dev/") {
-                       continue
-               }
-               keepdir := mount + "/keep"
-               if st, err := os.Stat(keepdir); err != nil || !st.IsDir() {
-                       continue
-               }
-               // Set the -readonly flag (but only for this volume)
-               // if the filesystem is mounted readonly.
-               flagReadonlyWas := deprecated.flagReadonly
-               for _, fsopt := range strings.Split(args[3], ",") {
-                       if fsopt == "ro" {
-                               deprecated.flagReadonly = true
-                               break
-                       }
-                       if fsopt == "rw" {
-                               break
-                       }
-               }
-               if err := vs.Set(keepdir); err != nil {
-                       log.Printf("adding %q: %s", keepdir, err)
-               } else {
-                       added++
-               }
-               deprecated.flagReadonly = flagReadonlyWas
+       if v.Serialize {
+               v.locker = &sync.Mutex{}
+       }
+       if !strings.HasPrefix(v.Root, "/") {
+               return fmt.Errorf("DriverParameters.Root %q does not start with '/'", v.Root)
        }
-       return added
+
+       // Set up prometheus metrics
+       lbls := prometheus.Labels{"device_id": v.GetDeviceID()}
+       v.os.stats.opsCounters, v.os.stats.errCounters, v.os.stats.ioBytes = v.metrics.getCounterVecsFor(lbls)
+
+       _, err := v.os.Stat(v.Root)
+       return err
 }
 
 // A UnixVolume stores and retrieves blocks in a local directory.
 type UnixVolume struct {
-       Root                 string // path to the volume's root directory
-       ReadOnly             bool
-       Serialize            bool
-       DirectoryReplication int
-       StorageClasses       []string
+       Root      string // path to the volume's root directory
+       Serialize bool
+
+       cluster *arvados.Cluster
+       volume  arvados.Volume
+       logger  logrus.FieldLogger
+       metrics *volumeMetricsVecs
 
        // something to lock during IO, typically a sync.Mutex (or nil
        // to skip locking)
@@ -122,12 +77,12 @@ type UnixVolume struct {
        os osWithStats
 }
 
-// DeviceID returns a globally unique ID for the volume's root
+// GetDeviceID returns a globally unique ID for the volume's root
 // directory, consisting of the filesystem's UUID and the path from
 // filesystem root to storage directory, joined by "/". For example,
-// the DeviceID for a local directory "/mnt/xvda1/keep" might be
+// the device ID for a local directory "/mnt/xvda1/keep" might be
 // "fa0b6166-3b55-4994-bd3f-92f4e00a1bb0/keep".
-func (v *UnixVolume) DeviceID() string {
+func (v *UnixVolume) GetDeviceID() string {
        giveup := func(f string, args ...interface{}) string {
                log.Printf(f+"; using blank DeviceID for volume %s", append(args, v)...)
                return ""
@@ -198,50 +153,9 @@ func (v *UnixVolume) DeviceID() string {
        return giveup("could not find entry in %q matching %q", udir, dev)
 }
 
-// Examples implements VolumeWithExamples.
-func (*UnixVolume) Examples() []Volume {
-       return []Volume{
-               &UnixVolume{
-                       Root:                 "/mnt/local-disk",
-                       Serialize:            true,
-                       DirectoryReplication: 1,
-               },
-               &UnixVolume{
-                       Root:                 "/mnt/network-disk",
-                       Serialize:            false,
-                       DirectoryReplication: 2,
-               },
-       }
-}
-
-// Type implements Volume
-func (v *UnixVolume) Type() string {
-       return "Directory"
-}
-
-// Start implements Volume
-func (v *UnixVolume) Start(vm *volumeMetricsVecs) error {
-       if v.Serialize {
-               v.locker = &sync.Mutex{}
-       }
-       if !strings.HasPrefix(v.Root, "/") {
-               return fmt.Errorf("volume root does not start with '/': %q", v.Root)
-       }
-       if v.DirectoryReplication == 0 {
-               v.DirectoryReplication = 1
-       }
-       // Set up prometheus metrics
-       lbls := prometheus.Labels{"device_id": v.DeviceID()}
-       v.os.stats.opsCounters, v.os.stats.errCounters, v.os.stats.ioBytes = vm.getCounterVecsFor(lbls)
-
-       _, err := v.os.Stat(v.Root)
-
-       return err
-}
-
 // Touch sets the timestamp for the given locator to the current time
 func (v *UnixVolume) Touch(loc string) error {
-       if v.ReadOnly {
+       if v.volume.ReadOnly {
                return MethodDisabledError
        }
        p := v.blockPath(loc)
@@ -349,7 +263,7 @@ func (v *UnixVolume) Put(ctx context.Context, loc string, block []byte) error {
 
 // WriteBlock implements BlockWriter.
 func (v *UnixVolume) WriteBlock(ctx context.Context, loc string, rdr io.Reader) error {
-       if v.ReadOnly {
+       if v.volume.ReadOnly {
                return MethodDisabledError
        }
        if v.IsFull() {
@@ -504,9 +418,9 @@ func (v *UnixVolume) IndexTo(prefix string, w io.Writer) error {
 }
 
 // Trash trashes the block data from the unix storage
-// If TrashLifetime == 0, the block is deleted
+// If BlobTrashLifetime == 0, the block is deleted
 // Else, the block is renamed as path/{loc}.trash.{deadline},
-// where deadline = now + TrashLifetime
+// where deadline = now + BlobTrashLifetime
 func (v *UnixVolume) Trash(loc string) error {
        // Touch() must be called before calling Write() on a block.  Touch()
        // also uses lockfile().  This avoids a race condition between Write()
@@ -516,7 +430,7 @@ func (v *UnixVolume) Trash(loc string) error {
        // Trash() will read the correct up-to-date timestamp and choose not to
        // trash the file.
 
-       if v.ReadOnly {
+       if v.volume.ReadOnly || !v.cluster.Collections.BlobTrash {
                return MethodDisabledError
        }
        if err := v.lock(context.TODO()); err != nil {
@@ -541,21 +455,21 @@ func (v *UnixVolume) Trash(loc string) error {
        // anyway (because the permission signatures have expired).
        if fi, err := v.os.Stat(p); err != nil {
                return err
-       } else if time.Since(fi.ModTime()) < time.Duration(theConfig.BlobSignatureTTL) {
+       } else if time.Since(fi.ModTime()) < v.cluster.Collections.BlobSigningTTL.Duration() {
                return nil
        }
 
-       if theConfig.TrashLifetime == 0 {
+       if v.cluster.Collections.BlobTrashLifetime == 0 {
                return v.os.Remove(p)
        }
-       return v.os.Rename(p, fmt.Sprintf("%v.trash.%d", p, time.Now().Add(theConfig.TrashLifetime.Duration()).Unix()))
+       return v.os.Rename(p, fmt.Sprintf("%v.trash.%d", p, time.Now().Add(v.cluster.Collections.BlobTrashLifetime.Duration()).Unix()))
 }
 
 // Untrash moves block from trash back into store
 // Look for path/{loc}.trash.{deadline} in storage,
 // and rename the first such file as path/{loc}
 func (v *UnixVolume) Untrash(loc string) (err error) {
-       if v.ReadOnly {
+       if v.volume.ReadOnly {
                return MethodDisabledError
        }
 
@@ -650,23 +564,6 @@ func (v *UnixVolume) String() string {
        return fmt.Sprintf("[UnixVolume %s]", v.Root)
 }
 
-// Writable returns false if all future Put, Mtime, and Delete calls
-// are expected to fail.
-func (v *UnixVolume) Writable() bool {
-       return !v.ReadOnly
-}
-
-// Replication returns the number of replicas promised by the
-// underlying device (as specified in configuration).
-func (v *UnixVolume) Replication() int {
-       return v.DirectoryReplication
-}
-
-// GetStorageClasses implements Volume
-func (v *UnixVolume) GetStorageClasses() []string {
-       return v.StorageClasses
-}
-
 // InternalStats returns I/O and filesystem ops counters.
 func (v *UnixVolume) InternalStats() interface{} {
        return &v.os.stats
@@ -739,6 +636,10 @@ var unixTrashLocRegexp = regexp.MustCompile(`/([0-9a-f]{32})\.trash\.(\d+)$`)
 // EmptyTrash walks hierarchy looking for {hash}.trash.*
 // and deletes those with deadline < now.
 func (v *UnixVolume) EmptyTrash() {
+       if v.cluster.Collections.BlobDeleteConcurrency < 1 {
+               return
+       }
+
        var bytesDeleted, bytesInTrash int64
        var blocksDeleted, blocksInTrash int64
 
@@ -774,8 +675,8 @@ func (v *UnixVolume) EmptyTrash() {
                info os.FileInfo
        }
        var wg sync.WaitGroup
-       todo := make(chan dirent, theConfig.EmptyTrashWorkers)
-       for i := 0; i < 1 || i < theConfig.EmptyTrashWorkers; i++ {
+       todo := make(chan dirent, v.cluster.Collections.BlobDeleteConcurrency)
+       for i := 0; i < v.cluster.Collections.BlobDeleteConcurrency; i++ {
                wg.Add(1)
                go func() {
                        defer wg.Done()
index 872f408cf8cd68571705d240cb6b6184fce70a1d..1ffc46513cb571b94e4953ba231ba62983c99398 100644 (file)
@@ -16,11 +16,11 @@ import (
        "strings"
        "sync"
        "syscall"
-       "testing"
        "time"
 
-       "github.com/ghodss/yaml"
+       "git.curoverse.com/arvados.git/sdk/go/arvados"
        "github.com/prometheus/client_golang/prometheus"
+       "github.com/sirupsen/logrus"
        check "gopkg.in/check.v1"
 )
 
@@ -29,32 +29,13 @@ type TestableUnixVolume struct {
        t TB
 }
 
-func NewTestableUnixVolume(t TB, serialize bool, readonly bool) *TestableUnixVolume {
-       d, err := ioutil.TempDir("", "volume_test")
-       if err != nil {
-               t.Fatal(err)
-       }
-       var locker sync.Locker
-       if serialize {
-               locker = &sync.Mutex{}
-       }
-       return &TestableUnixVolume{
-               UnixVolume: UnixVolume{
-                       Root:     d,
-                       ReadOnly: readonly,
-                       locker:   locker,
-               },
-               t: t,
-       }
-}
-
 // PutRaw writes a Keep block directly into a UnixVolume, even if
 // the volume is readonly.
 func (v *TestableUnixVolume) PutRaw(locator string, data []byte) {
        defer func(orig bool) {
-               v.ReadOnly = orig
-       }(v.ReadOnly)
-       v.ReadOnly = false
+               v.volume.ReadOnly = orig
+       }(v.volume.ReadOnly)
+       v.volume.ReadOnly = false
        err := v.Put(context.Background(), locator, data)
        if err != nil {
                v.t.Fatal(err)
@@ -70,7 +51,7 @@ func (v *TestableUnixVolume) TouchWithDate(locator string, lastPut time.Time) {
 
 func (v *TestableUnixVolume) Teardown() {
        if err := os.RemoveAll(v.Root); err != nil {
-               v.t.Fatal(err)
+               v.t.Error(err)
        }
 }
 
@@ -78,59 +59,77 @@ func (v *TestableUnixVolume) ReadWriteOperationLabelValues() (r, w string) {
        return "open", "create"
 }
 
+var _ = check.Suite(&UnixVolumeSuite{})
+
+type UnixVolumeSuite struct {
+       cluster *arvados.Cluster
+       volumes []*TestableUnixVolume
+       metrics *volumeMetricsVecs
+}
+
+func (s *UnixVolumeSuite) SetUpTest(c *check.C) {
+       s.cluster = testCluster(c)
+       s.metrics = newVolumeMetricsVecs(prometheus.NewRegistry())
+}
+
+func (s *UnixVolumeSuite) TearDownTest(c *check.C) {
+       for _, v := range s.volumes {
+               v.Teardown()
+       }
+}
+
+func (s *UnixVolumeSuite) newTestableUnixVolume(c *check.C, cluster *arvados.Cluster, volume arvados.Volume, metrics *volumeMetricsVecs, serialize bool) *TestableUnixVolume {
+       d, err := ioutil.TempDir("", "volume_test")
+       c.Check(err, check.IsNil)
+       var locker sync.Locker
+       if serialize {
+               locker = &sync.Mutex{}
+       }
+       v := &TestableUnixVolume{
+               UnixVolume: UnixVolume{
+                       Root:    d,
+                       locker:  locker,
+                       cluster: cluster,
+                       volume:  volume,
+                       metrics: metrics,
+               },
+               t: c,
+       }
+       c.Check(v.check(), check.IsNil)
+       s.volumes = append(s.volumes, v)
+       return v
+}
+
 // serialize = false; readonly = false
-func TestUnixVolumeWithGenericTests(t *testing.T) {
-       DoGenericVolumeTests(t, func(t TB) TestableVolume {
-               return NewTestableUnixVolume(t, false, false)
+func (s *UnixVolumeSuite) TestUnixVolumeWithGenericTests(c *check.C) {
+       DoGenericVolumeTests(c, false, func(t TB, cluster *arvados.Cluster, volume arvados.Volume, logger logrus.FieldLogger, metrics *volumeMetricsVecs) TestableVolume {
+               return s.newTestableUnixVolume(c, cluster, volume, metrics, false)
        })
 }
 
 // serialize = false; readonly = true
-func TestUnixVolumeWithGenericTestsReadOnly(t *testing.T) {
-       DoGenericVolumeTests(t, func(t TB) TestableVolume {
-               return NewTestableUnixVolume(t, false, true)
+func (s *UnixVolumeSuite) TestUnixVolumeWithGenericTestsReadOnly(c *check.C) {
+       DoGenericVolumeTests(c, true, func(t TB, cluster *arvados.Cluster, volume arvados.Volume, logger logrus.FieldLogger, metrics *volumeMetricsVecs) TestableVolume {
+               return s.newTestableUnixVolume(c, cluster, volume, metrics, true)
        })
 }
 
 // serialize = true; readonly = false
-func TestUnixVolumeWithGenericTestsSerialized(t *testing.T) {
-       DoGenericVolumeTests(t, func(t TB) TestableVolume {
-               return NewTestableUnixVolume(t, true, false)
+func (s *UnixVolumeSuite) TestUnixVolumeWithGenericTestsSerialized(c *check.C) {
+       DoGenericVolumeTests(c, false, func(t TB, cluster *arvados.Cluster, volume arvados.Volume, logger logrus.FieldLogger, metrics *volumeMetricsVecs) TestableVolume {
+               return s.newTestableUnixVolume(c, cluster, volume, metrics, false)
        })
 }
 
-// serialize = false; readonly = false
-func TestUnixVolumeHandlersWithGenericVolumeTests(t *testing.T) {
-       DoHandlersWithGenericVolumeTests(t, func(t TB) (*RRVolumeManager, []TestableVolume) {
-               vols := make([]Volume, 2)
-               testableUnixVols := make([]TestableVolume, 2)
-
-               for i := range vols {
-                       v := NewTestableUnixVolume(t, false, false)
-                       vols[i] = v
-                       testableUnixVols[i] = v
-               }
-
-               return MakeRRVolumeManager(vols), testableUnixVols
+// serialize = true; readonly = true
+func (s *UnixVolumeSuite) TestUnixVolumeHandlersWithGenericVolumeTests(c *check.C) {
+       DoGenericVolumeTests(c, true, func(t TB, cluster *arvados.Cluster, volume arvados.Volume, logger logrus.FieldLogger, metrics *volumeMetricsVecs) TestableVolume {
+               return s.newTestableUnixVolume(c, cluster, volume, metrics, true)
        })
 }
 
-func TestReplicationDefault1(t *testing.T) {
-       v := &UnixVolume{
-               Root:     "/",
-               ReadOnly: true,
-       }
-       metrics := newVolumeMetricsVecs(prometheus.NewRegistry())
-       if err := v.Start(metrics); err != nil {
-               t.Error(err)
-       }
-       if got := v.Replication(); got != 1 {
-               t.Errorf("Replication() returned %d, expected 1 if no config given", got)
-       }
-}
-
-func TestGetNotFound(t *testing.T) {
-       v := NewTestableUnixVolume(t, false, false)
+func (s *UnixVolumeSuite) TestGetNotFound(c *check.C) {
+       v := s.newTestableUnixVolume(c, s.cluster, arvados.Volume{Replication: 1}, s.metrics, false)
        defer v.Teardown()
        v.Put(context.Background(), TestHash, TestBlock)
 
@@ -140,42 +139,42 @@ func TestGetNotFound(t *testing.T) {
        case os.IsNotExist(err):
                break
        case err == nil:
-               t.Errorf("Read should have failed, returned %+q", buf[:n])
+               c.Errorf("Read should have failed, returned %+q", buf[:n])
        default:
-               t.Errorf("Read expected ErrNotExist, got: %s", err)
+               c.Errorf("Read expected ErrNotExist, got: %s", err)
        }
 }
 
-func TestPut(t *testing.T) {
-       v := NewTestableUnixVolume(t, false, false)
+func (s *UnixVolumeSuite) TestPut(c *check.C) {
+       v := s.newTestableUnixVolume(c, s.cluster, arvados.Volume{Replication: 1}, s.metrics, false)
        defer v.Teardown()
 
        err := v.Put(context.Background(), TestHash, TestBlock)
        if err != nil {
-               t.Error(err)
+               c.Error(err)
        }
        p := fmt.Sprintf("%s/%s/%s", v.Root, TestHash[:3], TestHash)
        if buf, err := ioutil.ReadFile(p); err != nil {
-               t.Error(err)
+               c.Error(err)
        } else if bytes.Compare(buf, TestBlock) != 0 {
-               t.Errorf("Write should have stored %s, did store %s",
+               c.Errorf("Write should have stored %s, did store %s",
                        string(TestBlock), string(buf))
        }
 }
 
-func TestPutBadVolume(t *testing.T) {
-       v := NewTestableUnixVolume(t, false, false)
+func (s *UnixVolumeSuite) TestPutBadVolume(c *check.C) {
+       v := s.newTestableUnixVolume(c, s.cluster, arvados.Volume{Replication: 1}, s.metrics, false)
        defer v.Teardown()
 
        os.Chmod(v.Root, 000)
        err := v.Put(context.Background(), TestHash, TestBlock)
        if err == nil {
-               t.Error("Write should have failed")
+               c.Error("Write should have failed")
        }
 }
 
-func TestUnixVolumeReadonly(t *testing.T) {
-       v := NewTestableUnixVolume(t, false, true)
+func (s *UnixVolumeSuite) TestUnixVolumeReadonly(c *check.C) {
+       v := s.newTestableUnixVolume(c, s.cluster, arvados.Volume{ReadOnly: true, Replication: 1}, s.metrics, false)
        defer v.Teardown()
 
        v.PutRaw(TestHash, TestBlock)
@@ -183,34 +182,34 @@ func TestUnixVolumeReadonly(t *testing.T) {
        buf := make([]byte, BlockSize)
        _, err := v.Get(context.Background(), TestHash, buf)
        if err != nil {
-               t.Errorf("got err %v, expected nil", err)
+               c.Errorf("got err %v, expected nil", err)
        }
 
        err = v.Put(context.Background(), TestHash, TestBlock)
        if err != MethodDisabledError {
-               t.Errorf("got err %v, expected MethodDisabledError", err)
+               c.Errorf("got err %v, expected MethodDisabledError", err)
        }
 
        err = v.Touch(TestHash)
        if err != MethodDisabledError {
-               t.Errorf("got err %v, expected MethodDisabledError", err)
+               c.Errorf("got err %v, expected MethodDisabledError", err)
        }
 
        err = v.Trash(TestHash)
        if err != MethodDisabledError {
-               t.Errorf("got err %v, expected MethodDisabledError", err)
+               c.Errorf("got err %v, expected MethodDisabledError", err)
        }
 }
 
-func TestIsFull(t *testing.T) {
-       v := NewTestableUnixVolume(t, false, false)
+func (s *UnixVolumeSuite) TestIsFull(c *check.C) {
+       v := s.newTestableUnixVolume(c, s.cluster, arvados.Volume{Replication: 1}, s.metrics, false)
        defer v.Teardown()
 
        fullPath := v.Root + "/full"
        now := fmt.Sprintf("%d", time.Now().Unix())
        os.Symlink(now, fullPath)
        if !v.IsFull() {
-               t.Errorf("%s: claims not to be full", v)
+               c.Errorf("%s: claims not to be full", v)
        }
        os.Remove(fullPath)
 
@@ -218,32 +217,32 @@ func TestIsFull(t *testing.T) {
        expired := fmt.Sprintf("%d", time.Now().Unix()-3605)
        os.Symlink(expired, fullPath)
        if v.IsFull() {
-               t.Errorf("%s: should no longer be full", v)
+               c.Errorf("%s: should no longer be full", v)
        }
 }
 
-func TestNodeStatus(t *testing.T) {
-       v := NewTestableUnixVolume(t, false, false)
+func (s *UnixVolumeSuite) TestNodeStatus(c *check.C) {
+       v := s.newTestableUnixVolume(c, s.cluster, arvados.Volume{Replication: 1}, s.metrics, false)
        defer v.Teardown()
 
        // Get node status and make a basic sanity check.
        volinfo := v.Status()
        if volinfo.MountPoint != v.Root {
-               t.Errorf("GetNodeStatus mount_point %s, expected %s", volinfo.MountPoint, v.Root)
+               c.Errorf("GetNodeStatus mount_point %s, expected %s", volinfo.MountPoint, v.Root)
        }
        if volinfo.DeviceNum == 0 {
-               t.Errorf("uninitialized device_num in %v", volinfo)
+               c.Errorf("uninitialized device_num in %v", volinfo)
        }
        if volinfo.BytesFree == 0 {
-               t.Errorf("uninitialized bytes_free in %v", volinfo)
+               c.Errorf("uninitialized bytes_free in %v", volinfo)
        }
        if volinfo.BytesUsed == 0 {
-               t.Errorf("uninitialized bytes_used in %v", volinfo)
+               c.Errorf("uninitialized bytes_used in %v", volinfo)
        }
 }
 
-func TestUnixVolumeGetFuncWorkerError(t *testing.T) {
-       v := NewTestableUnixVolume(t, false, false)
+func (s *UnixVolumeSuite) TestUnixVolumeGetFuncWorkerError(c *check.C) {
+       v := s.newTestableUnixVolume(c, s.cluster, arvados.Volume{Replication: 1}, s.metrics, false)
        defer v.Teardown()
 
        v.Put(context.Background(), TestHash, TestBlock)
@@ -252,12 +251,12 @@ func TestUnixVolumeGetFuncWorkerError(t *testing.T) {
                return mockErr
        })
        if err != mockErr {
-               t.Errorf("Got %v, expected %v", err, mockErr)
+               c.Errorf("Got %v, expected %v", err, mockErr)
        }
 }
 
-func TestUnixVolumeGetFuncFileError(t *testing.T) {
-       v := NewTestableUnixVolume(t, false, false)
+func (s *UnixVolumeSuite) TestUnixVolumeGetFuncFileError(c *check.C) {
+       v := s.newTestableUnixVolume(c, s.cluster, arvados.Volume{Replication: 1}, s.metrics, false)
        defer v.Teardown()
 
        funcCalled := false
@@ -266,15 +265,15 @@ func TestUnixVolumeGetFuncFileError(t *testing.T) {
                return nil
        })
        if err == nil {
-               t.Errorf("Expected error opening non-existent file")
+               c.Errorf("Expected error opening non-existent file")
        }
        if funcCalled {
-               t.Errorf("Worker func should not have been called")
+               c.Errorf("Worker func should not have been called")
        }
 }
 
-func TestUnixVolumeGetFuncWorkerWaitsOnMutex(t *testing.T) {
-       v := NewTestableUnixVolume(t, false, false)
+func (s *UnixVolumeSuite) TestUnixVolumeGetFuncWorkerWaitsOnMutex(c *check.C) {
+       v := s.newTestableUnixVolume(c, s.cluster, arvados.Volume{Replication: 1}, s.metrics, false)
        defer v.Teardown()
 
        v.Put(context.Background(), TestHash, TestBlock)
@@ -290,55 +289,55 @@ func TestUnixVolumeGetFuncWorkerWaitsOnMutex(t *testing.T) {
        select {
        case mtx.AllowLock <- struct{}{}:
        case <-funcCalled:
-               t.Fatal("Function was called before mutex was acquired")
+               c.Fatal("Function was called before mutex was acquired")
        case <-time.After(5 * time.Second):
-               t.Fatal("Timed out before mutex was acquired")
+               c.Fatal("Timed out before mutex was acquired")
        }
        select {
        case <-funcCalled:
        case mtx.AllowUnlock <- struct{}{}:
-               t.Fatal("Mutex was released before function was called")
+               c.Fatal("Mutex was released before function was called")
        case <-time.After(5 * time.Second):
-               t.Fatal("Timed out waiting for funcCalled")
+               c.Fatal("Timed out waiting for funcCalled")
        }
        select {
        case mtx.AllowUnlock <- struct{}{}:
        case <-time.After(5 * time.Second):
-               t.Fatal("Timed out waiting for getFunc() to release mutex")
+               c.Fatal("Timed out waiting for getFunc() to release mutex")
        }
 }
 
-func TestUnixVolumeCompare(t *testing.T) {
-       v := NewTestableUnixVolume(t, false, false)
+func (s *UnixVolumeSuite) TestUnixVolumeCompare(c *check.C) {
+       v := s.newTestableUnixVolume(c, s.cluster, arvados.Volume{Replication: 1}, s.metrics, false)
        defer v.Teardown()
 
        v.Put(context.Background(), TestHash, TestBlock)
        err := v.Compare(context.Background(), TestHash, TestBlock)
        if err != nil {
-               t.Errorf("Got err %q, expected nil", err)
+               c.Errorf("Got err %q, expected nil", err)
        }
 
        err = v.Compare(context.Background(), TestHash, []byte("baddata"))
        if err != CollisionError {
-               t.Errorf("Got err %q, expected %q", err, CollisionError)
+               c.Errorf("Got err %q, expected %q", err, CollisionError)
        }
 
        v.Put(context.Background(), TestHash, []byte("baddata"))
        err = v.Compare(context.Background(), TestHash, TestBlock)
        if err != DiskHashError {
-               t.Errorf("Got err %q, expected %q", err, DiskHashError)
+               c.Errorf("Got err %q, expected %q", err, DiskHashError)
        }
 
        p := fmt.Sprintf("%s/%s/%s", v.Root, TestHash[:3], TestHash)
        os.Chmod(p, 000)
        err = v.Compare(context.Background(), TestHash, TestBlock)
        if err == nil || strings.Index(err.Error(), "permission denied") < 0 {
-               t.Errorf("Got err %q, expected %q", err, "permission denied")
+               c.Errorf("Got err %q, expected %q", err, "permission denied")
        }
 }
 
-func TestUnixVolumeContextCancelPut(t *testing.T) {
-       v := NewTestableUnixVolume(t, true, false)
+func (s *UnixVolumeSuite) TestUnixVolumeContextCancelPut(c *check.C) {
+       v := s.newTestableUnixVolume(c, s.cluster, arvados.Volume{Replication: 1}, s.metrics, true)
        defer v.Teardown()
        v.locker.Lock()
        ctx, cancel := context.WithCancel(context.Background())
@@ -350,19 +349,19 @@ func TestUnixVolumeContextCancelPut(t *testing.T) {
        }()
        err := v.Put(ctx, TestHash, TestBlock)
        if err != context.Canceled {
-               t.Errorf("Put() returned %s -- expected short read / canceled", err)
+               c.Errorf("Put() returned %s -- expected short read / canceled", err)
        }
 }
 
-func TestUnixVolumeContextCancelGet(t *testing.T) {
-       v := NewTestableUnixVolume(t, false, false)
+func (s *UnixVolumeSuite) TestUnixVolumeContextCancelGet(c *check.C) {
+       v := s.newTestableUnixVolume(c, s.cluster, arvados.Volume{Replication: 1}, s.metrics, false)
        defer v.Teardown()
        bpath := v.blockPath(TestHash)
        v.PutRaw(TestHash, TestBlock)
        os.Remove(bpath)
        err := syscall.Mkfifo(bpath, 0600)
        if err != nil {
-               t.Fatalf("Mkfifo %s: %s", bpath, err)
+               c.Fatalf("Mkfifo %s: %s", bpath, err)
        }
        defer os.Remove(bpath)
        ctx, cancel := context.WithCancel(context.Background())
@@ -373,35 +372,23 @@ func TestUnixVolumeContextCancelGet(t *testing.T) {
        buf := make([]byte, len(TestBlock))
        n, err := v.Get(ctx, TestHash, buf)
        if n == len(TestBlock) || err != context.Canceled {
-               t.Errorf("Get() returned %d, %s -- expected short read / canceled", n, err)
-       }
-}
-
-var _ = check.Suite(&UnixVolumeSuite{})
-
-type UnixVolumeSuite struct {
-       volume *TestableUnixVolume
-}
-
-func (s *UnixVolumeSuite) TearDownTest(c *check.C) {
-       if s.volume != nil {
-               s.volume.Teardown()
+               c.Errorf("Get() returned %d, %s -- expected short read / canceled", n, err)
        }
 }
 
 func (s *UnixVolumeSuite) TestStats(c *check.C) {
-       s.volume = NewTestableUnixVolume(c, false, false)
+       vol := s.newTestableUnixVolume(c, s.cluster, arvados.Volume{Replication: 1}, s.metrics, false)
        stats := func() string {
-               buf, err := json.Marshal(s.volume.InternalStats())
+               buf, err := json.Marshal(vol.InternalStats())
                c.Check(err, check.IsNil)
                return string(buf)
        }
 
-       c.Check(stats(), check.Matches, `.*"StatOps":0,.*`)
+       c.Check(stats(), check.Matches, `.*"StatOps":1,.*`) // (*UnixVolume)check() calls Stat() once
        c.Check(stats(), check.Matches, `.*"Errors":0,.*`)
 
        loc := "acbd18db4cc2f85cedef654fccc4a4d8"
-       _, err := s.volume.Get(context.Background(), loc, make([]byte, 3))
+       _, err := vol.Get(context.Background(), loc, make([]byte, 3))
        c.Check(err, check.NotNil)
        c.Check(stats(), check.Matches, `.*"StatOps":[^0],.*`)
        c.Check(stats(), check.Matches, `.*"Errors":[^0],.*`)
@@ -410,39 +397,27 @@ func (s *UnixVolumeSuite) TestStats(c *check.C) {
        c.Check(stats(), check.Matches, `.*"OpenOps":0,.*`)
        c.Check(stats(), check.Matches, `.*"CreateOps":0,.*`)
 
-       err = s.volume.Put(context.Background(), loc, []byte("foo"))
+       err = vol.Put(context.Background(), loc, []byte("foo"))
        c.Check(err, check.IsNil)
        c.Check(stats(), check.Matches, `.*"OutBytes":3,.*`)
        c.Check(stats(), check.Matches, `.*"CreateOps":1,.*`)
        c.Check(stats(), check.Matches, `.*"OpenOps":0,.*`)
        c.Check(stats(), check.Matches, `.*"UtimesOps":0,.*`)
 
-       err = s.volume.Touch(loc)
+       err = vol.Touch(loc)
        c.Check(err, check.IsNil)
        c.Check(stats(), check.Matches, `.*"FlockOps":1,.*`)
        c.Check(stats(), check.Matches, `.*"OpenOps":1,.*`)
        c.Check(stats(), check.Matches, `.*"UtimesOps":1,.*`)
 
-       _, err = s.volume.Get(context.Background(), loc, make([]byte, 3))
+       _, err = vol.Get(context.Background(), loc, make([]byte, 3))
        c.Check(err, check.IsNil)
-       err = s.volume.Compare(context.Background(), loc, []byte("foo"))
+       err = vol.Compare(context.Background(), loc, []byte("foo"))
        c.Check(err, check.IsNil)
        c.Check(stats(), check.Matches, `.*"InBytes":6,.*`)
        c.Check(stats(), check.Matches, `.*"OpenOps":3,.*`)
 
-       err = s.volume.Trash(loc)
+       err = vol.Trash(loc)
        c.Check(err, check.IsNil)
        c.Check(stats(), check.Matches, `.*"FlockOps":2,.*`)
 }
-
-func (s *UnixVolumeSuite) TestConfig(c *check.C) {
-       var cfg Config
-       err := yaml.Unmarshal([]byte(`
-Volumes:
-  - Type: Directory
-    StorageClasses: ["class_a", "class_b"]
-`), &cfg)
-
-       c.Check(err, check.IsNil)
-       c.Check(cfg.Volumes[0].GetStorageClasses(), check.DeepEquals, []string{"class_a", "class_b"})
-}
diff --git a/services/keepstore/usage.go b/services/keepstore/usage.go
deleted file mode 100644 (file)
index 8e83f6c..0000000
+++ /dev/null
@@ -1,162 +0,0 @@
-// Copyright (C) The Arvados Authors. All rights reserved.
-//
-// SPDX-License-Identifier: AGPL-3.0
-
-package main
-
-import (
-       "flag"
-       "fmt"
-       "os"
-       "sort"
-       "strings"
-
-       "github.com/ghodss/yaml"
-)
-
-func usage() {
-       c := DefaultConfig()
-       knownTypes := []string{}
-       for _, vt := range VolumeTypes {
-               c.Volumes = append(c.Volumes, vt().Examples()...)
-               knownTypes = append(knownTypes, vt().Type())
-       }
-       exampleConfigFile, err := yaml.Marshal(c)
-       if err != nil {
-               panic(err)
-       }
-       sort.Strings(knownTypes)
-       knownTypeList := strings.Join(knownTypes, ", ")
-       fmt.Fprintf(os.Stderr, `
-
-keepstore provides a content-addressed data store backed by a local filesystem or networked storage.
-
-Usage: keepstore -config path/to/keepstore.yml
-       keepstore [OPTIONS] -dump-config
-
-NOTE: All options (other than -config) are deprecated in favor of YAML
-      configuration. Use -dump-config to translate existing
-      configurations to YAML format.
-
-Options:
-`)
-       flag.PrintDefaults()
-       fmt.Fprintf(os.Stderr, `
-Example config file:
-
-%s
-
-Listen:
-
-    Local port to listen on. Can be "address:port" or ":port", where
-    "address" is a host IP address or name and "port" is a port number
-    or name.
-
-LogFormat:
-
-    Format of request/response and error logs: "json" or "text".
-
-PIDFile:
-
-   Path to write PID file during startup. This file is kept open and
-   locked with LOCK_EX until keepstore exits, so "fuser -k pidfile" is
-   one way to shut down. Exit immediately if there is an error
-   opening, locking, or writing the PID file.
-
-MaxBuffers:
-
-    Maximum RAM to use for data buffers, given in multiples of block
-    size (64 MiB). When this limit is reached, HTTP requests requiring
-    buffers (like GET and PUT) will wait for buffer space to be
-    released.
-
-MaxRequests:
-
-    Maximum concurrent requests. When this limit is reached, new
-    requests will receive 503 responses. Note: this limit does not
-    include idle connections from clients using HTTP keepalive, so it
-    does not strictly limit the number of concurrent connections. If
-    omitted or zero, the default is 2 * MaxBuffers.
-
-BlobSigningKeyFile:
-
-    Local file containing the secret blob signing key (used to
-    generate and verify blob signatures).  This key should be
-    identical to the API server's blob_signing_key configuration
-    entry.
-
-RequireSignatures:
-
-    Honor read requests only if a valid signature is provided.  This
-    should be true, except for development use and when migrating from
-    a very old version.
-
-BlobSignatureTTL:
-
-    Duration for which new permission signatures (returned in PUT
-    responses) will be valid.  This should be equal to the API
-    server's blob_signature_ttl configuration entry.
-
-SystemAuthTokenFile:
-
-    Local file containing the Arvados API token used by keep-balance
-    or data manager.  Delete, trash, and index requests are honored
-    only for this token.
-
-EnableDelete:
-
-    Enable trash and delete features. If false, trash lists will be
-    accepted but blocks will not be trashed or deleted.
-
-TrashLifetime:
-
-    Time duration after a block is trashed during which it can be
-    recovered using an /untrash request.
-
-TrashCheckInterval:
-
-    How often to check for (and delete) trashed blocks whose
-    TrashLifetime has expired.
-
-TrashWorkers:
-
-    Maximum number of concurrent trash operations. Default is 1, i.e.,
-    trash lists are processed serially.
-
-EmptyTrashWorkers:
-
-    Maximum number of concurrent block deletion operations (per
-    volume) when emptying trash. Default is 1.
-
-PullWorkers:
-
-    Maximum number of concurrent pull operations. Default is 1, i.e.,
-    pull lists are processed serially.
-
-TLSCertificateFile:
-
-    Path to server certificate file in X509 format. Enables TLS mode.
-
-    Example: /var/lib/acme/live/keep0.example.com/fullchain
-
-TLSKeyFile:
-
-    Path to server key file in X509 format. Enables TLS mode.
-
-    The key pair is read from disk during startup, and whenever SIGHUP
-    is received.
-
-    Example: /var/lib/acme/live/keep0.example.com/privkey
-
-Volumes:
-
-    List of storage volumes. If omitted or empty, the default is to
-    use all directories named "keep" that exist in the top level
-    directory of a mount point at startup time.
-
-    Volume types: %s
-
-    (See volume configuration examples above.)
-
-`, exampleConfigFile, knownTypeList)
-}
index 52b9b1b244c0a7032c66a2ea12b8d867c2384940..1dea6194d51c88b195f6155392b233b879cbeee8 100644 (file)
@@ -14,6 +14,7 @@ import (
        "time"
 
        "git.curoverse.com/arvados.git/sdk/go/arvados"
+       "github.com/sirupsen/logrus"
 )
 
 type BlockWriter interface {
@@ -28,19 +29,12 @@ type BlockReader interface {
        ReadBlock(ctx context.Context, loc string, w io.Writer) error
 }
 
+var driver = map[string]func(*arvados.Cluster, arvados.Volume, logrus.FieldLogger, *volumeMetricsVecs) (Volume, error){}
+
 // A Volume is an interface representing a Keep back-end storage unit:
 // for example, a single mounted disk, a RAID array, an Amazon S3 volume,
 // etc.
 type Volume interface {
-       // Volume type as specified in config file. Examples: "S3",
-       // "Directory".
-       Type() string
-
-       // Do whatever private setup tasks and configuration checks
-       // are needed. Return non-nil if the volume is unusable (e.g.,
-       // invalid config).
-       Start(vm *volumeMetricsVecs) error
-
        // Get a block: copy the block data into buf, and return the
        // number of bytes copied.
        //
@@ -176,12 +170,12 @@ type Volume interface {
 
        // Trash moves the block data from the underlying storage
        // device to trash area. The block then stays in trash for
-       // -trash-lifetime interval before it is actually deleted.
+       // BlobTrashLifetime before it is actually deleted.
        //
        // loc is as described in Get.
        //
        // If the timestamp for the given locator is newer than
-       // BlobSignatureTTL, Trash must not trash the data.
+       // BlobSigningTTL, Trash must not trash the data.
        //
        // If a Trash operation overlaps with any Touch or Put
        // operations on the same locator, the implementation must
@@ -202,8 +196,7 @@ type Volume interface {
        // reliably or fail outright.
        //
        // Corollary: A successful Touch or Put guarantees a block
-       // will not be trashed for at least BlobSignatureTTL
-       // seconds.
+       // will not be trashed for at least BlobSigningTTL seconds.
        Trash(loc string) error
 
        // Untrash moves block from trash back into store
@@ -222,29 +215,13 @@ type Volume interface {
        // secrets.
        String() string
 
-       // Writable returns false if all future Put, Mtime, and Delete
-       // calls are expected to fail.
-       //
-       // If the volume is only temporarily unwritable -- or if Put
-       // will fail because it is full, but Mtime or Delete can
-       // succeed -- then Writable should return false.
-       Writable() bool
-
-       // Replication returns the storage redundancy of the
-       // underlying device. It will be passed on to clients in
-       // responses to PUT requests.
-       Replication() int
-
-       // EmptyTrash looks for trashed blocks that exceeded TrashLifetime
-       // and deletes them from the volume.
+       // EmptyTrash looks for trashed blocks that exceeded
+       // BlobTrashLifetime and deletes them from the volume.
        EmptyTrash()
 
        // Return a globally unique ID of the underlying storage
        // device if possible, otherwise "".
-       DeviceID() string
-
-       // Get the storage classes associated with this volume
-       GetStorageClasses() []string
+       GetDeviceID() string
 }
 
 // A VolumeWithExamples provides example configs to display in the
@@ -260,24 +237,24 @@ type VolumeManager interface {
        // Mounts returns all mounts (volume attachments).
        Mounts() []*VolumeMount
 
-       // Lookup returns the volume under the given mount
-       // UUID. Returns nil if the mount does not exist. If
-       // write==true, returns nil if the volume is not writable.
-       Lookup(uuid string, write bool) Volume
+       // Lookup returns the mount with the given UUID. Returns nil
+       // if the mount does not exist. If write==true, returns nil if
+       // the mount is not writable.
+       Lookup(uuid string, write bool) *VolumeMount
 
-       // AllReadable returns all volumes.
-       AllReadable() []Volume
+       // AllReadable returns all mounts.
+       AllReadable() []*VolumeMount
 
-       // AllWritable returns all volumes that aren't known to be in
+       // AllWritable returns all mounts that aren't known to be in
        // a read-only state. (There is no guarantee that a write to
        // one will succeed, though.)
-       AllWritable() []Volume
+       AllWritable() []*VolumeMount
 
        // NextWritable returns the volume where the next new block
        // should be written. A VolumeManager can select a volume in
        // order to distribute activity across spindles, fill up disks
        // with more free space, etc.
-       NextWritable() Volume
+       NextWritable() *VolumeMount
 
        // VolumeStats returns the ioStats used for tracking stats for
        // the given Volume.
@@ -290,7 +267,7 @@ type VolumeManager interface {
 // A VolumeMount is an attachment of a Volume to a VolumeManager.
 type VolumeMount struct {
        arvados.KeepMount
-       volume Volume
+       Volume
 }
 
 // Generate a UUID the way API server would for a "KeepVolumeMount"
@@ -314,68 +291,85 @@ func (*VolumeMount) generateUUID() string {
 type RRVolumeManager struct {
        mounts    []*VolumeMount
        mountMap  map[string]*VolumeMount
-       readables []Volume
-       writables []Volume
+       readables []*VolumeMount
+       writables []*VolumeMount
        counter   uint32
        iostats   map[Volume]*ioStats
 }
 
-// MakeRRVolumeManager initializes RRVolumeManager
-func MakeRRVolumeManager(volumes []Volume) *RRVolumeManager {
+func makeRRVolumeManager(logger logrus.FieldLogger, cluster *arvados.Cluster, myURL arvados.URL, metrics *volumeMetricsVecs) (*RRVolumeManager, error) {
        vm := &RRVolumeManager{
                iostats: make(map[Volume]*ioStats),
        }
        vm.mountMap = make(map[string]*VolumeMount)
-       for _, v := range volumes {
-               sc := v.GetStorageClasses()
+       for uuid, cfgvol := range cluster.Volumes {
+               va, ok := cfgvol.AccessViaHosts[myURL]
+               if !ok && len(cfgvol.AccessViaHosts) > 0 {
+                       continue
+               }
+               dri, ok := driver[cfgvol.Driver]
+               if !ok {
+                       return nil, fmt.Errorf("volume %s: invalid driver %q", uuid, cfgvol.Driver)
+               }
+               vol, err := dri(cluster, cfgvol, logger, metrics)
+               if err != nil {
+                       return nil, fmt.Errorf("error initializing volume %s: %s", uuid, err)
+               }
+               logger.Printf("started volume %s (%s), ReadOnly=%v", uuid, vol, cfgvol.ReadOnly)
+
+               sc := cfgvol.StorageClasses
                if len(sc) == 0 {
-                       sc = []string{"default"}
+                       sc = map[string]bool{"default": true}
+               }
+               repl := cfgvol.Replication
+               if repl < 1 {
+                       repl = 1
                }
                mnt := &VolumeMount{
                        KeepMount: arvados.KeepMount{
-                               UUID:           (*VolumeMount)(nil).generateUUID(),
-                               DeviceID:       v.DeviceID(),
-                               ReadOnly:       !v.Writable(),
-                               Replication:    v.Replication(),
+                               UUID:           uuid,
+                               DeviceID:       vol.GetDeviceID(),
+                               ReadOnly:       cfgvol.ReadOnly || va.ReadOnly,
+                               Replication:    repl,
                                StorageClasses: sc,
                        },
-                       volume: v,
+                       Volume: vol,
                }
-               vm.iostats[v] = &ioStats{}
+               vm.iostats[vol] = &ioStats{}
                vm.mounts = append(vm.mounts, mnt)
-               vm.mountMap[mnt.UUID] = mnt
-               vm.readables = append(vm.readables, v)
-               if v.Writable() {
-                       vm.writables = append(vm.writables, v)
+               vm.mountMap[uuid] = mnt
+               vm.readables = append(vm.readables, mnt)
+               if !mnt.KeepMount.ReadOnly {
+                       vm.writables = append(vm.writables, mnt)
                }
        }
-       return vm
+       return vm, nil
 }
 
 func (vm *RRVolumeManager) Mounts() []*VolumeMount {
        return vm.mounts
 }
 
-func (vm *RRVolumeManager) Lookup(uuid string, needWrite bool) Volume {
+func (vm *RRVolumeManager) Lookup(uuid string, needWrite bool) *VolumeMount {
        if mnt, ok := vm.mountMap[uuid]; ok && (!needWrite || !mnt.ReadOnly) {
-               return mnt.volume
+               return mnt
        } else {
                return nil
        }
 }
 
 // AllReadable returns an array of all readable volumes
-func (vm *RRVolumeManager) AllReadable() []Volume {
+func (vm *RRVolumeManager) AllReadable() []*VolumeMount {
        return vm.readables
 }
 
 // AllWritable returns an array of all writable volumes
-func (vm *RRVolumeManager) AllWritable() []Volume {
+func (vm *RRVolumeManager) AllWritable() []*VolumeMount {
        return vm.writables
 }
 
 // NextWritable returns the next writable
-func (vm *RRVolumeManager) NextWritable() Volume {
+func (vm *RRVolumeManager) NextWritable() *VolumeMount {
        if len(vm.writables) == 0 {
                return nil
        }
index d5a413693f6c46c1d8241838a1ea87581191f9a4..cbb0eb3a3a68769c3ad9133e93a77ade565d35b2 100644 (file)
@@ -18,8 +18,10 @@ import (
 
        "git.curoverse.com/arvados.git/sdk/go/arvados"
        "git.curoverse.com/arvados.git/sdk/go/arvadostest"
+       "git.curoverse.com/arvados.git/sdk/go/ctxlog"
        "github.com/prometheus/client_golang/prometheus"
        dto "github.com/prometheus/client_model/go"
+       "github.com/sirupsen/logrus"
 )
 
 type TB interface {
@@ -37,65 +39,96 @@ type TB interface {
 // A TestableVolumeFactory returns a new TestableVolume. The factory
 // function, and the TestableVolume it returns, can use "t" to write
 // logs, fail the current test, etc.
-type TestableVolumeFactory func(t TB) TestableVolume
+type TestableVolumeFactory func(t TB, cluster *arvados.Cluster, volume arvados.Volume, logger logrus.FieldLogger, metrics *volumeMetricsVecs) TestableVolume
 
 // DoGenericVolumeTests runs a set of tests that every TestableVolume
 // is expected to pass. It calls factory to create a new TestableVolume
 // for each test case, to avoid leaking state between tests.
-func DoGenericVolumeTests(t TB, factory TestableVolumeFactory) {
-       testGet(t, factory)
-       testGetNoSuchBlock(t, factory)
+func DoGenericVolumeTests(t TB, readonly bool, factory TestableVolumeFactory) {
+       var s genericVolumeSuite
+       s.volume.ReadOnly = readonly
 
-       testCompareNonexistent(t, factory)
-       testCompareSameContent(t, factory, TestHash, TestBlock)
-       testCompareSameContent(t, factory, EmptyHash, EmptyBlock)
-       testCompareWithCollision(t, factory, TestHash, TestBlock, []byte("baddata"))
-       testCompareWithCollision(t, factory, TestHash, TestBlock, EmptyBlock)
-       testCompareWithCollision(t, factory, EmptyHash, EmptyBlock, TestBlock)
-       testCompareWithCorruptStoredData(t, factory, TestHash, TestBlock, []byte("baddata"))
-       testCompareWithCorruptStoredData(t, factory, TestHash, TestBlock, EmptyBlock)
-       testCompareWithCorruptStoredData(t, factory, EmptyHash, EmptyBlock, []byte("baddata"))
+       s.testGet(t, factory)
+       s.testGetNoSuchBlock(t, factory)
 
-       testPutBlockWithSameContent(t, factory, TestHash, TestBlock)
-       testPutBlockWithSameContent(t, factory, EmptyHash, EmptyBlock)
-       testPutBlockWithDifferentContent(t, factory, arvadostest.MD5CollisionMD5, arvadostest.MD5CollisionData[0], arvadostest.MD5CollisionData[1])
-       testPutBlockWithDifferentContent(t, factory, arvadostest.MD5CollisionMD5, EmptyBlock, arvadostest.MD5CollisionData[0])
-       testPutBlockWithDifferentContent(t, factory, arvadostest.MD5CollisionMD5, arvadostest.MD5CollisionData[0], EmptyBlock)
-       testPutBlockWithDifferentContent(t, factory, EmptyHash, EmptyBlock, arvadostest.MD5CollisionData[0])
-       testPutMultipleBlocks(t, factory)
+       s.testCompareNonexistent(t, factory)
+       s.testCompareSameContent(t, factory, TestHash, TestBlock)
+       s.testCompareSameContent(t, factory, EmptyHash, EmptyBlock)
+       s.testCompareWithCollision(t, factory, TestHash, TestBlock, []byte("baddata"))
+       s.testCompareWithCollision(t, factory, TestHash, TestBlock, EmptyBlock)
+       s.testCompareWithCollision(t, factory, EmptyHash, EmptyBlock, TestBlock)
+       s.testCompareWithCorruptStoredData(t, factory, TestHash, TestBlock, []byte("baddata"))
+       s.testCompareWithCorruptStoredData(t, factory, TestHash, TestBlock, EmptyBlock)
+       s.testCompareWithCorruptStoredData(t, factory, EmptyHash, EmptyBlock, []byte("baddata"))
 
-       testPutAndTouch(t, factory)
-       testTouchNoSuchBlock(t, factory)
+       if !readonly {
+               s.testPutBlockWithSameContent(t, factory, TestHash, TestBlock)
+               s.testPutBlockWithSameContent(t, factory, EmptyHash, EmptyBlock)
+               s.testPutBlockWithDifferentContent(t, factory, arvadostest.MD5CollisionMD5, arvadostest.MD5CollisionData[0], arvadostest.MD5CollisionData[1])
+               s.testPutBlockWithDifferentContent(t, factory, arvadostest.MD5CollisionMD5, EmptyBlock, arvadostest.MD5CollisionData[0])
+               s.testPutBlockWithDifferentContent(t, factory, arvadostest.MD5CollisionMD5, arvadostest.MD5CollisionData[0], EmptyBlock)
+               s.testPutBlockWithDifferentContent(t, factory, EmptyHash, EmptyBlock, arvadostest.MD5CollisionData[0])
+               s.testPutMultipleBlocks(t, factory)
 
-       testMtimeNoSuchBlock(t, factory)
+               s.testPutAndTouch(t, factory)
+       }
+       s.testTouchNoSuchBlock(t, factory)
+
+       s.testMtimeNoSuchBlock(t, factory)
+
+       s.testIndexTo(t, factory)
 
-       testIndexTo(t, factory)
+       if !readonly {
+               s.testDeleteNewBlock(t, factory)
+               s.testDeleteOldBlock(t, factory)
+       }
+       s.testDeleteNoSuchBlock(t, factory)
 
-       testDeleteNewBlock(t, factory)
-       testDeleteOldBlock(t, factory)
-       testDeleteNoSuchBlock(t, factory)
+       s.testStatus(t, factory)
 
-       testStatus(t, factory)
+       s.testMetrics(t, readonly, factory)
 
-       testMetrics(t, factory)
+       s.testString(t, factory)
 
-       testString(t, factory)
+       if readonly {
+               s.testUpdateReadOnly(t, factory)
+       }
 
-       testUpdateReadOnly(t, factory)
+       s.testGetConcurrent(t, factory)
+       if !readonly {
+               s.testPutConcurrent(t, factory)
 
-       testGetConcurrent(t, factory)
-       testPutConcurrent(t, factory)
+               s.testPutFullBlock(t, factory)
+       }
 
-       testPutFullBlock(t, factory)
+       s.testTrashUntrash(t, readonly, factory)
+       s.testTrashEmptyTrashUntrash(t, factory)
+}
+
+type genericVolumeSuite struct {
+       cluster  *arvados.Cluster
+       volume   arvados.Volume
+       logger   logrus.FieldLogger
+       metrics  *volumeMetricsVecs
+       registry *prometheus.Registry
+}
+
+func (s *genericVolumeSuite) setup(t TB) {
+       s.cluster = testCluster(t)
+       s.logger = ctxlog.TestLogger(t)
+       s.registry = prometheus.NewRegistry()
+       s.metrics = newVolumeMetricsVecs(s.registry)
+}
 
-       testTrashUntrash(t, factory)
-       testTrashEmptyTrashUntrash(t, factory)
+func (s *genericVolumeSuite) newVolume(t TB, factory TestableVolumeFactory) TestableVolume {
+       return factory(t, s.cluster, s.volume, s.logger, s.metrics)
 }
 
 // Put a test block, get it and verify content
 // Test should pass for both writable and read-only volumes
-func testGet(t TB, factory TestableVolumeFactory) {
-       v := factory(t)
+func (s *genericVolumeSuite) testGet(t TB, factory TestableVolumeFactory) {
+       s.setup(t)
+       v := s.newVolume(t, factory)
        defer v.Teardown()
 
        v.PutRaw(TestHash, TestBlock)
@@ -113,8 +146,9 @@ func testGet(t TB, factory TestableVolumeFactory) {
 
 // Invoke get on a block that does not exist in volume; should result in error
 // Test should pass for both writable and read-only volumes
-func testGetNoSuchBlock(t TB, factory TestableVolumeFactory) {
-       v := factory(t)
+func (s *genericVolumeSuite) testGetNoSuchBlock(t TB, factory TestableVolumeFactory) {
+       s.setup(t)
+       v := s.newVolume(t, factory)
        defer v.Teardown()
 
        buf := make([]byte, BlockSize)
@@ -126,8 +160,9 @@ func testGetNoSuchBlock(t TB, factory TestableVolumeFactory) {
 // Compare() should return os.ErrNotExist if the block does not exist.
 // Otherwise, writing new data causes CompareAndTouch() to generate
 // error logs even though everything is working fine.
-func testCompareNonexistent(t TB, factory TestableVolumeFactory) {
-       v := factory(t)
+func (s *genericVolumeSuite) testCompareNonexistent(t TB, factory TestableVolumeFactory) {
+       s.setup(t)
+       v := s.newVolume(t, factory)
        defer v.Teardown()
 
        err := v.Compare(context.Background(), TestHash, TestBlock)
@@ -138,8 +173,9 @@ func testCompareNonexistent(t TB, factory TestableVolumeFactory) {
 
 // Put a test block and compare the locator with same content
 // Test should pass for both writable and read-only volumes
-func testCompareSameContent(t TB, factory TestableVolumeFactory, testHash string, testData []byte) {
-       v := factory(t)
+func (s *genericVolumeSuite) testCompareSameContent(t TB, factory TestableVolumeFactory, testHash string, testData []byte) {
+       s.setup(t)
+       v := s.newVolume(t, factory)
        defer v.Teardown()
 
        v.PutRaw(testHash, testData)
@@ -156,8 +192,9 @@ func testCompareSameContent(t TB, factory TestableVolumeFactory, testHash string
 // testHash = md5(testDataA).
 //
 // Test should pass for both writable and read-only volumes
-func testCompareWithCollision(t TB, factory TestableVolumeFactory, testHash string, testDataA, testDataB []byte) {
-       v := factory(t)
+func (s *genericVolumeSuite) testCompareWithCollision(t TB, factory TestableVolumeFactory, testHash string, testDataA, testDataB []byte) {
+       s.setup(t)
+       v := s.newVolume(t, factory)
        defer v.Teardown()
 
        v.PutRaw(testHash, testDataA)
@@ -173,8 +210,9 @@ func testCompareWithCollision(t TB, factory TestableVolumeFactory, testHash stri
 // corrupted. Requires testHash = md5(testDataA) != md5(testDataB).
 //
 // Test should pass for both writable and read-only volumes
-func testCompareWithCorruptStoredData(t TB, factory TestableVolumeFactory, testHash string, testDataA, testDataB []byte) {
-       v := factory(t)
+func (s *genericVolumeSuite) testCompareWithCorruptStoredData(t TB, factory TestableVolumeFactory, testHash string, testDataA, testDataB []byte) {
+       s.setup(t)
+       v := s.newVolume(t, factory)
        defer v.Teardown()
 
        v.PutRaw(TestHash, testDataB)
@@ -187,14 +225,11 @@ func testCompareWithCorruptStoredData(t TB, factory TestableVolumeFactory, testH
 
 // Put a block and put again with same content
 // Test is intended for only writable volumes
-func testPutBlockWithSameContent(t TB, factory TestableVolumeFactory, testHash string, testData []byte) {
-       v := factory(t)
+func (s *genericVolumeSuite) testPutBlockWithSameContent(t TB, factory TestableVolumeFactory, testHash string, testData []byte) {
+       s.setup(t)
+       v := s.newVolume(t, factory)
        defer v.Teardown()
 
-       if v.Writable() == false {
-               return
-       }
-
        err := v.Put(context.Background(), testHash, testData)
        if err != nil {
                t.Errorf("Got err putting block %q: %q, expected nil", TestBlock, err)
@@ -208,14 +243,11 @@ func testPutBlockWithSameContent(t TB, factory TestableVolumeFactory, testHash s
 
 // Put a block and put again with different content
 // Test is intended for only writable volumes
-func testPutBlockWithDifferentContent(t TB, factory TestableVolumeFactory, testHash string, testDataA, testDataB []byte) {
-       v := factory(t)
+func (s *genericVolumeSuite) testPutBlockWithDifferentContent(t TB, factory TestableVolumeFactory, testHash string, testDataA, testDataB []byte) {
+       s.setup(t)
+       v := s.newVolume(t, factory)
        defer v.Teardown()
 
-       if v.Writable() == false {
-               return
-       }
-
        v.PutRaw(testHash, testDataA)
 
        putErr := v.Put(context.Background(), testHash, testDataB)
@@ -239,14 +271,11 @@ func testPutBlockWithDifferentContent(t TB, factory TestableVolumeFactory, testH
 
 // Put and get multiple blocks
 // Test is intended for only writable volumes
-func testPutMultipleBlocks(t TB, factory TestableVolumeFactory) {
-       v := factory(t)
+func (s *genericVolumeSuite) testPutMultipleBlocks(t TB, factory TestableVolumeFactory) {
+       s.setup(t)
+       v := s.newVolume(t, factory)
        defer v.Teardown()
 
-       if v.Writable() == false {
-               return
-       }
-
        err := v.Put(context.Background(), TestHash, TestBlock)
        if err != nil {
                t.Errorf("Got err putting block %q: %q, expected nil", TestBlock, err)
@@ -295,14 +324,11 @@ func testPutMultipleBlocks(t TB, factory TestableVolumeFactory) {
 //   Test that when applying PUT to a block that already exists,
 //   the block's modification time is updated.
 // Test is intended for only writable volumes
-func testPutAndTouch(t TB, factory TestableVolumeFactory) {
-       v := factory(t)
+func (s *genericVolumeSuite) testPutAndTouch(t TB, factory TestableVolumeFactory) {
+       s.setup(t)
+       v := s.newVolume(t, factory)
        defer v.Teardown()
 
-       if v.Writable() == false {
-               return
-       }
-
        if err := v.Put(context.Background(), TestHash, TestBlock); err != nil {
                t.Error(err)
        }
@@ -337,8 +363,9 @@ func testPutAndTouch(t TB, factory TestableVolumeFactory) {
 
 // Touching a non-existing block should result in error.
 // Test should pass for both writable and read-only volumes
-func testTouchNoSuchBlock(t TB, factory TestableVolumeFactory) {
-       v := factory(t)
+func (s *genericVolumeSuite) testTouchNoSuchBlock(t TB, factory TestableVolumeFactory) {
+       s.setup(t)
+       v := s.newVolume(t, factory)
        defer v.Teardown()
 
        if err := v.Touch(TestHash); err == nil {
@@ -348,8 +375,9 @@ func testTouchNoSuchBlock(t TB, factory TestableVolumeFactory) {
 
 // Invoking Mtime on a non-existing block should result in error.
 // Test should pass for both writable and read-only volumes
-func testMtimeNoSuchBlock(t TB, factory TestableVolumeFactory) {
-       v := factory(t)
+func (s *genericVolumeSuite) testMtimeNoSuchBlock(t TB, factory TestableVolumeFactory) {
+       s.setup(t)
+       v := s.newVolume(t, factory)
        defer v.Teardown()
 
        if _, err := v.Mtime("12345678901234567890123456789012"); err == nil {
@@ -362,8 +390,9 @@ func testMtimeNoSuchBlock(t TB, factory TestableVolumeFactory) {
 // * with a prefix
 // * with no such prefix
 // Test should pass for both writable and read-only volumes
-func testIndexTo(t TB, factory TestableVolumeFactory) {
-       v := factory(t)
+func (s *genericVolumeSuite) testIndexTo(t TB, factory TestableVolumeFactory) {
+       s.setup(t)
+       v := s.newVolume(t, factory)
        defer v.Teardown()
 
        // minMtime and maxMtime are the minimum and maximum
@@ -437,14 +466,11 @@ func testIndexTo(t TB, factory TestableVolumeFactory) {
 // Calling Delete() for a block immediately after writing it (not old enough)
 // should neither delete the data nor return an error.
 // Test is intended for only writable volumes
-func testDeleteNewBlock(t TB, factory TestableVolumeFactory) {
-       v := factory(t)
+func (s *genericVolumeSuite) testDeleteNewBlock(t TB, factory TestableVolumeFactory) {
+       s.setup(t)
+       s.cluster.Collections.BlobSigningTTL.Set("5m")
+       v := s.newVolume(t, factory)
        defer v.Teardown()
-       theConfig.BlobSignatureTTL.Set("5m")
-
-       if v.Writable() == false {
-               return
-       }
 
        v.Put(context.Background(), TestHash, TestBlock)
 
@@ -461,19 +487,16 @@ func testDeleteNewBlock(t TB, factory TestableVolumeFactory) {
 }
 
 // Calling Delete() for a block with a timestamp older than
-// BlobSignatureTTL seconds in the past should delete the data.
-// Test is intended for only writable volumes
-func testDeleteOldBlock(t TB, factory TestableVolumeFactory) {
-       v := factory(t)
+// BlobSigningTTL seconds in the past should delete the data.  Test is
+// intended for only writable volumes
+func (s *genericVolumeSuite) testDeleteOldBlock(t TB, factory TestableVolumeFactory) {
+       s.setup(t)
+       s.cluster.Collections.BlobSigningTTL.Set("5m")
+       v := s.newVolume(t, factory)
        defer v.Teardown()
-       theConfig.BlobSignatureTTL.Set("5m")
-
-       if v.Writable() == false {
-               return
-       }
 
        v.Put(context.Background(), TestHash, TestBlock)
-       v.TouchWithDate(TestHash, time.Now().Add(-2*theConfig.BlobSignatureTTL.Duration()))
+       v.TouchWithDate(TestHash, time.Now().Add(-2*s.cluster.Collections.BlobSigningTTL.Duration()))
 
        if err := v.Trash(TestHash); err != nil {
                t.Error(err)
@@ -507,8 +530,9 @@ func testDeleteOldBlock(t TB, factory TestableVolumeFactory) {
 
 // Calling Delete() for a block that does not exist should result in error.
 // Test should pass for both writable and read-only volumes
-func testDeleteNoSuchBlock(t TB, factory TestableVolumeFactory) {
-       v := factory(t)
+func (s *genericVolumeSuite) testDeleteNoSuchBlock(t TB, factory TestableVolumeFactory) {
+       s.setup(t)
+       v := s.newVolume(t, factory)
        defer v.Teardown()
 
        if err := v.Trash(TestHash2); err == nil {
@@ -518,8 +542,9 @@ func testDeleteNoSuchBlock(t TB, factory TestableVolumeFactory) {
 
 // Invoke Status and verify that VolumeStatus is returned
 // Test should pass for both writable and read-only volumes
-func testStatus(t TB, factory TestableVolumeFactory) {
-       v := factory(t)
+func (s *genericVolumeSuite) testStatus(t TB, factory TestableVolumeFactory) {
+       s.setup(t)
+       v := s.newVolume(t, factory)
        defer v.Teardown()
 
        // Get node status and make a basic sanity check.
@@ -544,19 +569,14 @@ func getValueFrom(cv *prometheus.CounterVec, lbls prometheus.Labels) float64 {
        return pb.GetCounter().GetValue()
 }
 
-func testMetrics(t TB, factory TestableVolumeFactory) {
+func (s *genericVolumeSuite) testMetrics(t TB, readonly bool, factory TestableVolumeFactory) {
        var err error
 
-       v := factory(t)
+       s.setup(t)
+       v := s.newVolume(t, factory)
        defer v.Teardown()
-       reg := prometheus.NewRegistry()
-       vm := newVolumeMetricsVecs(reg)
 
-       err = v.Start(vm)
-       if err != nil {
-               t.Error("Failed Start(): ", err)
-       }
-       opsC, _, ioC := vm.getCounterVecsFor(prometheus.Labels{"device_id": v.DeviceID()})
+       opsC, _, ioC := s.metrics.getCounterVecsFor(prometheus.Labels{"device_id": v.GetDeviceID()})
 
        if ioC == nil {
                t.Error("ioBytes CounterVec is nil")
@@ -580,7 +600,7 @@ func testMetrics(t TB, factory TestableVolumeFactory) {
        readOpCounter = getValueFrom(opsC, prometheus.Labels{"operation": readOpType})
 
        // Test Put if volume is writable
-       if v.Writable() {
+       if !readonly {
                err = v.Put(context.Background(), TestHash, TestBlock)
                if err != nil {
                        t.Errorf("Got err putting block %q: %q, expected nil", TestBlock, err)
@@ -617,8 +637,9 @@ func testMetrics(t TB, factory TestableVolumeFactory) {
 
 // Invoke String for the volume; expect non-empty result
 // Test should pass for both writable and read-only volumes
-func testString(t TB, factory TestableVolumeFactory) {
-       v := factory(t)
+func (s *genericVolumeSuite) testString(t TB, factory TestableVolumeFactory) {
+       s.setup(t)
+       v := s.newVolume(t, factory)
        defer v.Teardown()
 
        if id := v.String(); len(id) == 0 {
@@ -628,14 +649,11 @@ func testString(t TB, factory TestableVolumeFactory) {
 
 // Putting, updating, touching, and deleting blocks from a read-only volume result in error.
 // Test is intended for only read-only volumes
-func testUpdateReadOnly(t TB, factory TestableVolumeFactory) {
-       v := factory(t)
+func (s *genericVolumeSuite) testUpdateReadOnly(t TB, factory TestableVolumeFactory) {
+       s.setup(t)
+       v := s.newVolume(t, factory)
        defer v.Teardown()
 
-       if v.Writable() == true {
-               return
-       }
-
        v.PutRaw(TestHash, TestBlock)
        buf := make([]byte, BlockSize)
 
@@ -676,8 +694,9 @@ func testUpdateReadOnly(t TB, factory TestableVolumeFactory) {
 
 // Launch concurrent Gets
 // Test should pass for both writable and read-only volumes
-func testGetConcurrent(t TB, factory TestableVolumeFactory) {
-       v := factory(t)
+func (s *genericVolumeSuite) testGetConcurrent(t TB, factory TestableVolumeFactory) {
+       s.setup(t)
+       v := s.newVolume(t, factory)
        defer v.Teardown()
 
        v.PutRaw(TestHash, TestBlock)
@@ -729,14 +748,11 @@ func testGetConcurrent(t TB, factory TestableVolumeFactory) {
 
 // Launch concurrent Puts
 // Test is intended for only writable volumes
-func testPutConcurrent(t TB, factory TestableVolumeFactory) {
-       v := factory(t)
+func (s *genericVolumeSuite) testPutConcurrent(t TB, factory TestableVolumeFactory) {
+       s.setup(t)
+       v := s.newVolume(t, factory)
        defer v.Teardown()
 
-       if v.Writable() == false {
-               return
-       }
-
        sem := make(chan int)
        go func(sem chan int) {
                err := v.Put(context.Background(), TestHash, TestBlock)
@@ -795,14 +811,11 @@ func testPutConcurrent(t TB, factory TestableVolumeFactory) {
 }
 
 // Write and read back a full size block
-func testPutFullBlock(t TB, factory TestableVolumeFactory) {
-       v := factory(t)
+func (s *genericVolumeSuite) testPutFullBlock(t TB, factory TestableVolumeFactory) {
+       s.setup(t)
+       v := s.newVolume(t, factory)
        defer v.Teardown()
 
-       if !v.Writable() {
-               return
-       }
-
        wdata := make([]byte, BlockSize)
        wdata[0] = 'a'
        wdata[BlockSize-1] = 'z'
@@ -821,22 +834,19 @@ func testPutFullBlock(t TB, factory TestableVolumeFactory) {
        }
 }
 
-// With TrashLifetime != 0, perform:
+// With BlobTrashLifetime != 0, perform:
 // Trash an old block - which either raises ErrNotImplemented or succeeds
 // Untrash -  which either raises ErrNotImplemented or succeeds
 // Get - which must succeed
-func testTrashUntrash(t TB, factory TestableVolumeFactory) {
-       v := factory(t)
+func (s *genericVolumeSuite) testTrashUntrash(t TB, readonly bool, factory TestableVolumeFactory) {
+       s.setup(t)
+       s.cluster.Collections.BlobTrashLifetime.Set("1h")
+       v := s.newVolume(t, factory)
        defer v.Teardown()
-       defer func() {
-               theConfig.TrashLifetime = 0
-       }()
-
-       theConfig.TrashLifetime.Set("1h")
 
        // put block and backdate it
        v.PutRaw(TestHash, TestBlock)
-       v.TouchWithDate(TestHash, time.Now().Add(-2*theConfig.BlobSignatureTTL.Duration()))
+       v.TouchWithDate(TestHash, time.Now().Add(-2*s.cluster.Collections.BlobSigningTTL.Duration()))
 
        buf := make([]byte, BlockSize)
        n, err := v.Get(context.Background(), TestHash, buf)
@@ -849,7 +859,7 @@ func testTrashUntrash(t TB, factory TestableVolumeFactory) {
 
        // Trash
        err = v.Trash(TestHash)
-       if v.Writable() == false {
+       if readonly {
                if err != MethodDisabledError {
                        t.Fatal(err)
                }
@@ -880,12 +890,10 @@ func testTrashUntrash(t TB, factory TestableVolumeFactory) {
        }
 }
 
-func testTrashEmptyTrashUntrash(t TB, factory TestableVolumeFactory) {
-       v := factory(t)
+func (s *genericVolumeSuite) testTrashEmptyTrashUntrash(t TB, factory TestableVolumeFactory) {
+       s.setup(t)
+       v := s.newVolume(t, factory)
        defer v.Teardown()
-       defer func(orig arvados.Duration) {
-               theConfig.TrashLifetime = orig
-       }(theConfig.TrashLifetime)
 
        checkGet := func() error {
                buf := make([]byte, BlockSize)
@@ -918,10 +926,10 @@ func testTrashEmptyTrashUntrash(t TB, factory TestableVolumeFactory) {
 
        // First set: EmptyTrash before reaching the trash deadline.
 
-       theConfig.TrashLifetime.Set("1h")
+       s.cluster.Collections.BlobTrashLifetime.Set("1h")
 
        v.PutRaw(TestHash, TestBlock)
-       v.TouchWithDate(TestHash, time.Now().Add(-2*theConfig.BlobSignatureTTL.Duration()))
+       v.TouchWithDate(TestHash, time.Now().Add(-2*s.cluster.Collections.BlobSigningTTL.Duration()))
 
        err := checkGet()
        if err != nil {
@@ -932,7 +940,8 @@ func testTrashEmptyTrashUntrash(t TB, factory TestableVolumeFactory) {
        err = v.Trash(TestHash)
        if err == MethodDisabledError || err == ErrNotImplemented {
                // Skip the trash tests for read-only volumes, and
-               // volume types that don't support TrashLifetime>0.
+               // volume types that don't support
+               // BlobTrashLifetime>0.
                return
        }
 
@@ -966,7 +975,7 @@ func testTrashEmptyTrashUntrash(t TB, factory TestableVolumeFactory) {
        }
 
        // Because we Touch'ed, need to backdate again for next set of tests
-       v.TouchWithDate(TestHash, time.Now().Add(-2*theConfig.BlobSignatureTTL.Duration()))
+       v.TouchWithDate(TestHash, time.Now().Add(-2*s.cluster.Collections.BlobSigningTTL.Duration()))
 
        // If the only block in the trash has already been untrashed,
        // most volumes will fail a subsequent Untrash with a 404, but
@@ -984,11 +993,11 @@ func testTrashEmptyTrashUntrash(t TB, factory TestableVolumeFactory) {
        }
 
        // Untrash might have updated the timestamp, so backdate again
-       v.TouchWithDate(TestHash, time.Now().Add(-2*theConfig.BlobSignatureTTL.Duration()))
+       v.TouchWithDate(TestHash, time.Now().Add(-2*s.cluster.Collections.BlobSigningTTL.Duration()))
 
        // Second set: EmptyTrash after the trash deadline has passed.
 
-       theConfig.TrashLifetime.Set("1ns")
+       s.cluster.Collections.BlobTrashLifetime.Set("1ns")
 
        err = v.Trash(TestHash)
        if err != nil {
@@ -1013,7 +1022,7 @@ func testTrashEmptyTrashUntrash(t TB, factory TestableVolumeFactory) {
        // Trash it again, and this time call EmptyTrash so it really
        // goes away.
        // (In Azure volumes, un/trash changes Mtime, so first backdate again)
-       v.TouchWithDate(TestHash, time.Now().Add(-2*theConfig.BlobSignatureTTL.Duration()))
+       v.TouchWithDate(TestHash, time.Now().Add(-2*s.cluster.Collections.BlobSigningTTL.Duration()))
        _ = v.Trash(TestHash)
        err = checkGet()
        if err == nil || !os.IsNotExist(err) {
@@ -1038,9 +1047,9 @@ func testTrashEmptyTrashUntrash(t TB, factory TestableVolumeFactory) {
        // un-trashed copy doesn't get deleted along with it.
 
        v.PutRaw(TestHash, TestBlock)
-       v.TouchWithDate(TestHash, time.Now().Add(-2*theConfig.BlobSignatureTTL.Duration()))
+       v.TouchWithDate(TestHash, time.Now().Add(-2*s.cluster.Collections.BlobSigningTTL.Duration()))
 
-       theConfig.TrashLifetime.Set("1ns")
+       s.cluster.Collections.BlobTrashLifetime.Set("1ns")
        err = v.Trash(TestHash)
        if err != nil {
                t.Fatal(err)
@@ -1051,7 +1060,7 @@ func testTrashEmptyTrashUntrash(t TB, factory TestableVolumeFactory) {
        }
 
        v.PutRaw(TestHash, TestBlock)
-       v.TouchWithDate(TestHash, time.Now().Add(-2*theConfig.BlobSignatureTTL.Duration()))
+       v.TouchWithDate(TestHash, time.Now().Add(-2*s.cluster.Collections.BlobSigningTTL.Duration()))
 
        // EmptyTrash should not delete the untrashed copy.
        v.EmptyTrash()
@@ -1066,18 +1075,18 @@ func testTrashEmptyTrashUntrash(t TB, factory TestableVolumeFactory) {
        // untrash the block whose deadline is "C".
 
        v.PutRaw(TestHash, TestBlock)
-       v.TouchWithDate(TestHash, time.Now().Add(-2*theConfig.BlobSignatureTTL.Duration()))
+       v.TouchWithDate(TestHash, time.Now().Add(-2*s.cluster.Collections.BlobSigningTTL.Duration()))
 
-       theConfig.TrashLifetime.Set("1ns")
+       s.cluster.Collections.BlobTrashLifetime.Set("1ns")
        err = v.Trash(TestHash)
        if err != nil {
                t.Fatal(err)
        }
 
        v.PutRaw(TestHash, TestBlock)
-       v.TouchWithDate(TestHash, time.Now().Add(-2*theConfig.BlobSignatureTTL.Duration()))
+       v.TouchWithDate(TestHash, time.Now().Add(-2*s.cluster.Collections.BlobSigningTTL.Duration()))
 
-       theConfig.TrashLifetime.Set("1h")
+       s.cluster.Collections.BlobTrashLifetime.Set("1h")
        err = v.Trash(TestHash)
        if err != nil {
                t.Fatal(err)
index 0b8af330fb2d86f771926f07f5f38a34cf09b8ef..62582d309feeb1722d99a5fc7aeae6ededc58676 100644 (file)
@@ -15,6 +15,28 @@ import (
        "strings"
        "sync"
        "time"
+
+       "git.curoverse.com/arvados.git/sdk/go/arvados"
+       "github.com/sirupsen/logrus"
+)
+
+var (
+       TestBlock       = []byte("The quick brown fox jumps over the lazy dog.")
+       TestHash        = "e4d909c290d0fb1ca068ffaddf22cbd0"
+       TestHashPutResp = "e4d909c290d0fb1ca068ffaddf22cbd0+44\n"
+
+       TestBlock2 = []byte("Pack my box with five dozen liquor jugs.")
+       TestHash2  = "f15ac516f788aec4f30932ffb6395c39"
+
+       TestBlock3 = []byte("Now is the time for all good men to come to the aid of their country.")
+       TestHash3  = "eed29bbffbc2dbe5e5ee0bb71888e61f"
+
+       // BadBlock is used to test collisions and corruption.
+       // It must not match any test hashes.
+       BadBlock = []byte("The magic words are squeamish ossifrage.")
+
+       EmptyHash  = "d41d8cd98f00b204e9800998ecf8427e"
+       EmptyBlock = []byte("")
 )
 
 // A TestableVolume allows test suites to manipulate the state of an
@@ -38,6 +60,10 @@ type TestableVolume interface {
        Teardown()
 }
 
+func init() {
+       driver["mock"] = newMockVolume
+}
+
 // MockVolumes are test doubles for Volumes, used to test handlers.
 type MockVolume struct {
        Store      map[string][]byte
@@ -51,10 +77,6 @@ type MockVolume struct {
        // that has been Put().
        Touchable bool
 
-       // Readonly volumes return an error for Put, Delete, and
-       // Touch.
-       Readonly bool
-
        // Gate is a "starting gate", allowing test cases to pause
        // volume operations long enough to inspect state. Every
        // operation (except Status) starts by receiving from
@@ -62,15 +84,19 @@ type MockVolume struct {
        // channel unblocks all operations. By default, Gate is a
        // closed channel, so all operations proceed without
        // blocking. See trash_worker_test.go for an example.
-       Gate chan struct{}
-
-       called map[string]int
-       mutex  sync.Mutex
+       Gate chan struct{} `json:"-"`
+
+       cluster *arvados.Cluster
+       volume  arvados.Volume
+       logger  logrus.FieldLogger
+       metrics *volumeMetricsVecs
+       called  map[string]int
+       mutex   sync.Mutex
 }
 
-// CreateMockVolume returns a non-Bad, non-Readonly, Touchable mock
+// newMockVolume returns a non-Bad, non-Readonly, Touchable mock
 // volume.
-func CreateMockVolume() *MockVolume {
+func newMockVolume(cluster *arvados.Cluster, volume arvados.Volume, logger logrus.FieldLogger, metrics *volumeMetricsVecs) (Volume, error) {
        gate := make(chan struct{})
        close(gate)
        return &MockVolume{
@@ -78,10 +104,13 @@ func CreateMockVolume() *MockVolume {
                Timestamps: make(map[string]time.Time),
                Bad:        false,
                Touchable:  true,
-               Readonly:   false,
                called:     map[string]int{},
                Gate:       gate,
-       }
+               cluster:    cluster,
+               volume:     volume,
+               logger:     logger,
+               metrics:    metrics,
+       }, nil
 }
 
 // CallCount returns how many times the named method has been called.
@@ -141,7 +170,7 @@ func (v *MockVolume) Put(ctx context.Context, loc string, block []byte) error {
        if v.Bad {
                return v.BadVolumeError
        }
-       if v.Readonly {
+       if v.volume.ReadOnly {
                return MethodDisabledError
        }
        v.Store[loc] = block
@@ -151,7 +180,7 @@ func (v *MockVolume) Put(ctx context.Context, loc string, block []byte) error {
 func (v *MockVolume) Touch(loc string) error {
        v.gotCall("Touch")
        <-v.Gate
-       if v.Readonly {
+       if v.volume.ReadOnly {
                return MethodDisabledError
        }
        if v.Touchable {
@@ -195,11 +224,11 @@ func (v *MockVolume) IndexTo(prefix string, w io.Writer) error {
 func (v *MockVolume) Trash(loc string) error {
        v.gotCall("Delete")
        <-v.Gate
-       if v.Readonly {
+       if v.volume.ReadOnly {
                return MethodDisabledError
        }
        if _, ok := v.Store[loc]; ok {
-               if time.Since(v.Timestamps[loc]) < time.Duration(theConfig.BlobSignatureTTL) {
+               if time.Since(v.Timestamps[loc]) < time.Duration(v.cluster.Collections.BlobSigningTTL) {
                        return nil
                }
                delete(v.Store, loc)
@@ -208,18 +237,10 @@ func (v *MockVolume) Trash(loc string) error {
        return os.ErrNotExist
 }
 
-func (v *MockVolume) DeviceID() string {
+func (v *MockVolume) GetDeviceID() string {
        return "mock-device-id"
 }
 
-func (v *MockVolume) Type() string {
-       return "Mock"
-}
-
-func (v *MockVolume) Start(vm *volumeMetricsVecs) error {
-       return nil
-}
-
 func (v *MockVolume) Untrash(loc string) error {
        return nil
 }
@@ -236,14 +257,6 @@ func (v *MockVolume) String() string {
        return "[MockVolume]"
 }
 
-func (v *MockVolume) Writable() bool {
-       return !v.Readonly
-}
-
-func (v *MockVolume) Replication() int {
-       return 1
-}
-
 func (v *MockVolume) EmptyTrash() {
 }
 
index 70b666af8460a6ca7731d590cf6e8bf77a4af31a..5f163e87c32188e04ac4a05573ad9d922f185a07 100644 (file)
@@ -1,8 +1,9 @@
 PATH
   remote: .
   specs:
-    arvados-login-sync (1.4.0.20190729193732)
+    arvados-login-sync (1.4.1.20190930204434)
       arvados (~> 1.3.0, >= 1.3.0)
+      faraday (< 0.16)
 
 GEM
   remote: https://rubygems.org/
@@ -12,8 +13,8 @@ GEM
       i18n (>= 0.7, < 2)
       minitest (~> 5.1)
       tzinfo (~> 1.1)
-    addressable (2.6.0)
-      public_suffix (>= 2.0.2, < 4.0)
+    addressable (2.7.0)
+      public_suffix (>= 2.0.2, < 5.0)
     andand (1.3.3)
     arvados (1.3.3.20190320201707)
       activesupport (>= 3)
@@ -41,7 +42,7 @@ GEM
     extlib (0.9.16)
     faraday (0.15.4)
       multipart-post (>= 1.2, < 3)
-    googleauth (0.8.1)
+    googleauth (0.9.0)
       faraday (~> 0.12)
       jwt (>= 1.4, < 3.0)
       memoist (~> 0.16)
@@ -62,7 +63,7 @@ GEM
     multi_json (1.13.1)
     multipart-post (2.1.1)
     os (1.0.1)
-    public_suffix (3.1.1)
+    public_suffix (4.0.1)
     rake (12.3.2)
     retriable (1.4.1)
     signet (0.11.0)
index f998a8f35211c89ae81dd89def87d5aef9d46412..36e5ed925283676d28f53a51e021bbfe118925de 100644 (file)
@@ -25,6 +25,8 @@ Gem::Specification.new do |s|
   s.executables << "arvados-login-sync"
   s.required_ruby_version = '>= 2.1.0'
   s.add_runtime_dependency 'arvados', '~> 1.3.0', '>= 1.3.0'
+  # arvados-google-api-client 0.8.7.2 is incompatible with faraday 0.16.2
+  s.add_dependency('faraday', '< 0.16')
   s.homepage    =
     'https://arvados.org'
 end
index 7eef27258b58b1c2b9a3d81b8ece6f4b35ffa019..3da571e69651cc597d5816391eef82de6f0f810b 100644 (file)
@@ -15,6 +15,7 @@ import (
        "git.curoverse.com/arvados.git/lib/config"
        "git.curoverse.com/arvados.git/sdk/go/arvados"
        "git.curoverse.com/arvados.git/sdk/go/arvadostest"
+       "git.curoverse.com/arvados.git/sdk/go/ctxlog"
        check "gopkg.in/check.v1"
 )
 
@@ -28,13 +29,13 @@ type serverSuite struct {
 
 func (s *serverSuite) SetUpTest(c *check.C) {
        var err error
-       s.cluster, err = s.testConfig()
+       s.cluster, err = s.testConfig(c)
        c.Assert(err, check.IsNil)
        s.srv = &server{cluster: s.cluster}
 }
 
-func (*serverSuite) testConfig() (*arvados.Cluster, error) {
-       ldr := config.NewLoader(nil, nil)
+func (*serverSuite) testConfig(c *check.C) (*arvados.Cluster, error) {
+       ldr := config.NewLoader(nil, ctxlog.TestLogger(c))
        cfg, err := ldr.Load()
        if err != nil {
                return nil, err
index 89d1a48078e1ce707975f229239a8abbf21e190d..2081c2ae1509b86305380b1e97803b1104c65630 100755 (executable)
@@ -80,9 +80,13 @@ Clusters:
       SSO:
         ExternalURL: "https://$localip:${services[sso]}"
       Keepproxy:
+        ExternalURL: "https://$localip:${services[keepproxy-ssl]}"
+        InternalURLs:
+          "http://localhost:${services[keepproxy]}": {}
+      Keepstore:
         InternalURLs:
-          "http://localhost:${services[keepproxy]}/": {}
-        ExternalURL: "https://$localip:${services[keepproxy-ssl]}/"
+          "http://localhost:${services[keepstore0]}": {}
+          "http://localhost:${services[keepstore1]}": {}
       Websocket:
         ExternalURL: "wss://$localip:${services[websockets-ssl]}/websocket"
         InternalURLs:
@@ -112,10 +116,6 @@ Clusters:
       RailsAPI:
         InternalURLs:
           "http://localhost:${services[api]}/": {}
-      Keepproxy:
-        ExternalURL: "https://$localip:${services[keepproxy-ssl]}"
-        InternalURLs:
-          "http://localhost:${services[keepproxy]}": {}
     PostgreSQL:
       ConnectionPool: 32 # max concurrent connections per arvados server daemon
       Connection:
@@ -149,6 +149,19 @@ Clusters:
       GitCommand: /usr/share/gitolite3/gitolite-shell
       GitoliteHome: /var/lib/arvados/git
       Repositories: /var/lib/arvados/git/repositories
+    Volumes:
+      ${uuid_prefix}-nyw5e-000000000000000:
+        Driver: Directory
+        DriverParameters:
+          Root: /var/lib/arvados/keep0
+        AccessViaHosts:
+          "http://localhost:${services[keepstore0]}": {}
+      ${uuid_prefix}-nyw5e-111111111111111:
+        Driver: Directory
+        DriverParameters:
+          Root: /var/lib/arvados/keep1
+        AccessViaHosts:
+          "http://localhost:${services[keepstore1]}": {}
 EOF
 
 /usr/local/lib/arvbox/yml_override.py /var/lib/arvados/cluster_config.yml
index f16cb44b7f56de46ab0e4be35ade64a4f3693ff1..0c4daec1bf26b2a5db04451b71608d4e6a8d0287 100755 (executable)
@@ -26,7 +26,7 @@ export ARVADOS_API_TOKEN=$(cat /var/lib/arvados/superuser_token)
 set +e
 read -rd $'\000' keepservice <<EOF
 {
- "service_host":"$localip",
+ "service_host":"localhost",
  "service_port":$2,
  "service_ssl_flag":false,
  "service_type":"disk"
@@ -45,17 +45,14 @@ fi
 management_token=$(cat /var/lib/arvados/management_token)
 
 set +e
-killall -HUP keepproxy
+sv hup /var/lib/arvbox/service/keepproxy
 
 cat >/var/lib/arvados/$1.yml <<EOF
-Listen: ":$2"
+Listen: "localhost:$2"
 BlobSigningKeyFile: /var/lib/arvados/blob_signing_key
 SystemAuthTokenFile: /var/lib/arvados/superuser_token
 ManagementToken: $management_token
 MaxBuffers: 20
-Volumes:
-  - Type: Directory
-    Root: /var/lib/arvados/$1
 EOF
 
 exec /usr/local/bin/keepstore -config=/var/lib/arvados/$1.yml
index f0733e0c2b86ded24445a968670169ca80c953eb..4235bcd0c864761701d0f98febf9f80def47aabf 100755 (executable)
@@ -10,7 +10,7 @@ set -ex -o pipefail
 
 cd /usr/src/composer
 
-npm -d install --prefix /usr/local --global yarn
+npm -d install --prefix /usr/local --global yarn@1.17.3
 
 yarn install
 
index 0d60e74128365605a49194b27cb2cf9c09af9618..1b28a8d014a9fbefc3291440b781d96986fd4caa 100755 (executable)
@@ -28,6 +28,12 @@ http {
      default_type application/octet-stream;
      client_max_body_size 128M;
 
+     geo \$external_client {
+          default     1;
+          127.0.0.0/8 0;
+          $localip/32 0;
+     }
+
      server {
             listen ${services[doc]} default_server;
             listen [::]:${services[doc]} default_server;
@@ -55,6 +61,7 @@ http {
       proxy_set_header Host \$http_host;
       proxy_set_header X-Forwarded-For \$proxy_add_x_forwarded_for;
       proxy_set_header X-Forwarded-Proto https;
+      proxy_set_header X-External-Client \$external_client;
       proxy_redirect off;
     }
   }
index e9e1ca4f8c8b0901c1e3792f2eb50d25f74c8fc3..85c03399f79e1410582ec21a844558dcf4c2708c 100755 (executable)
@@ -10,7 +10,7 @@ set -ex -o pipefail
 
 cd /usr/src/workbench2
 
-npm -d install --prefix /usr/local --global yarn
+npm -d install --prefix /usr/local --global yarn@1.17.3
 
 yarn install