Merge branch '5824-keep-web-workbench' refs #5824
authorTom Clegg <tom@curoverse.com>
Wed, 11 Nov 2015 17:14:16 +0000 (12:14 -0500)
committerTom Clegg <tom@curoverse.com>
Wed, 11 Nov 2015 17:14:16 +0000 (12:14 -0500)
24 files changed:
apps/workbench/app/controllers/collections_controller.rb
apps/workbench/config/application.default.yml
apps/workbench/test/controllers/collections_controller_test.rb
apps/workbench/test/helpers/download_helper.rb [new file with mode: 0644]
apps/workbench/test/integration/collection_upload_test.rb
apps/workbench/test/integration/download_test.rb [new file with mode: 0644]
apps/workbench/test/integration_helper.rb
apps/workbench/test/test_helper.rb
doc/install/install-keep-web.html.textile.liquid
sdk/go/arvadosclient/arvadosclient_test.go
sdk/go/arvadostest/run_servers.go
sdk/go/keepclient/discover.go [new file with mode: 0644]
sdk/go/keepclient/discover_test.go [new file with mode: 0644]
sdk/go/keepclient/support.go
sdk/python/tests/nginx.conf
sdk/python/tests/run_test_server.py
services/api/app/controllers/database_controller.rb
services/api/test/fixtures/api_client_authorizations.yml
services/keep-web/handler.go
services/keep-web/handler_test.go
services/keepproxy/keepproxy.go
services/keepproxy/keepproxy_test.go
services/keepstore/pull_worker_integration_test.go
tools/keep-rsync/keep-rsync_test.go

index e01151ca408b567ec9908349bdfd7a51bcd15a2f..0806a0a4297e57c9c546a3a5d724c56a19e6c32f 100644 (file)
@@ -1,4 +1,6 @@
 require "arvados/keep"
+require "uri"
+require "cgi"
 
 class CollectionsController < ApplicationController
   include ActionController::Live
@@ -130,11 +132,34 @@ class CollectionsController < ApplicationController
     usable_token = find_usable_token(tokens) do
       coll = Collection.find(params[:uuid])
     end
+    if usable_token.nil?
+      # Response already rendered.
+      return
+    end
+
+    # If we are configured to use a keep-web server, just redirect to
+    # the appropriate URL.
+    if Rails.configuration.keep_web_url or
+        Rails.configuration.keep_web_download_url
+      opts = {}
+      if usable_token == params[:reader_token]
+        opts[:path_token] = usable_token
+      elsif usable_token == Rails.configuration.anonymous_user_token
+        # Don't pass a token at all
+      else
+        # We pass the current user's real token only if it's necessary
+        # to read the collection.
+        opts[:query_token] = usable_token
+      end
+      opts[:disposition] = params[:disposition] if params[:disposition]
+      return redirect_to keep_web_url(params[:uuid], params[:file], opts)
+    end
+
+    # No keep-web server available. Get the file data with arv-get,
+    # and serve it through Rails.
 
     file_name = params[:file].andand.sub(/^(\.\/|\/|)/, './')
-    if usable_token.nil?
-      return  # Response already rendered.
-    elsif file_name.nil? or not coll.manifest.has_file?(file_name)
+    if file_name.nil? or not coll.manifest.has_file?(file_name)
       return render_not_found
     end
 
@@ -305,6 +330,61 @@ class CollectionsController < ApplicationController
     return nil
   end
 
+  def keep_web_url(uuid_or_pdh, file, opts)
+    munged_id = uuid_or_pdh.sub('+', '-')
+    fmt = {uuid_or_pdh: munged_id}
+
+    tmpl = Rails.configuration.keep_web_url
+    if Rails.configuration.keep_web_download_url and
+        (!tmpl or opts[:disposition] == 'attachment')
+      # Prefer the attachment-only-host when we want an attachment
+      # (and when there is no preview link configured)
+      tmpl = Rails.configuration.keep_web_download_url
+    else
+      check_uri = URI.parse(tmpl % fmt)
+      if opts[:query_token] and
+          not check_uri.host.start_with?(munged_id + "--") and
+          not check_uri.host.start_with?(munged_id + ".")
+        # We're about to pass a token in the query string, but
+        # keep-web can't accept that safely at a single-origin URL
+        # template (unless it's -attachment-only-host).
+        tmpl = Rails.configuration.keep_web_download_url
+        if not tmpl
+          raise ArgumentError, "Download precluded by site configuration"
+        end
+        logger.warn("Using download link, even though inline content " \
+                    "was requested: #{check_uri.to_s}")
+      end
+    end
+
+    if tmpl == Rails.configuration.keep_web_download_url
+      # This takes us to keep-web's -attachment-only-host so there is
+      # no need to add ?disposition=attachment.
+      opts.delete :disposition
+    end
+
+    uri = URI.parse(tmpl % fmt)
+    uri.path += '/' unless uri.path.end_with? '/'
+    if opts[:path_token]
+      uri.path += 't=' + opts[:path_token] + '/'
+    end
+    uri.path += '_/'
+    uri.path += CGI::escape(file)
+
+    query = CGI::parse(uri.query || '')
+    { query_token: 'api_token',
+      disposition: 'disposition' }.each do |opt, param|
+      if opts.include? opt
+        query[param] = opts[opt]
+      end
+    end
+    unless query.empty?
+      uri.query = query.to_query
+    end
+
+    uri.to_s
+  end
+
   # Note: several controller and integration tests rely on stubbing
   # file_enumerator to return fake file content.
   def file_enumerator opts
index 00959bbb3bea30568e62ddb82c9f0d7f733fb44d..c4a92d273950bbebe15b0faf29ccbe58b3cde3b4 100644 (file)
@@ -225,3 +225,32 @@ common:
   # E.g., using a name-based proxy server to forward connections to shell hosts:
   # https://%{hostname}.webshell.uuid_prefix.arvadosapi.com/
   shell_in_a_box_url: false
+
+  # Format of preview links. If false, use keep_web_download_url
+  # instead, and disable inline preview. If both are false, use
+  # Workbench's built-in file download/preview mechanism.
+  #
+  # Examples:
+  # keep_web_url: https://%{uuid_or_pdh}.collections.uuid_prefix.arvadosapi.com
+  # keep_web_url: https://%{uuid_or_pdh}--collections.uuid_prefix.arvadosapi.com
+  #
+  # Example supporting only public data and collection-sharing links
+  # (other data will be handled as downloads via keep_web_download_url):
+  # keep_web_url: https://collections.uuid_prefix.arvadosapi.com/c=%{uuid_or_pdh}
+  keep_web_url: false
+
+  # Format of download links. If false, use keep_web_url with
+  # disposition=attachment query param.
+  #
+  # The host part of the keep_web_download_url value here must match
+  # the -attachment-only-host argument given to keep-web: if
+  # keep_web_download_url is "https://FOO.EXAMPLE/c=..." then keep-web
+  # must run with "-attachment-only-host=FOO.EXAMPLE".
+  #
+  # If keep_web_download_url is false, and keep_web_url uses a
+  # single-origin form, then Workbench will show an error page
+  # when asked to download or preview private data.
+  #
+  # Example:
+  # keep_web_download_url: https://download.uuid_prefix.arvadosapi.com/c=%{uuid_or_pdh}
+  keep_web_download_url: false
index 13644e00bdce28db3460aa2f722f679deb107c7e..8c42145962af30718a4b6aa52c6b23e7dcd35307 100644 (file)
@@ -10,6 +10,15 @@ class CollectionsControllerTest < ActionController::TestCase
 
   NONEXISTENT_COLLECTION = "ffffffffffffffffffffffffffffffff+0"
 
+  def config_anonymous enable
+    Rails.configuration.anonymous_user_token =
+      if enable
+        api_fixture('api_client_authorizations')['anonymous']['api_token']
+      else
+        false
+      end
+  end
+
   def stub_file_content
     # For the duration of the current test case, stub file download
     # content with a randomized (but recognizable) string. Return the
@@ -167,8 +176,7 @@ class CollectionsControllerTest < ActionController::TestCase
   end
 
   test 'anonymous download' do
-    Rails.configuration.anonymous_user_token =
-      api_fixture('api_client_authorizations')['anonymous']['api_token']
+    config_anonymous true
     expect_content = stub_file_content
     get :show_file, {
       uuid: api_fixture('collections')['user_agreement_in_anonymously_accessible_project']['uuid'],
@@ -205,15 +213,14 @@ class CollectionsControllerTest < ActionController::TestCase
                      "using a reader token set the session's API token")
   end
 
-  [false, api_fixture('api_client_authorizations')['anonymous']['api_token']].
-    each do |anon_conf|
-    test "download a file using a reader token with insufficient scope (anon_conf=#{!!anon_conf})" do
-      Rails.configuration.anonymous_user_token = anon_conf
+  [false, true].each do |anon|
+    test "download a file using a reader token with insufficient scope, anon #{anon}" do
+      config_anonymous anon
       params = collection_params(:foo_file, 'foo')
       params[:reader_token] =
         api_fixture('api_client_authorizations')['active_noscope']['api_token']
       get(:show_file, params)
-      if anon_conf
+      if anon
         # Some files can be shown without a valid token, but not this one.
         assert_response 404
       else
@@ -463,8 +470,7 @@ class CollectionsControllerTest < ActionController::TestCase
   end
 
   test "anonymous user accesses collection in shared project" do
-    Rails.configuration.anonymous_user_token =
-      api_fixture('api_client_authorizations')['anonymous']['api_token']
+    config_anonymous true
     collection = api_fixture('collections')['public_text_file']
     get(:show, {id: collection['uuid']})
 
@@ -514,4 +520,109 @@ class CollectionsControllerTest < ActionController::TestCase
     get :show, {id: api_fixture('collections')['user_agreement']['uuid']}, session_for(:active)
     assert_not_includes @response.body, '<a href="#Upload"'
   end
+
+  def setup_for_keep_web cfg='https://%{uuid_or_pdh}.example', dl_cfg=false
+    Rails.configuration.keep_web_url = cfg
+    Rails.configuration.keep_web_download_url = dl_cfg
+    @controller.expects(:file_enumerator).never
+  end
+
+  %w(uuid portable_data_hash).each do |id_type|
+    test "Redirect to keep_web_url via #{id_type}" do
+      setup_for_keep_web
+      tok = api_fixture('api_client_authorizations')['active']['api_token']
+      id = api_fixture('collections')['w_a_z_file'][id_type]
+      get :show_file, {uuid: id, file: "w a z"}, session_for(:active)
+      assert_response :redirect
+      assert_equal "https://#{id.sub '+', '-'}.example/_/w+a+z?api_token=#{tok}", @response.redirect_url
+    end
+
+    test "Redirect to keep_web_url via #{id_type} with reader token" do
+      setup_for_keep_web
+      tok = api_fixture('api_client_authorizations')['active']['api_token']
+      id = api_fixture('collections')['w_a_z_file'][id_type]
+      get :show_file, {uuid: id, file: "w a z", reader_token: tok}, session_for(:expired)
+      assert_response :redirect
+      assert_equal "https://#{id.sub '+', '-'}.example/t=#{tok}/_/w+a+z", @response.redirect_url
+    end
+
+    test "Redirect to keep_web_url via #{id_type} with no token" do
+      setup_for_keep_web
+      config_anonymous true
+      id = api_fixture('collections')['public_text_file'][id_type]
+      get :show_file, {uuid: id, file: "Hello World.txt"}
+      assert_response :redirect
+      assert_equal "https://#{id.sub '+', '-'}.example/_/Hello+World.txt", @response.redirect_url
+    end
+
+    test "Redirect to keep_web_url via #{id_type} with disposition param" do
+      setup_for_keep_web
+      config_anonymous true
+      id = api_fixture('collections')['public_text_file'][id_type]
+      get :show_file, {
+        uuid: id,
+        file: "Hello World.txt",
+        disposition: 'attachment',
+      }
+      assert_response :redirect
+      assert_equal "https://#{id.sub '+', '-'}.example/_/Hello+World.txt?disposition=attachment", @response.redirect_url
+    end
+
+    test "Redirect to keep_web_download_url via #{id_type}" do
+      setup_for_keep_web('https://collections.example/c=%{uuid_or_pdh}',
+                         'https://download.example/c=%{uuid_or_pdh}')
+      tok = api_fixture('api_client_authorizations')['active']['api_token']
+      id = api_fixture('collections')['w_a_z_file'][id_type]
+      get :show_file, {uuid: id, file: "w a z"}, session_for(:active)
+      assert_response :redirect
+      assert_equal "https://download.example/c=#{id.sub '+', '-'}/_/w+a+z?api_token=#{tok}", @response.redirect_url
+    end
+  end
+
+  [false, true].each do |anon|
+    test "No redirect to keep_web_url if collection not found, anon #{anon}" do
+      setup_for_keep_web
+      config_anonymous anon
+      id = api_fixture('collections')['w_a_z_file']['uuid']
+      get :show_file, {uuid: id, file: "w a z"}, session_for(:spectator)
+      assert_response 404
+    end
+
+    test "Redirect download to keep_web_download_url, anon #{anon}" do
+      config_anonymous anon
+      setup_for_keep_web('https://collections.example/c=%{uuid_or_pdh}',
+                         'https://download.example/c=%{uuid_or_pdh}')
+      tok = api_fixture('api_client_authorizations')['active']['api_token']
+      id = api_fixture('collections')['public_text_file']['uuid']
+      get :show_file, {
+        uuid: id,
+        file: 'Hello world.txt',
+        disposition: 'attachment',
+      }, session_for(:active)
+      assert_response :redirect
+      expect_url = "https://download.example/c=#{id.sub '+', '-'}/_/Hello+world.txt"
+      if not anon
+        expect_url += "?api_token=#{tok}"
+      end
+      assert_equal expect_url, @response.redirect_url
+    end
+  end
+
+  test "Error if file is impossible to retrieve from keep_web_url" do
+    # Cannot pass a session token using a single-origin keep-web URL,
+    # cannot read this collection without a session token.
+    setup_for_keep_web 'https://collections.example/c=%{uuid_or_pdh}', false
+    id = api_fixture('collections')['w_a_z_file']['uuid']
+    get :show_file, {uuid: id, file: "w a z"}, session_for(:active)
+    assert_response 422
+  end
+
+  test "Redirect preview to keep_web_download_url when preview is disabled" do
+    setup_for_keep_web false, 'https://download.example/c=%{uuid_or_pdh}'
+    tok = api_fixture('api_client_authorizations')['active']['api_token']
+    id = api_fixture('collections')['w_a_z_file']['uuid']
+    get :show_file, {uuid: id, file: "w a z"}, session_for(:active)
+    assert_response :redirect
+    assert_equal "https://download.example/c=#{id.sub '+', '-'}/_/w+a+z?api_token=#{tok}", @response.redirect_url
+  end
 end
diff --git a/apps/workbench/test/helpers/download_helper.rb b/apps/workbench/test/helpers/download_helper.rb
new file mode 100644 (file)
index 0000000..21fb4cd
--- /dev/null
@@ -0,0 +1,21 @@
+module DownloadHelper
+  module_function
+
+  def path
+    Rails.root.join 'tmp', 'downloads'
+  end
+
+  def clear
+    FileUtils.rm_f path
+    begin
+      Dir.mkdir path
+    rescue Errno::EEXIST
+    end
+  end
+
+  def done
+    Dir[path.join '*'].reject do |f|
+      /\.part$/ =~ f
+    end
+  end
+end
index 62efee4d67e6b4e5a84e2340bcc55902b18ba30d..903df90fb419c7ba231ae3c42d679abc85af41ed 100644 (file)
@@ -7,9 +7,19 @@ class CollectionUploadTest < ActionDispatch::IntegrationTest
         io.write content
       end
     end
+    # Database reset doesn't restore KeepServices; we have to
+    # save/restore manually.
+    use_token :admin do
+      @keep_services = KeepService.all.to_a
+    end
   end
 
   teardown do
+    use_token :admin do
+      @keep_services.each do |ks|
+        KeepService.find(ks.uuid).update_attributes(ks.attributes)
+      end
+    end
     testfiles.each do |filename, _|
       File.unlink(testfile_path filename)
     end
@@ -42,7 +52,7 @@ class CollectionUploadTest < ActionDispatch::IntegrationTest
     assert_match /_text":"\. d41d8\S+ 0:0:empty.txt\\n\. d41d8\S+ 0:0:empty\\\\040\(1\).txt\\n"/, body
   end
 
-  test "Upload non-empty files, report errors" do
+  test "Upload non-empty files" do
     need_selenium "to make file uploads work"
     visit page_with_token 'active', sandbox_path
     find('.nav-tabs a', text: 'Upload').click
@@ -50,24 +60,17 @@ class CollectionUploadTest < ActionDispatch::IntegrationTest
     attach_file 'file_selector', testfile_path('foo.txt')
     assert_selector 'button:not([disabled])', text: 'Start'
     click_button 'Start'
-    if "test environment does not have a keepproxy yet, see #4534" != "fixed"
-      using_wait_time 20 do
-        assert_text :visible, 'error'
-      end
-    else
-      assert_text :visible, 'Done!'
-      visit sandbox_path+'.json'
-      assert_match /_text":"\. 0cc1\S+ 0:1:a\\n\. acbd\S+ 0:3:foo.txt\\n"/, body
-    end
+    assert_text :visible, 'Done!'
+    visit sandbox_path+'.json'
+    assert_match /_text":"\. 0cc1\S+ 0:1:a\\n\. acbd\S+ 0:3:foo.txt\\n"/, body
   end
 
   test "Report mixed-content error" do
     skip 'Test suite does not use TLS'
     need_selenium "to make file uploads work"
-    begin
-      use_token :admin
-      proxy = KeepService.find(api_fixture('keep_services')['proxy']['uuid'])
-      proxy.update_attributes service_ssl_flag: false
+    use_token :admin do
+      KeepService.where(service_type: 'proxy').first.
+        update_attributes(service_ssl_flag: false)
     end
     visit page_with_token 'active', sandbox_path
     find('.nav-tabs a', text: 'Upload').click
@@ -82,11 +85,12 @@ class CollectionUploadTest < ActionDispatch::IntegrationTest
 
   test "Report network error" do
     need_selenium "to make file uploads work"
-    begin
-      use_token :admin
-      proxy = KeepService.find(api_fixture('keep_services')['proxy']['uuid'])
-      # Even if you somehow do port>2^16, surely nx.example.net won't respond
-      proxy.update_attributes service_host: 'nx.example.net', service_port: 99999
+    use_token :admin do
+      # Even if you somehow do port>2^16, surely nx.example.net won't
+      # respond
+      KeepService.where(service_type: 'proxy').first.
+        update_attributes(service_host: 'nx.example.net',
+                          service_port: 99999)
     end
     visit page_with_token 'active', sandbox_path
     find('.nav-tabs a', text: 'Upload').click
diff --git a/apps/workbench/test/integration/download_test.rb b/apps/workbench/test/integration/download_test.rb
new file mode 100644 (file)
index 0000000..cf4246c
--- /dev/null
@@ -0,0 +1,51 @@
+require 'integration_helper'
+require 'helpers/download_helper'
+
+class DownloadTest < ActionDispatch::IntegrationTest
+  setup do
+    portfile = File.expand_path '../../../../../tmp/keep-web-ssl.port', __FILE__
+    @kwport = File.read portfile
+    Rails.configuration.keep_web_url = "https://localhost:#{@kwport}/c=%{uuid_or_pdh}"
+    CollectionsController.any_instance.expects(:file_enumerator).never
+
+    # Make sure Capybara can download files.
+    need_selenium 'for downloading', :selenium_with_download
+    DownloadHelper.clear
+
+    # Keep data isn't populated by fixtures, so we have to write any
+    # data we expect to read.
+    unless /^acbd/ =~ `echo -n foo | arv-put --no-progress --raw -` && $?.success?
+      raise $?.to_s
+    end
+  end
+
+  ['uuid', 'portable_data_hash'].each do |id_type|
+    test "download from keep-web by #{id_type} using a reader token" do
+      uuid_or_pdh = api_fixture('collections')['foo_file'][id_type]
+      token = api_fixture('api_client_authorizations')['active_all_collections']['api_token']
+      visit "/collections/download/#{uuid_or_pdh}/#{token}/"
+      within "#collection_files" do
+        click_link "foo"
+      end
+      wait_for_download 'foo', 'foo'
+    end
+  end
+
+  def wait_for_download filename, expect_data
+    data = nil
+    tries = 0
+    while tries < 20
+      sleep 0.1
+      tries += 1
+      data = File.read(DownloadHelper.path.join filename) rescue nil
+    end
+    assert_equal expect_data, data
+  end
+
+  # TODO(TC): test "view pages hosted by keep-web, using session
+  # token". We might persuade selenium to send
+  # "collection-uuid.dl.example" requests to localhost by configuring
+  # our test nginx server to work as its forward proxy. Until then,
+  # we're relying on the "Redirect to keep_web_url via #{id_type}"
+  # test in CollectionsControllerTest (and keep-web's tests).
+end
index 39fdf4b260abd68be05f252158f29629aa199199..1cda1275e11724074d2fd39e1c273e7613b74bd4 100644 (file)
@@ -19,6 +19,18 @@ Capybara.register_driver :poltergeist_without_file_api do |app|
   Capybara::Poltergeist::Driver.new app, POLTERGEIST_OPTS.merge(extensions: [js])
 end
 
+Capybara.register_driver :selenium_with_download do |app|
+  profile = Selenium::WebDriver::Firefox::Profile.new
+  profile['browser.download.dir'] = DownloadHelper.path.to_s
+  profile['browser.download.downloadDir'] = DownloadHelper.path.to_s
+  profile['browser.download.defaultFolder'] = DownloadHelper.path.to_s
+  profile['browser.download.folderList'] = 2 # "save to user-defined location"
+  profile['browser.download.manager.showWhenStarting'] = false
+  profile['browser.helperApps.alwaysAsk.force'] = false
+  profile['browser.helperApps.neverAsk.saveToDisk'] = 'text/plain,application/octet-stream'
+  Capybara::Selenium::Driver.new app, profile: profile
+end
+
 module WaitForAjax
   Capybara.default_wait_time = 5
   def wait_for_ajax
@@ -73,8 +85,8 @@ module HeadlessHelper
     end
   end
 
-  def need_selenium reason=nil
-    Capybara.current_driver = :selenium
+  def need_selenium reason=nil, driver=:selenium
+    Capybara.current_driver = driver
     unless ENV['ARVADOS_TEST_HEADFUL'] or @headless
       @headless = HeadlessSingleton.get
       @headless.start
index 89d15c67d8de3708c4d74540518b9707e09aa432..41592af993261b6affd55fa6cba0f05780d475ec 100644 (file)
@@ -176,7 +176,10 @@ class ApiServerForTests
       # though it doesn't need to start up a new server).
       env_script = check_output %w(python ./run_test_server.py start --auth admin)
       check_output %w(python ./run_test_server.py start_arv-git-httpd)
+      check_output %w(python ./run_test_server.py start_keep-web)
       check_output %w(python ./run_test_server.py start_nginx)
+      # This one isn't a no-op, even under run-tests.sh.
+      check_output %w(python ./run_test_server.py start_keep)
     end
     test_env = {}
     env_script.each_line do |line|
@@ -192,9 +195,11 @@ class ApiServerForTests
 
   def stop_test_server
     Dir.chdir PYTHON_TESTS_DIR do
+      check_output %w(python ./run_test_server.py stop_keep)
       # These are no-ops if we're running within run-tests.sh
       check_output %w(python ./run_test_server.py stop_nginx)
       check_output %w(python ./run_test_server.py stop_arv-git-httpd)
+      check_output %w(python ./run_test_server.py stop_keep-web)
       check_output %w(python ./run_test_server.py stop)
     end
     @@server_is_running = false
index 11a425d3476d5ae69ba74f0e81ab8dcd14d219fa..5eb4191e8cab6133944904a6a43c4028b19e39ee 100644 (file)
@@ -6,14 +6,15 @@ title: Install the keep-web server
 
 The keep-web server provides read-only HTTP access to files stored in Keep. It serves public data to unauthenticated clients, and serves private data to clients that supply Arvados API tokens. It can be installed anywhere with access to Keep services, typically behind a web proxy that provides SSL support. See the "godoc page":http://godoc.org/github.com/curoverse/arvados/services/keep-web for more detail.
 
-By convention, we use the following hostname for the keep-web service:
+By convention, we use the following hostnames for the keep-web service:
 
 <notextile>
-<pre><code>collections.<span class="userinput">uuid_prefix</span>.your.domain
+<pre><code>download.<span class="userinput">uuid_prefix</span>.your.domain
+collections.<span class="userinput">uuid_prefix</span>.your.domain
 </code></pre>
 </notextile>
 
-This hostname should resolve from anywhere on the internet.
+The above hostnames should resolve from anywhere on the internet.
 
 h2. Install keep-web
 
@@ -59,7 +60,11 @@ We recommend running @keep-web@ under "runit":https://packages.debian.org/search
 <notextile>
 <pre><code>export ARVADOS_API_HOST=<span class="userinput">uuid_prefix</span>.your.domain
 export ARVADOS_API_TOKEN="<span class="userinput">hoShoomoo2bai3Ju1xahg6aeng1siquuaZ1yae2gi2Uhaeng2r</span>"
-exec sudo -u nobody keep-web -listen=<span class="userinput">:9002</span> -allow-anonymous 2&gt;&amp;1
+exec sudo -u nobody keep-web \
+ -listen=<span class="userinput">:9002</span> \
+ -attachment-only-host=<span class="userinput">download.uuid_prefix.your.domain</span> \
+ -allow-anonymous \
+ 2&gt;&amp;1
 </code></pre>
 </notextile>
 
@@ -73,7 +78,7 @@ The keep-web service will be accessible from anywhere on the internet, so we rec
 
 This is best achieved by putting a reverse proxy with SSL support in front of keep-web, running on port 443 and passing requests to keep-web on port 9002 (or whatever port you chose in your run script).
 
-Note: A wildcard SSL certificate is required in order to proxy keep-web effectively.
+Note: A wildcard SSL certificate is required in order to support a full-featured secure keep-web service. Without it, keep-web can offer file downloads for all Keep data; however, in order to avoid cross-site scripting vulnerabilities, keep-web refuses to serve private data as web content except when it is accessed using a "secret link" share. With a wildcard SSL certificate and DNS configured appropriately, all data can be served as web content.
 
 For example, using Nginx:
 
@@ -84,7 +89,10 @@ upstream keep-web {
 
 server {
   listen                <span class="userinput">[your public IP address]</span>:443 ssl;
-  server_name           collections.<span class="userinput">uuid_prefix</span>.your.domain *.collections.<span class="userinput">uuid_prefix</span>.your.domain ~.*--collections.<span class="userinput">uuid_prefix</span>.your.domain;
+  server_name           download.<span class="userinput">uuid_prefix</span>.your.domain
+                        collections.<span class="userinput">uuid_prefix</span>.your.domain
+                        *.collections.<span class="userinput">uuid_prefix</span>.your.domain
+                        ~.*--collections.<span class="userinput">uuid_prefix</span>.your.domain;
 
   proxy_connect_timeout 90s;
   proxy_read_timeout    300s;
@@ -104,17 +112,29 @@ server {
 h3. Configure DNS
 
 Configure your DNS servers so the following names resolve to your Nginx proxy's public IP address.
-* @*--collections.uuid_prefix.your.domain@, if your DNS server allows this without interfering with other DNS names; or
-* @*.collections.uuid_prefix.your.domain@, if you have a wildcard SSL certificate valid for these names; or
-* @collections.uuid_prefix.your.domain@, if neither of the above options is feasible. In this case, only unauthenticated requests will be served, i.e., public data and collection sharing links.
+* @download.uuid_prefix.your.domain@
+* @collections.uuid_prefix.your.domain@
+* @*--collections.uuid_prefix.your.domain@, if you have a wildcard SSL certificate valid for @*.uuid_prefix.your.domain@ and your DNS server allows this without interfering with other DNS names.
+* @*.collections.uuid_prefix.your.domain@, if you have a wildcard SSL certificate valid for these names.
+
+If neither of the above wildcard options is feasible, only unauthenticated requests (public data and collection sharing links) will be served as web content at @collections.uuid_prefix.your.domain@. The @download@ name will be used to serve authenticated content, but only as file downloads.
 
 h3. Tell Workbench about the keep-web service
 
-Add *one* of the following entries to your Workbench configuration file (@/etc/arvados/workbench/application.yml@), depending on your DNS setup:
+Workbench has features like "download file from collection" and "show image" which work better if the content is served by keep-web rather than Workbench itself. We recommend using the two different hostnames ("download" and "collections" above) for file downloads and inline content respectively.
+
+Add the following entry to your Workbench configuration file (@/etc/arvados/workbench/application.yml@). This URL will be used for file downloads.
+
+<notextile>
+<pre><code>keep_web_download_url: https://download.<span class="userinput">uuid_prefix</span>.your.domain/c=%{uuid_or_pdh}
+</code></pre>
+</notextile>
+
+Additionally, add *one* of the following entries to your Workbench configuration file, depending on your DNS setup. This URL will be used to serve user content that can be displayed in the browser, like image previews and static HTML pages.
 
 <notextile>
 <pre><code>keep_web_url: https://%{uuid_or_pdh}--collections.<span class="userinput">uuid_prefix</span>.your.domain
 keep_web_url: https://%{uuid_or_pdh}.collections.<span class="userinput">uuid_prefix</span>.your.domain
-keep_web_url: https://collections.<span class="userinput">uuid_prefix</span>.your.domain
+keep_web_url: https://collections.<span class="userinput">uuid_prefix</span>.your.domain/c=%{uuid_or_pdh}
 </code></pre>
 </notextile>
index f2fd0b1b334a3e3790e6bb8c39a60db8a335ad66..8e32efe4f987adccbe77b573a15ab27dbdfcc707 100644 (file)
@@ -29,6 +29,11 @@ func (s *ServerRequiredSuite) SetUpSuite(c *C) {
        RetryDelay = 0
 }
 
+func (s *ServerRequiredSuite) TearDownSuite(c *C) {
+       arvadostest.StopKeep(2)
+       arvadostest.StopAPI()
+}
+
 func (s *ServerRequiredSuite) SetUpTest(c *C) {
        arvadostest.ResetEnv()
 }
index 27c552a4e104094ca2ed15991e310a3b7e9cd65e..c61b68b319fbe50b5e71ea97e5751da57af687d9 100644 (file)
@@ -3,9 +3,6 @@ package arvadostest
 import (
        "bufio"
        "bytes"
-       "fmt"
-       "io"
-       "io/ioutil"
        "log"
        "os"
        "os/exec"
@@ -68,24 +65,12 @@ func StartAPI() {
        chdirToPythonTests()
 
        cmd := exec.Command("python", "run_test_server.py", "start", "--auth", "admin")
-       stderr, err := cmd.StderrPipe()
-       if err != nil {
-               log.Fatal(err)
-       }
-       go io.Copy(os.Stderr, stderr)
-       stdout, err := cmd.StdoutPipe()
+       cmd.Stdin = nil
+       cmd.Stderr = os.Stderr
+
+       authScript, err := cmd.Output()
        if err != nil {
-               log.Fatal(err)
-       }
-       if err = cmd.Start(); err != nil {
-               log.Fatal(err)
-       }
-       var authScript []byte
-       if authScript, err = ioutil.ReadAll(stdout); err != nil {
-               log.Fatal(err)
-       }
-       if err = cmd.Wait(); err != nil {
-               log.Fatal(err)
+               log.Fatalf("%+v: %s", cmd.Args, err)
        }
        ParseAuthSettings(authScript)
        ResetEnv()
@@ -96,7 +81,7 @@ func StopAPI() {
        defer os.Chdir(cwd)
        chdirToPythonTests()
 
-       exec.Command("python", "run_test_server.py", "stop").Run()
+       bgRun(exec.Command("python", "run_test_server.py", "stop"))
 }
 
 // StartKeep starts the given number of keep servers,
@@ -112,16 +97,7 @@ func StartKeep(numKeepServers int, enforcePermissions bool) {
                cmdArgs = append(cmdArgs, "--keep-enforce-permissions")
        }
 
-       cmd := exec.Command("python", cmdArgs...)
-
-       stderr, err := cmd.StderrPipe()
-       if err != nil {
-               log.Fatalf("Setting up stderr pipe: %s", err)
-       }
-       go io.Copy(os.Stderr, stderr)
-       if err := cmd.Run(); err != nil {
-               panic(fmt.Sprintf("'python run_test_server.py start_keep' returned error %s", err))
-       }
+       bgRun(exec.Command("python", cmdArgs...))
 }
 
 // StopKeep stops keep servers that were started with StartKeep.
@@ -132,5 +108,27 @@ func StopKeep(numKeepServers int) {
        defer os.Chdir(cwd)
        chdirToPythonTests()
 
-       exec.Command("python", "run_test_server.py", "stop_keep", "--num-keep-servers", strconv.Itoa(numKeepServers))
+       cmd := exec.Command("python", "run_test_server.py", "stop_keep", "--num-keep-servers", strconv.Itoa(numKeepServers))
+       cmd.Stdin = nil
+       cmd.Stderr = os.Stderr
+       cmd.Stdout = os.Stderr
+       if err := cmd.Run(); err != nil {
+               log.Fatalf("%+v: %s", cmd.Args, err)
+       }
+}
+
+// Start cmd, with stderr and stdout redirected to our own
+// stderr. Return when the process exits, but do not wait for its
+// stderr and stdout to close: any grandchild processes will continue
+// writing to our stderr.
+func bgRun(cmd *exec.Cmd) {
+       cmd.Stdin = nil
+       cmd.Stderr = os.Stderr
+       cmd.Stdout = os.Stderr
+       if err := cmd.Start(); err != nil {
+               log.Fatalf("%+v: %s", cmd.Args, err)
+       }
+       if _, err := cmd.Process.Wait(); err != nil {
+               log.Fatalf("%+v: %s", cmd.Args, err)
+       }
 }
diff --git a/sdk/go/keepclient/discover.go b/sdk/go/keepclient/discover.go
new file mode 100644 (file)
index 0000000..650c2b5
--- /dev/null
@@ -0,0 +1,130 @@
+package keepclient
+
+import (
+       "encoding/json"
+       "fmt"
+       "log"
+       "os"
+       "os/signal"
+       "reflect"
+       "strings"
+       "syscall"
+       "time"
+)
+
+// DiscoverKeepServers gets list of available keep services from api server
+func (this *KeepClient) DiscoverKeepServers() error {
+       var list svcList
+
+       // Get keep services from api server
+       err := this.Arvados.Call("GET", "keep_services", "", "accessible", nil, &list)
+       if err != nil {
+               return err
+       }
+
+       return this.loadKeepServers(list)
+}
+
+// LoadKeepServicesFromJSON gets list of available keep services from given JSON
+func (this *KeepClient) LoadKeepServicesFromJSON(services string) error {
+       var list svcList
+
+       // Load keep services from given json
+       dec := json.NewDecoder(strings.NewReader(services))
+       if err := dec.Decode(&list); err != nil {
+               return err
+       }
+
+       return this.loadKeepServers(list)
+}
+
+// RefreshServices calls DiscoverKeepServers to refresh the keep
+// service list on SIGHUP; when the given interval has elapsed since
+// the last refresh; and (if the last refresh failed) the given
+// errInterval has elapsed.
+func (kc *KeepClient) RefreshServices(interval, errInterval time.Duration) {
+       var previousRoots = []map[string]string{}
+
+       timer := time.NewTimer(interval)
+       gotHUP := make(chan os.Signal, 1)
+       signal.Notify(gotHUP, syscall.SIGHUP)
+
+       for {
+               select {
+               case <-gotHUP:
+               case <-timer.C:
+               }
+               timer.Reset(interval)
+
+               if err := kc.DiscoverKeepServers(); err != nil {
+                       log.Println("Error retrieving services list: %v (retrying in %v)", err, errInterval)
+                       timer.Reset(errInterval)
+                       continue
+               }
+               newRoots := []map[string]string{kc.LocalRoots(), kc.GatewayRoots()}
+
+               if !reflect.DeepEqual(previousRoots, newRoots) {
+                       log.Printf("Updated services list: locals %v gateways %v", newRoots[0], newRoots[1])
+                       previousRoots = newRoots
+               }
+
+               if len(newRoots[0]) == 0 {
+                       log.Printf("WARNING: No local services (retrying in %v)", errInterval)
+                       timer.Reset(errInterval)
+               }
+       }
+}
+
+// loadKeepServers
+func (this *KeepClient) loadKeepServers(list svcList) error {
+       listed := make(map[string]bool)
+       localRoots := make(map[string]string)
+       gatewayRoots := make(map[string]string)
+       writableLocalRoots := make(map[string]string)
+
+       // replicasPerService is 1 for disks; unknown or unlimited otherwise
+       this.replicasPerService = 1
+       this.Using_proxy = false
+
+       for _, service := range list.Items {
+               scheme := "http"
+               if service.SSL {
+                       scheme = "https"
+               }
+               url := fmt.Sprintf("%s://%s:%d", scheme, service.Hostname, service.Port)
+
+               // Skip duplicates
+               if listed[url] {
+                       continue
+               }
+               listed[url] = true
+
+               localRoots[service.Uuid] = url
+               if service.SvcType == "proxy" {
+                       this.Using_proxy = true
+               }
+
+               if service.ReadOnly == false {
+                       writableLocalRoots[service.Uuid] = url
+                       if service.SvcType != "disk" {
+                               this.replicasPerService = 0
+                       }
+               }
+
+               // Gateway services are only used when specified by
+               // UUID, so there's nothing to gain by filtering them
+               // by service type. Including all accessible services
+               // (gateway and otherwise) merely accommodates more
+               // service configurations.
+               gatewayRoots[service.Uuid] = url
+       }
+
+       if this.Using_proxy {
+               this.setClientSettingsProxy()
+       } else {
+               this.setClientSettingsDisk()
+       }
+
+       this.SetServiceRoots(localRoots, writableLocalRoots, gatewayRoots)
+       return nil
+}
diff --git a/sdk/go/keepclient/discover_test.go b/sdk/go/keepclient/discover_test.go
new file mode 100644 (file)
index 0000000..43a984e
--- /dev/null
@@ -0,0 +1,21 @@
+package keepclient
+
+import (
+       "fmt"
+       "time"
+
+       "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
+)
+
+func ExampleRefreshServices() {
+       arv, err := arvadosclient.MakeArvadosClient()
+       if err != nil {
+               panic(err)
+       }
+       kc, err := MakeKeepClient(&arv)
+       if err != nil {
+               panic(err)
+       }
+       go kc.RefreshServices(5*time.Minute, 3*time.Second)
+       fmt.Printf("LocalRoots: %#v\n", kc.LocalRoots())
+}
index 8414afab1eace8b00125ca88f4f37279b95fc558..90ac3bcb17dc12913abe6b5a130c0c44d1efb6fa 100644 (file)
@@ -2,7 +2,6 @@ package keepclient
 
 import (
        "crypto/md5"
-       "encoding/json"
        "errors"
        "fmt"
        "git.curoverse.com/arvados.git/sdk/go/streamer"
@@ -87,86 +86,6 @@ type svcList struct {
        Items []keepService `json:"items"`
 }
 
-// DiscoverKeepServers gets list of available keep services from api server
-func (this *KeepClient) DiscoverKeepServers() error {
-       var list svcList
-
-       // Get keep services from api server
-       err := this.Arvados.Call("GET", "keep_services", "", "accessible", nil, &list)
-       if err != nil {
-               return err
-       }
-
-       return this.loadKeepServers(list)
-}
-
-// LoadKeepServicesFromJSON gets list of available keep services from given JSON
-func (this *KeepClient) LoadKeepServicesFromJSON(services string) error {
-       var list svcList
-
-       // Load keep services from given json
-       dec := json.NewDecoder(strings.NewReader(services))
-       if err := dec.Decode(&list); err != nil {
-               return err
-       }
-
-       return this.loadKeepServers(list)
-}
-
-// loadKeepServers
-func (this *KeepClient) loadKeepServers(list svcList) error {
-       listed := make(map[string]bool)
-       localRoots := make(map[string]string)
-       gatewayRoots := make(map[string]string)
-       writableLocalRoots := make(map[string]string)
-
-       // replicasPerService is 1 for disks; unknown or unlimited otherwise
-       this.replicasPerService = 1
-       this.Using_proxy = false
-
-       for _, service := range list.Items {
-               scheme := "http"
-               if service.SSL {
-                       scheme = "https"
-               }
-               url := fmt.Sprintf("%s://%s:%d", scheme, service.Hostname, service.Port)
-
-               // Skip duplicates
-               if listed[url] {
-                       continue
-               }
-               listed[url] = true
-
-               localRoots[service.Uuid] = url
-               if service.SvcType == "proxy" {
-                       this.Using_proxy = true
-               }
-
-               if service.ReadOnly == false {
-                       writableLocalRoots[service.Uuid] = url
-                       if service.SvcType != "disk" {
-                               this.replicasPerService = 0
-                       }
-               }
-
-               // Gateway services are only used when specified by
-               // UUID, so there's nothing to gain by filtering them
-               // by service type. Including all accessible services
-               // (gateway and otherwise) merely accommodates more
-               // service configurations.
-               gatewayRoots[service.Uuid] = url
-       }
-
-       if this.Using_proxy {
-               this.setClientSettingsProxy()
-       } else {
-               this.setClientSettingsDisk()
-       }
-
-       this.SetServiceRoots(localRoots, writableLocalRoots, gatewayRoots)
-       return nil
-}
-
 type uploadStatus struct {
        err             error
        url             string
index d8a207f2cf2ce506d9406a646272e92c51c201e6..b14c674523f067469cbdfebd96dd035597b4bb44 100644 (file)
@@ -28,4 +28,18 @@ http {
       proxy_pass http://keepproxy;
     }
   }
+  upstream keep-web {
+    server localhost:{{KEEPWEBPORT}};
+  }
+  server {
+    listen *:{{KEEPWEBSSLPORT}} ssl default_server;
+    server_name ~^(?<request_host>.*)$;
+    ssl_certificate {{SSLCERT}};
+    ssl_certificate_key {{SSLKEY}};
+    location  / {
+      proxy_pass http://keep-web;
+      proxy_set_header Host $request_host:{{KEEPWEBPORT}};
+      proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+    }
+  }
 }
index 972b7f9d5172077b740e157cdb3f59b360ca824f..a83566aa669fde27b6a2718aa3baaca8fd360fdb 100644 (file)
@@ -32,7 +32,6 @@ import arvados.config
 
 ARVADOS_DIR = os.path.realpath(os.path.join(MY_DIRNAME, '../../..'))
 SERVICES_SRC_DIR = os.path.join(ARVADOS_DIR, 'services')
-SERVER_PID_PATH = 'tmp/pids/test-server.pid'
 if 'GOPATH' in os.environ:
     gopaths = os.environ['GOPATH'].split(':')
     gobins = [os.path.join(path, 'bin') for path in gopaths]
@@ -70,28 +69,62 @@ def kill_server_pid(pidfile, wait=10, passenger_root=False):
     import signal
     import subprocess
     import time
-    try:
-        if passenger_root:
-            # First try to shut down nicely
-            restore_cwd = os.getcwd()
-            os.chdir(passenger_root)
-            subprocess.call([
-                'bundle', 'exec', 'passenger', 'stop', '--pid-file', pidfile])
-            os.chdir(restore_cwd)
-        now = time.time()
-        timeout = now + wait
-        with open(pidfile, 'r') as f:
-            server_pid = int(f.read())
-        while now <= timeout:
-            if not passenger_root or timeout - now < wait / 2:
-                # Half timeout has elapsed. Start sending SIGTERM
-                os.kill(server_pid, signal.SIGTERM)
-            # Raise OSError if process has disappeared
-            os.getpgid(server_pid)
+
+    now = time.time()
+    startTERM = now
+    deadline = now + wait
+
+    if passenger_root:
+        # First try to shut down nicely
+        restore_cwd = os.getcwd()
+        os.chdir(passenger_root)
+        subprocess.call([
+            'bundle', 'exec', 'passenger', 'stop', '--pid-file', pidfile])
+        os.chdir(restore_cwd)
+        # Use up to half of the +wait+ period waiting for "passenger
+        # stop" to work. If the process hasn't exited by then, start
+        # sending TERM signals.
+        startTERM += wait/2
+
+    server_pid = None
+    while now <= deadline and server_pid is None:
+        try:
+            with open(pidfile, 'r') as f:
+                server_pid = int(f.read())
+        except IOError:
+            # No pidfile = nothing to kill.
+            return
+        except ValueError as error:
+            # Pidfile exists, but we can't parse it. Perhaps the
+            # server has created the file but hasn't written its PID
+            # yet?
+            print("Parse error reading pidfile {}: {}".format(pidfile, error))
             time.sleep(0.1)
             now = time.time()
-    except EnvironmentError:
-        pass
+
+    while now <= deadline:
+        try:
+            exited, _ = os.waitpid(server_pid, os.WNOHANG)
+            if exited > 0:
+                return
+        except OSError:
+            # already exited, or isn't our child process
+            pass
+        try:
+            if now >= startTERM:
+                os.kill(server_pid, signal.SIGTERM)
+                print("Sent SIGTERM to {} ({})".format(server_pid, pidfile))
+        except OSError as error:
+            if error.errno == errno.ESRCH:
+                # Thrown by os.getpgid() or os.kill() if the process
+                # does not exist, i.e., our work here is done.
+                return
+            raise
+        time.sleep(0.1)
+        now = time.time()
+
+    print("Server PID {} ({}) did not exit, giving up after {}s".
+          format(server_pid, pidfile, wait))
 
 def find_available_port():
     """Return an IPv4 port number that is not in use right now.
@@ -139,6 +172,26 @@ def _wait_until_port_listens(port, timeout=10):
         format(port, timeout),
         file=sys.stderr)
 
+def _fifo2stderr(label):
+    """Create a fifo, and copy it to stderr, prepending label to each line.
+
+    Return value is the path to the new FIFO.
+
+    +label+ should contain only alphanumerics: it is also used as part
+    of the FIFO filename.
+    """
+    fifo = os.path.join(TEST_TMPDIR, label+'.fifo')
+    try:
+        os.remove(fifo)
+    except OSError as error:
+        if error.errno != errno.ENOENT:
+            raise
+    os.mkfifo(fifo, 0700)
+    subprocess.Popen(
+        ['sed', '-e', 's/^/['+label+'] /', fifo],
+        stdout=sys.stderr)
+    return fifo
+
 def run(leave_running_atexit=False):
     """Ensure an API server is running, and ARVADOS_API_* env vars have
     admin credentials for it.
@@ -159,7 +212,7 @@ def run(leave_running_atexit=False):
     # Delete cached discovery document.
     shutil.rmtree(arvados.http_cache('discovery'))
 
-    pid_file = os.path.join(SERVICES_SRC_DIR, 'api', SERVER_PID_PATH)
+    pid_file = _pidfile('api')
     pid_file_ok = find_server_pid(pid_file, 0)
 
     existing_api_host = os.environ.get('ARVADOS_TEST_API_HOST', my_api_host)
@@ -231,7 +284,7 @@ def run(leave_running_atexit=False):
     start_msg = subprocess.check_output(
         ['bundle', 'exec',
          'passenger', 'start', '-d', '-p{}'.format(port),
-         '--pid-file', os.path.join(os.getcwd(), pid_file),
+         '--pid-file', pid_file,
          '--log-file', os.path.join(os.getcwd(), 'log/test.log'),
          '--ssl',
          '--ssl-certificate', 'tmp/self-signed.pem',
@@ -293,7 +346,7 @@ def stop(force=False):
     """
     global my_api_host
     if force or my_api_host is not None:
-        kill_server_pid(os.path.join(SERVICES_SRC_DIR, 'api', SERVER_PID_PATH))
+        kill_server_pid(_pidfile('api'))
         my_api_host = None
 
 def _start_keep(n, keep_args):
@@ -307,9 +360,10 @@ def _start_keep(n, keep_args):
     for arg, val in keep_args.iteritems():
         keep_cmd.append("{}={}".format(arg, val))
 
-    logf = open(os.path.join(TEST_TMPDIR, 'keep{}.log'.format(n)), 'a+')
+    logf = open(_fifo2stderr('keep{}'.format(n)), 'w')
     kp0 = subprocess.Popen(
         keep_cmd, stdin=open('/dev/null'), stdout=logf, stderr=logf, close_fds=True)
+
     with open(_pidfile('keep{}'.format(n)), 'w') as f:
         f.write(str(kp0.pid))
 
@@ -342,7 +396,7 @@ def run_keep(blob_signing_key=None, enforce_permissions=False, num_servers=2):
         token=os.environ['ARVADOS_API_TOKEN'],
         insecure=True)
 
-    for d in api.keep_services().list().execute()['items']:
+    for d in api.keep_services().list(filters=[['service_type','=','disk']]).execute()['items']:
         api.keep_services().delete(uuid=d['uuid']).execute()
     for d in api.keep_disks().list().execute()['items']:
         api.keep_disks().delete(uuid=d['uuid']).execute()
@@ -360,8 +414,14 @@ def run_keep(blob_signing_key=None, enforce_permissions=False, num_servers=2):
             'keep_disk': {'keep_service_uuid': svc['uuid'] }
         }).execute()
 
+    # If keepproxy is running, send SIGHUP to make it discover the new
+    # keepstore services.
+    proxypidfile = _pidfile('keepproxy')
+    if os.path.exists(proxypidfile):
+        os.kill(int(open(proxypidfile).read()), signal.SIGHUP)
+
 def _stop_keep(n):
-    kill_server_pid(_pidfile('keep{}'.format(n)), 0)
+    kill_server_pid(_pidfile('keep{}'.format(n)))
     if os.path.exists("{}/keep{}.volume".format(TEST_TMPDIR, n)):
         with open("{}/keep{}.volume".format(TEST_TMPDIR, n), 'r') as r:
             shutil.rmtree(r.read(), True)
@@ -378,20 +438,20 @@ def run_keep_proxy():
         return
     stop_keep_proxy()
 
-    admin_token = auth_token('admin')
     port = find_available_port()
     env = os.environ.copy()
-    env['ARVADOS_API_TOKEN'] = admin_token
+    env['ARVADOS_API_TOKEN'] = auth_token('anonymous')
+    logf = open(_fifo2stderr('keepproxy'), 'w')
     kp = subprocess.Popen(
         ['keepproxy',
          '-pid='+_pidfile('keepproxy'),
          '-listen=:{}'.format(port)],
-        env=env, stdin=open('/dev/null'), stdout=sys.stderr)
+        env=env, stdin=open('/dev/null'), stdout=logf, stderr=logf, close_fds=True)
 
     api = arvados.api(
         version='v1',
         host=os.environ['ARVADOS_API_HOST'],
-        token=admin_token,
+        token=auth_token('admin'),
         insecure=True)
     for d in api.keep_services().list(
             filters=[['service_type','=','proxy']]).execute()['items']:
@@ -409,7 +469,7 @@ def run_keep_proxy():
 def stop_keep_proxy():
     if 'ARVADOS_TEST_PROXY_SERVICES' in os.environ:
         return
-    kill_server_pid(_pidfile('keepproxy'), wait=0)
+    kill_server_pid(_pidfile('keepproxy'))
 
 def run_arv_git_httpd():
     if 'ARVADOS_TEST_PROXY_SERVICES' in os.environ:
@@ -420,11 +480,12 @@ def run_arv_git_httpd():
     gitport = find_available_port()
     env = os.environ.copy()
     env.pop('ARVADOS_API_TOKEN', None)
+    logf = open(_fifo2stderr('arv-git-httpd'), 'w')
     agh = subprocess.Popen(
         ['arv-git-httpd',
          '-repo-root='+gitdir+'/test',
          '-address=:'+str(gitport)],
-        env=env, stdin=open('/dev/null'), stdout=sys.stderr)
+        env=env, stdin=open('/dev/null'), stdout=logf, stderr=logf)
     with open(_pidfile('arv-git-httpd'), 'w') as f:
         f.write(str(agh.pid))
     _setport('arv-git-httpd', gitport)
@@ -433,19 +494,46 @@ def run_arv_git_httpd():
 def stop_arv_git_httpd():
     if 'ARVADOS_TEST_PROXY_SERVICES' in os.environ:
         return
-    kill_server_pid(_pidfile('arv-git-httpd'), wait=0)
+    kill_server_pid(_pidfile('arv-git-httpd'))
+
+def run_keep_web():
+    if 'ARVADOS_TEST_PROXY_SERVICES' in os.environ:
+        return
+    stop_keep_web()
+
+    keepwebport = find_available_port()
+    env = os.environ.copy()
+    env['ARVADOS_API_TOKEN'] = auth_token('anonymous')
+    logf = open(_fifo2stderr('keep-web'), 'w')
+    keepweb = subprocess.Popen(
+        ['keep-web',
+         '-allow-anonymous',
+         '-attachment-only-host=localhost:'+str(keepwebport),
+         '-listen=:'+str(keepwebport)],
+        env=env, stdin=open('/dev/null'), stdout=logf, stderr=logf)
+    with open(_pidfile('keep-web'), 'w') as f:
+        f.write(str(keepweb.pid))
+    _setport('keep-web', keepwebport)
+    _wait_until_port_listens(keepwebport)
+
+def stop_keep_web():
+    if 'ARVADOS_TEST_PROXY_SERVICES' in os.environ:
+        return
+    kill_server_pid(_pidfile('keep-web'))
 
 def run_nginx():
     if 'ARVADOS_TEST_PROXY_SERVICES' in os.environ:
         return
     nginxconf = {}
+    nginxconf['KEEPWEBPORT'] = _getport('keep-web')
+    nginxconf['KEEPWEBSSLPORT'] = find_available_port()
     nginxconf['KEEPPROXYPORT'] = _getport('keepproxy')
     nginxconf['KEEPPROXYSSLPORT'] = find_available_port()
     nginxconf['GITPORT'] = _getport('arv-git-httpd')
     nginxconf['GITSSLPORT'] = find_available_port()
     nginxconf['SSLCERT'] = os.path.join(SERVICES_SRC_DIR, 'api', 'tmp', 'self-signed.pem')
     nginxconf['SSLKEY'] = os.path.join(SERVICES_SRC_DIR, 'api', 'tmp', 'self-signed.key')
-    nginxconf['ACCESSLOG'] = os.path.join(TEST_TMPDIR, 'nginx_access_log.fifo')
+    nginxconf['ACCESSLOG'] = _fifo2stderr('nginx_access_log')
 
     conftemplatefile = os.path.join(MY_DIRNAME, 'nginx.conf')
     conffile = os.path.join(TEST_TMPDIR, 'nginx.conf')
@@ -458,29 +546,20 @@ def run_nginx():
     env = os.environ.copy()
     env['PATH'] = env['PATH']+':/sbin:/usr/sbin:/usr/local/sbin'
 
-    try:
-        os.remove(nginxconf['ACCESSLOG'])
-    except OSError as error:
-        if error.errno != errno.ENOENT:
-            raise
-
-    os.mkfifo(nginxconf['ACCESSLOG'], 0700)
     nginx = subprocess.Popen(
         ['nginx',
          '-g', 'error_log stderr info;',
          '-g', 'pid '+_pidfile('nginx')+';',
          '-c', conffile],
         env=env, stdin=open('/dev/null'), stdout=sys.stderr)
-    cat_access = subprocess.Popen(
-        ['cat', nginxconf['ACCESSLOG']],
-        stdout=sys.stderr)
+    _setport('keep-web-ssl', nginxconf['KEEPWEBSSLPORT'])
     _setport('keepproxy-ssl', nginxconf['KEEPPROXYSSLPORT'])
     _setport('arv-git-httpd-ssl', nginxconf['GITSSLPORT'])
 
 def stop_nginx():
     if 'ARVADOS_TEST_PROXY_SERVICES' in os.environ:
         return
-    kill_server_pid(_pidfile('nginx'), wait=0)
+    kill_server_pid(_pidfile('nginx'))
 
 def _pidfile(program):
     return os.path.join(TEST_TMPDIR, program + '.pid')
@@ -555,6 +634,7 @@ class TestCaseWithServers(unittest.TestCase):
     MAIN_SERVER = None
     KEEP_SERVER = None
     KEEP_PROXY_SERVER = None
+    KEEP_WEB_SERVER = None
 
     @staticmethod
     def _restore_dict(src, dest):
@@ -573,7 +653,8 @@ class TestCaseWithServers(unittest.TestCase):
         for server_kwargs, start_func, stop_func in (
                 (cls.MAIN_SERVER, run, reset),
                 (cls.KEEP_SERVER, run_keep, stop_keep),
-                (cls.KEEP_PROXY_SERVER, run_keep_proxy, stop_keep_proxy)):
+                (cls.KEEP_PROXY_SERVER, run_keep_proxy, stop_keep_proxy),
+                (cls.KEEP_WEB_SERVER, run_keep_web, stop_keep_web)):
             if server_kwargs is not None:
                 start_func(**server_kwargs)
                 cls._cleanup_funcs.append(stop_func)
@@ -599,6 +680,7 @@ if __name__ == "__main__":
         'start', 'stop',
         'start_keep', 'stop_keep',
         'start_keep_proxy', 'stop_keep_proxy',
+        'start_keep-web', 'stop_keep-web',
         'start_arv-git-httpd', 'stop_arv-git-httpd',
         'start_nginx', 'stop_nginx',
     ]
@@ -629,7 +711,7 @@ if __name__ == "__main__":
     elif args.action == 'start_keep':
         run_keep(enforce_permissions=args.keep_enforce_permissions, num_servers=args.num_keep_servers)
     elif args.action == 'stop_keep':
-        stop_keep()
+        stop_keep(num_servers=args.num_keep_servers)
     elif args.action == 'start_keep_proxy':
         run_keep_proxy()
     elif args.action == 'stop_keep_proxy':
@@ -638,6 +720,10 @@ if __name__ == "__main__":
         run_arv_git_httpd()
     elif args.action == 'stop_arv-git-httpd':
         stop_arv_git_httpd()
+    elif args.action == 'start_keep-web':
+        run_keep_web()
+    elif args.action == 'stop_keep-web':
+        stop_keep_web()
     elif args.action == 'start_nginx':
         run_nginx()
     elif args.action == 'stop_nginx':
index 64818da375ecc4fb0277a10be0f3f7e6b552a2e8..21c8e4710cb5e62dbe224a6034c68ea1bd40b05e 100644 (file)
@@ -29,6 +29,10 @@ class DatabaseController < ApplicationController
     fixturesets = Dir.glob(Rails.root.join('test', 'fixtures', '*.yml')).
       collect { |yml| yml.match(/([^\/]*)\.yml$/)[1] }
 
+    # Don't reset keep_services: clients need to discover our
+    # integration-testing keepstores, not test fixtures.
+    fixturesets -= %w[keep_services]
+
     table_names = '"' + ActiveRecord::Base.connection.tables.join('","') + '"'
 
     attempts_left = 20
index b9ea29c314792909d3fc48e3f2fdea32175bd70b..85c2c791a379552caabc10f6665e1dfca10a2190 100644 (file)
@@ -99,7 +99,7 @@ active_all_collections:
   user: active
   api_token: activecollectionsabcdefghijklmnopqrstuvwxyz1234567
   expires_at: 2038-01-01 00:00:00
-  scopes: ["GET /arvados/v1/collections/", "GET /arvados/v1/keep_disks"]
+  scopes: ["GET /arvados/v1/collections/", "GET /arvados/v1/keep_services/accessible"]
 
 active_userlist:
   api_client: untrusted
index 3e38cc35cad611b5944d5c68221f43989daf4bb5..847329f33137ca259b0399349e539cce01885172 100644 (file)
@@ -9,6 +9,8 @@ import (
        "net/http"
        "net/url"
        "os"
+       "regexp"
+       "strconv"
        "strings"
 
        "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
@@ -181,7 +183,17 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
                        Path:     "/",
                        HttpOnly: true,
                })
-               redir := (&url.URL{Host: r.Host, Path: r.URL.Path}).String()
+
+               // Propagate query parameters (except api_token) from
+               // the original request.
+               redirQuery := r.URL.Query()
+               redirQuery.Del("api_token")
+
+               redir := (&url.URL{
+                       Host:     r.Host,
+                       Path:     r.URL.Path,
+                       RawQuery: redirQuery.Encode(),
+               }).String()
 
                w.Header().Add("Location", redir)
                statusCode, statusText = http.StatusSeeOther, redir
@@ -292,8 +304,10 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
        }
        defer rdr.Close()
 
-       // One or both of these can be -1 if not found:
        basenamePos := strings.LastIndex(filename, "/")
+       if basenamePos < 0 {
+               basenamePos = 0
+       }
        extPos := strings.LastIndex(filename, ".")
        if extPos > basenamePos {
                // Now extPos is safely >= 0.
@@ -304,13 +318,53 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
        if rdr, ok := rdr.(keepclient.ReadCloserWithLen); ok {
                w.Header().Set("Content-Length", fmt.Sprintf("%d", rdr.Len()))
        }
-       if attachment {
-               w.Header().Set("Content-Disposition", "attachment")
-       }
 
-       w.WriteHeader(http.StatusOK)
-       _, err = io.Copy(w, rdr)
+       applyContentDispositionHdr(w, r, filename[basenamePos:], attachment)
+       rangeRdr, statusCode := applyRangeHdr(w, r, rdr)
+
+       w.WriteHeader(statusCode)
+       _, err = io.Copy(w, rangeRdr)
        if err != nil {
                statusCode, statusText = http.StatusBadGateway, err.Error()
        }
 }
+
+var rangeRe = regexp.MustCompile(`^bytes=0-([0-9]*)$`)
+
+func applyRangeHdr(w http.ResponseWriter, r *http.Request, rdr keepclient.ReadCloserWithLen) (io.Reader, int) {
+       w.Header().Set("Accept-Ranges", "bytes")
+       hdr := r.Header.Get("Range")
+       fields := rangeRe.FindStringSubmatch(hdr)
+       if fields == nil {
+               return rdr, http.StatusOK
+       }
+       rangeEnd, err := strconv.ParseInt(fields[1], 10, 64)
+       if err != nil {
+               // Empty or too big for int64 == send entire content
+               return rdr, http.StatusOK
+       }
+       if uint64(rangeEnd) >= rdr.Len() {
+               return rdr, http.StatusOK
+       }
+       w.Header().Set("Content-Length", fmt.Sprintf("%d", rangeEnd+1))
+       w.Header().Set("Content-Range", fmt.Sprintf("bytes %d-%d/%d", 0, rangeEnd, rdr.Len()))
+       return &io.LimitedReader{R: rdr, N: rangeEnd + 1}, http.StatusPartialContent
+}
+
+func applyContentDispositionHdr(w http.ResponseWriter, r *http.Request, filename string, isAttachment bool) {
+       disposition := "inline"
+       if isAttachment {
+               disposition = "attachment"
+       }
+       if strings.ContainsRune(r.RequestURI, '?') {
+               // Help the UA realize that the filename is just
+               // "filename.txt", not
+               // "filename.txt?disposition=attachment".
+               //
+               // TODO(TC): Follow advice at RFC 6266 appendix D
+               disposition += "; filename=" + strconv.QuoteToASCII(filename)
+       }
+       if disposition != "inline" {
+               w.Header().Set("Content-Disposition", disposition)
+       }
+}
index 392de94ffb5af1399bada756ed4a8efc7f33506b..94a1cf7c0746a3fd6a883a9f84177f61646da257 100644 (file)
@@ -32,9 +32,11 @@ func (s *IntegrationSuite) TestVhost404(c *check.C) {
                arvadostest.NonexistentCollection + ".example.com/t=" + arvadostest.ActiveToken + "/theperthcountyconspiracy",
        } {
                resp := httptest.NewRecorder()
+               u := mustParseURL(testURL)
                req := &http.Request{
-                       Method: "GET",
-                       URL:    mustParseURL(testURL),
+                       Method:     "GET",
+                       URL:        u,
+                       RequestURI: u.RequestURI(),
                }
                (&handler{}).ServeHTTP(resp, req)
                c.Check(resp.Code, check.Equals, http.StatusNotFound)
@@ -120,10 +122,11 @@ func doVhostRequestsWithHostPath(c *check.C, authz authorizer, hostPath string)
        } {
                u := mustParseURL("http://" + hostPath)
                req := &http.Request{
-                       Method: "GET",
-                       Host:   u.Host,
-                       URL:    u,
-                       Header: http.Header{},
+                       Method:     "GET",
+                       Host:       u.Host,
+                       URL:        u,
+                       RequestURI: u.RequestURI(),
+                       Header:     http.Header{},
                }
                failCode := authz(req, tok)
                resp := doReq(req)
@@ -157,10 +160,11 @@ func doReq(req *http.Request) *httptest.ResponseRecorder {
        cookies := (&http.Response{Header: resp.Header()}).Cookies()
        u, _ := req.URL.Parse(resp.Header().Get("Location"))
        req = &http.Request{
-               Method: "GET",
-               Host:   u.Host,
-               URL:    u,
-               Header: http.Header{},
+               Method:     "GET",
+               Host:       u.Host,
+               URL:        u,
+               RequestURI: u.RequestURI(),
+               Header:     http.Header{},
        }
        for _, c := range cookies {
                req.AddCookie(c)
@@ -228,6 +232,21 @@ func (s *IntegrationSuite) TestVhostRedirectQueryTokenSingleOriginError(c *check
        )
 }
 
+// If client requests an attachment by putting ?disposition=attachment
+// in the query string, and gets redirected, the redirect target
+// should respond with an attachment.
+func (s *IntegrationSuite) TestVhostRedirectQueryTokenRequestAttachment(c *check.C) {
+       resp := s.testVhostRedirectTokenToCookie(c, "GET",
+               arvadostest.FooCollection+".example.com/foo",
+               "?disposition=attachment&api_token="+arvadostest.ActiveToken,
+               "",
+               "",
+               http.StatusOK,
+               "foo",
+       )
+       c.Check(strings.Split(resp.Header().Get("Content-Disposition"), ";")[0], check.Equals, "attachment")
+}
+
 func (s *IntegrationSuite) TestVhostRedirectQueryTokenTrustAllContent(c *check.C) {
        defer func(orig bool) {
                trustAllContent = orig
@@ -315,14 +334,57 @@ func (s *IntegrationSuite) TestAnonymousTokenError(c *check.C) {
        )
 }
 
+func (s *IntegrationSuite) TestRange(c *check.C) {
+       u, _ := url.Parse("http://example.com/c=" + arvadostest.HelloWorldCollection + "/Hello%20world.txt")
+       req := &http.Request{
+               Method:     "GET",
+               Host:       u.Host,
+               URL:        u,
+               RequestURI: u.RequestURI(),
+               Header:     http.Header{"Range": {"bytes=0-4"}},
+       }
+       resp := httptest.NewRecorder()
+       (&handler{}).ServeHTTP(resp, req)
+       c.Check(resp.Code, check.Equals, http.StatusPartialContent)
+       c.Check(resp.Body.String(), check.Equals, "Hello")
+       c.Check(resp.Header().Get("Content-Length"), check.Equals, "5")
+       c.Check(resp.Header().Get("Content-Range"), check.Equals, "bytes 0-4/12")
+
+       req.Header.Set("Range", "bytes=0-")
+       resp = httptest.NewRecorder()
+       (&handler{}).ServeHTTP(resp, req)
+       // 200 and 206 are both correct:
+       c.Check(resp.Code, check.Equals, http.StatusOK)
+       c.Check(resp.Body.String(), check.Equals, "Hello world\n")
+       c.Check(resp.Header().Get("Content-Length"), check.Equals, "12")
+
+       // Unsupported ranges are ignored
+       for _, hdr := range []string{
+               "bytes=5-5",  // non-zero start byte
+               "bytes=-5",   // last 5 bytes
+               "cubits=0-5", // unsupported unit
+               "bytes=0-340282366920938463463374607431768211456", // 2^128
+       } {
+               req.Header.Set("Range", hdr)
+               resp = httptest.NewRecorder()
+               (&handler{}).ServeHTTP(resp, req)
+               c.Check(resp.Code, check.Equals, http.StatusOK)
+               c.Check(resp.Body.String(), check.Equals, "Hello world\n")
+               c.Check(resp.Header().Get("Content-Length"), check.Equals, "12")
+               c.Check(resp.Header().Get("Content-Range"), check.Equals, "")
+               c.Check(resp.Header().Get("Accept-Ranges"), check.Equals, "bytes")
+       }
+}
+
 func (s *IntegrationSuite) testVhostRedirectTokenToCookie(c *check.C, method, hostPath, queryString, contentType, reqBody string, expectStatus int, expectRespBody string) *httptest.ResponseRecorder {
        u, _ := url.Parse(`http://` + hostPath + queryString)
        req := &http.Request{
-               Method: method,
-               Host:   u.Host,
-               URL:    u,
-               Header: http.Header{"Content-Type": {contentType}},
-               Body:   ioutil.NopCloser(strings.NewReader(reqBody)),
+               Method:     method,
+               Host:       u.Host,
+               URL:        u,
+               RequestURI: u.RequestURI(),
+               Header:     http.Header{"Content-Type": {contentType}},
+               Body:       ioutil.NopCloser(strings.NewReader(reqBody)),
        }
 
        resp := httptest.NewRecorder()
@@ -335,15 +397,16 @@ func (s *IntegrationSuite) testVhostRedirectTokenToCookie(c *check.C, method, ho
        if resp.Code != http.StatusSeeOther {
                return resp
        }
-       c.Check(resp.Body.String(), check.Matches, `.*href="//`+regexp.QuoteMeta(html.EscapeString(hostPath))+`".*`)
+       c.Check(resp.Body.String(), check.Matches, `.*href="//`+regexp.QuoteMeta(html.EscapeString(hostPath))+`(\?[^"]*)?".*`)
        cookies := (&http.Response{Header: resp.Header()}).Cookies()
 
        u, _ = u.Parse(resp.Header().Get("Location"))
        req = &http.Request{
-               Method: "GET",
-               Host:   u.Host,
-               URL:    u,
-               Header: http.Header{},
+               Method:     "GET",
+               Host:       u.Host,
+               URL:        u,
+               RequestURI: u.RequestURI(),
+               Header:     http.Header{},
        }
        for _, c := range cookies {
                req.AddCookie(c)
index 79ed51eb0e00f57eb38d4a31251a87c2bc5e866c..7b5cd2befb8f69bd25fa62674d01590214aec5ad 100644 (file)
@@ -14,7 +14,6 @@ import (
        "net/http"
        "os"
        "os/signal"
-       "reflect"
        "regexp"
        "sync"
        "syscall"
@@ -22,8 +21,8 @@ import (
 )
 
 // Default TCP address on which to listen for requests.
-// Initialized by the -listen flag.
-const DEFAULT_ADDR = ":25107"
+// Override with -listen.
+const DefaultAddr = ":25107"
 
 var listener net.Listener
 
@@ -42,7 +41,7 @@ func main() {
        flagset.StringVar(
                &listen,
                "listen",
-               DEFAULT_ADDR,
+               DefaultAddr,
                "Interface on which to listen for requests, in the format "+
                        "ipaddr:port. e.g. -listen=10.0.1.24:8000. Use -listen=:port "+
                        "to listen on all network interfaces.")
@@ -103,15 +102,14 @@ func main() {
        }
 
        kc.Want_replicas = default_replicas
-
        kc.Client.Timeout = time.Duration(timeout) * time.Second
+       go kc.RefreshServices(5*time.Minute, 3*time.Second)
 
        listener, err = net.Listen("tcp", listen)
        if err != nil {
                log.Fatalf("Could not listen on %v", listen)
        }
-
-       go RefreshServicesList(kc)
+       log.Printf("Arvados Keep proxy started listening on %v", listener.Addr())
 
        // Shut down the server gracefully (by closing the listener)
        // if SIGTERM is received.
@@ -124,9 +122,7 @@ func main() {
        signal.Notify(term, syscall.SIGTERM)
        signal.Notify(term, syscall.SIGINT)
 
-       log.Printf("Arvados Keep proxy started listening on %v", listener.Addr())
-
-       // Start listening for requests.
+       // Start serving requests.
        http.Serve(listener, MakeRESTRouter(!no_get, !no_put, kc))
 
        log.Println("shutting down")
@@ -138,30 +134,6 @@ type ApiTokenCache struct {
        expireTime int64
 }
 
-// Refresh the keep service list every five minutes.
-func RefreshServicesList(kc *keepclient.KeepClient) {
-       var previousRoots = []map[string]string{}
-       var delay time.Duration = 0
-       for {
-               time.Sleep(delay * time.Second)
-               delay = 300
-               if err := kc.DiscoverKeepServers(); err != nil {
-                       log.Println("Error retrieving services list:", err)
-                       delay = 3
-                       continue
-               }
-               newRoots := []map[string]string{kc.LocalRoots(), kc.GatewayRoots()}
-               if !reflect.DeepEqual(previousRoots, newRoots) {
-                       log.Printf("Updated services list: locals %v gateways %v", newRoots[0], newRoots[1])
-               }
-               if len(newRoots[0]) == 0 {
-                       log.Print("WARNING: No local services. Retrying in 3 seconds.")
-                       delay = 3
-               }
-               previousRoots = newRoots
-       }
-}
-
 // Cache the token and set an expire time.  If we already have an expire time
 // on the token, it is not updated.
 func (this *ApiTokenCache) RememberToken(token string) {
@@ -194,12 +166,8 @@ func (this *ApiTokenCache) RecallToken(token string) bool {
 }
 
 func GetRemoteAddress(req *http.Request) string {
-       if realip := req.Header.Get("X-Real-IP"); realip != "" {
-               if forwarded := req.Header.Get("X-Forwarded-For"); forwarded != realip {
-                       return fmt.Sprintf("%s (X-Forwarded-For %s)", realip, forwarded)
-               } else {
-                       return realip
-               }
+       if xff := req.Header.Get("X-Forwarded-For"); xff != "" {
+               return xff + "," + req.RemoteAddr
        }
        return req.RemoteAddr
 }
index fab1cd48a663303ca87293ed097f261b452beb40..7c5b3628963ca44e29f3c487e6792ed1f8e33feb 100644 (file)
@@ -2,21 +2,19 @@ package main
 
 import (
        "crypto/md5"
-       "crypto/tls"
        "fmt"
        "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
        "git.curoverse.com/arvados.git/sdk/go/arvadostest"
        "git.curoverse.com/arvados.git/sdk/go/keepclient"
-       . "gopkg.in/check.v1"
-       "io"
        "io/ioutil"
        "log"
        "net/http"
-       "net/url"
        "os"
        "strings"
        "testing"
        "time"
+
+       . "gopkg.in/check.v1"
 )
 
 // Gocheck boilerplate
@@ -36,6 +34,8 @@ var _ = Suite(&NoKeepServerSuite{})
 // Test with no keepserver to simulate errors
 type NoKeepServerSuite struct{}
 
+var TestProxyUUID = "zzzzz-bi6l4-lrixqc4fxofbmzz"
+
 // Wait (up to 1 second) for keepproxy to listen on a port. This
 // avoids a race condition where we hit a "connection refused" error
 // because we start testing the proxy too soon.
@@ -73,6 +73,10 @@ func (s *ServerRequiredSuite) TearDownSuite(c *C) {
 
 func (s *NoKeepServerSuite) SetUpSuite(c *C) {
        arvadostest.StartAPI()
+       // We need API to have some keep services listed, but the
+       // services themselves should be unresponsive.
+       arvadostest.StartKeep(2, false)
+       arvadostest.StopKeep(2)
 }
 
 func (s *NoKeepServerSuite) SetUpTest(c *C) {
@@ -83,106 +87,31 @@ func (s *NoKeepServerSuite) TearDownSuite(c *C) {
        arvadostest.StopAPI()
 }
 
-func setupProxyService() {
-
-       client := &http.Client{Transport: &http.Transport{
-               TLSClientConfig: &tls.Config{InsecureSkipVerify: true}}}
-
-       var req *http.Request
-       var err error
-       if req, err = http.NewRequest("POST", fmt.Sprintf("https://%s/arvados/v1/keep_services", os.Getenv("ARVADOS_API_HOST")), nil); err != nil {
-               panic(err.Error())
-       }
-       req.Header.Add("Authorization", fmt.Sprintf("OAuth2 %s", os.Getenv("ARVADOS_API_TOKEN")))
-
-       reader, writer := io.Pipe()
-
-       req.Body = reader
-
-       go func() {
-               data := url.Values{}
-               data.Set("keep_service", `{
-  "service_host": "localhost",
-  "service_port": 29950,
-  "service_ssl_flag": false,
-  "service_type": "proxy"
-}`)
-
-               writer.Write([]byte(data.Encode()))
-               writer.Close()
-       }()
-
-       var resp *http.Response
-       if resp, err = client.Do(req); err != nil {
-               panic(err.Error())
-       }
-       if resp.StatusCode != 200 {
-               panic(resp.Status)
-       }
-}
+func runProxy(c *C, args []string, bogusClientToken bool) *keepclient.KeepClient {
+       args = append([]string{"keepproxy"}, args...)
+       os.Args = append(args, "-listen=:0")
+       listener = nil
+       go main()
+       waitForListener()
 
-func runProxy(c *C, args []string, port int, bogusClientToken bool) *keepclient.KeepClient {
-       if bogusClientToken {
-               os.Setenv("ARVADOS_API_TOKEN", "bogus-token")
-       }
        arv, err := arvadosclient.MakeArvadosClient()
        c.Assert(err, Equals, nil)
-       kc := keepclient.KeepClient{
-               Arvados:       &arv,
-               Want_replicas: 2,
-               Using_proxy:   true,
-               Client:        &http.Client{},
-       }
-       locals := map[string]string{
-               "proxy": fmt.Sprintf("http://localhost:%v", port),
-       }
-       writableLocals := map[string]string{
-               "proxy": fmt.Sprintf("http://localhost:%v", port),
-       }
-       kc.SetServiceRoots(locals, writableLocals, nil)
-       c.Check(kc.Using_proxy, Equals, true)
-       c.Check(len(kc.LocalRoots()), Equals, 1)
-       for _, root := range kc.LocalRoots() {
-               c.Check(root, Equals, fmt.Sprintf("http://localhost:%v", port))
-       }
-       log.Print("keepclient created")
        if bogusClientToken {
-               arvadostest.ResetEnv()
+               arv.ApiToken = "bogus-token"
        }
-
-       {
-               os.Args = append(args, fmt.Sprintf("-listen=:%v", port))
-               listener = nil
-               go main()
+       kc := keepclient.New(&arv)
+       sr := map[string]string{
+               TestProxyUUID: "http://" + listener.Addr().String(),
        }
+       kc.SetServiceRoots(sr, sr, sr)
+       kc.Arvados.External = true
+       kc.Using_proxy = true
 
-       return &kc
+       return kc
 }
 
 func (s *ServerRequiredSuite) TestPutAskGet(c *C) {
-       log.Print("TestPutAndGet start")
-
-       os.Args = []string{"keepproxy", "-listen=:29950"}
-       listener = nil
-       go main()
-       time.Sleep(100 * time.Millisecond)
-
-       setupProxyService()
-
-       os.Setenv("ARVADOS_EXTERNAL_CLIENT", "true")
-       arv, err := arvadosclient.MakeArvadosClient()
-       c.Assert(err, Equals, nil)
-       kc, err := keepclient.MakeKeepClient(&arv)
-       c.Assert(err, Equals, nil)
-       c.Check(kc.Arvados.External, Equals, true)
-       c.Check(kc.Using_proxy, Equals, true)
-       c.Check(len(kc.LocalRoots()), Equals, 1)
-       for _, root := range kc.LocalRoots() {
-               c.Check(root, Equals, "http://localhost:29950")
-       }
-       os.Setenv("ARVADOS_EXTERNAL_CLIENT", "")
-
-       waitForListener()
+       kc := runProxy(c, nil, false)
        defer closeListener()
 
        hash := fmt.Sprintf("%x", md5.Sum([]byte("foo")))
@@ -254,15 +183,10 @@ func (s *ServerRequiredSuite) TestPutAskGet(c *C) {
                c.Check(blocklen, Equals, int64(0))
                log.Print("Finished Get zero block")
        }
-
-       log.Print("TestPutAndGet done")
 }
 
 func (s *ServerRequiredSuite) TestPutAskGetForbidden(c *C) {
-       log.Print("TestPutAskGetForbidden start")
-
-       kc := runProxy(c, []string{"keepproxy"}, 29951, true)
-       waitForListener()
+       kc := runProxy(c, nil, true)
        defer closeListener()
 
        hash := fmt.Sprintf("%x", md5.Sum([]byte("bar")))
@@ -300,15 +224,10 @@ func (s *ServerRequiredSuite) TestPutAskGetForbidden(c *C) {
                c.Check(blocklen, Equals, int64(0))
                log.Print("Get")
        }
-
-       log.Print("TestPutAskGetForbidden done")
 }
 
 func (s *ServerRequiredSuite) TestGetDisabled(c *C) {
-       log.Print("TestGetDisabled start")
-
-       kc := runProxy(c, []string{"keepproxy", "-no-get"}, 29952, false)
-       waitForListener()
+       kc := runProxy(c, []string{"-no-get"}, false)
        defer closeListener()
 
        hash := fmt.Sprintf("%x", md5.Sum([]byte("baz")))
@@ -346,38 +265,26 @@ func (s *ServerRequiredSuite) TestGetDisabled(c *C) {
                c.Check(blocklen, Equals, int64(0))
                log.Print("Get")
        }
-
-       log.Print("TestGetDisabled done")
 }
 
 func (s *ServerRequiredSuite) TestPutDisabled(c *C) {
-       log.Print("TestPutDisabled start")
-
-       kc := runProxy(c, []string{"keepproxy", "-no-put"}, 29953, false)
-       waitForListener()
+       kc := runProxy(c, []string{"-no-put"}, false)
        defer closeListener()
 
-       {
-               hash2, rep, err := kc.PutB([]byte("quux"))
-               c.Check(hash2, Equals, "")
-               c.Check(rep, Equals, 0)
-               c.Check(err, Equals, keepclient.InsufficientReplicasError)
-               log.Print("PutB")
-       }
-
-       log.Print("TestPutDisabled done")
+       hash2, rep, err := kc.PutB([]byte("quux"))
+       c.Check(hash2, Equals, "")
+       c.Check(rep, Equals, 0)
+       c.Check(err, Equals, keepclient.InsufficientReplicasError)
 }
 
 func (s *ServerRequiredSuite) TestCorsHeaders(c *C) {
-       runProxy(c, []string{"keepproxy"}, 29954, false)
-       waitForListener()
+       runProxy(c, nil, false)
        defer closeListener()
 
        {
                client := http.Client{}
                req, err := http.NewRequest("OPTIONS",
-                       fmt.Sprintf("http://localhost:29954/%x+3",
-                               md5.Sum([]byte("foo"))),
+                       fmt.Sprintf("http://%s/%x+3", listener.Addr().String(), md5.Sum([]byte("foo"))),
                        nil)
                req.Header.Add("Access-Control-Request-Method", "PUT")
                req.Header.Add("Access-Control-Request-Headers", "Authorization, X-Keep-Desired-Replicas")
@@ -392,8 +299,7 @@ func (s *ServerRequiredSuite) TestCorsHeaders(c *C) {
 
        {
                resp, err := http.Get(
-                       fmt.Sprintf("http://localhost:29954/%x+3",
-                               md5.Sum([]byte("foo"))))
+                       fmt.Sprintf("http://%s/%x+3", listener.Addr().String(), md5.Sum([]byte("foo"))))
                c.Check(err, Equals, nil)
                c.Check(resp.Header.Get("Access-Control-Allow-Headers"), Equals, "Authorization, Content-Length, Content-Type, X-Keep-Desired-Replicas")
                c.Check(resp.Header.Get("Access-Control-Allow-Origin"), Equals, "*")
@@ -401,14 +307,13 @@ func (s *ServerRequiredSuite) TestCorsHeaders(c *C) {
 }
 
 func (s *ServerRequiredSuite) TestPostWithoutHash(c *C) {
-       runProxy(c, []string{"keepproxy"}, 29955, false)
-       waitForListener()
+       runProxy(c, nil, false)
        defer closeListener()
 
        {
                client := http.Client{}
                req, err := http.NewRequest("POST",
-                       "http://localhost:29955/",
+                       "http://"+listener.Addr().String()+"/",
                        strings.NewReader("qux"))
                req.Header.Add("Authorization", "OAuth2 4axaw8zxe0qm22wa6urpp5nskcne8z88cvbupv653y1njyi05h")
                req.Header.Add("Content-Type", "application/octet-stream")
@@ -444,8 +349,7 @@ func (s *ServerRequiredSuite) TestStripHint(c *C) {
 //   With a valid but non-existing prefix (expect "\n")
 //   With an invalid prefix (expect error)
 func (s *ServerRequiredSuite) TestGetIndex(c *C) {
-       kc := runProxy(c, []string{"keepproxy"}, 28852, false)
-       waitForListener()
+       kc := runProxy(c, nil, false)
        defer closeListener()
 
        // Put "index-data" blocks
@@ -479,7 +383,7 @@ func (s *ServerRequiredSuite) TestGetIndex(c *C) {
                {hash[:3], true, false},  // with matching prefix
                {"abcdef", false, false}, // with no such prefix
        } {
-               indexReader, err := kc.GetIndex("proxy", spec.prefix)
+               indexReader, err := kc.GetIndex(TestProxyUUID, spec.prefix)
                c.Assert(err, Equals, nil)
                indexResp, err := ioutil.ReadAll(indexReader)
                c.Assert(err, Equals, nil)
@@ -502,13 +406,12 @@ func (s *ServerRequiredSuite) TestGetIndex(c *C) {
        }
 
        // GetIndex with invalid prefix
-       _, err = kc.GetIndex("proxy", "xyz")
+       _, err = kc.GetIndex(TestProxyUUID, "xyz")
        c.Assert((err != nil), Equals, true)
 }
 
 func (s *ServerRequiredSuite) TestPutAskGetInvalidToken(c *C) {
-       kc := runProxy(c, []string{"keepproxy"}, 28853, false)
-       waitForListener()
+       kc := runProxy(c, nil, false)
        defer closeListener()
 
        // Put a test block
@@ -546,7 +449,7 @@ func (s *ServerRequiredSuite) TestAskGetKeepProxyConnectionError(c *C) {
        // keepclient with no such keep server
        kc := keepclient.New(&arv)
        locals := map[string]string{
-               "proxy": "http://localhost:12345",
+               TestProxyUUID: "http://localhost:12345",
        }
        kc.SetServiceRoots(locals, nil, nil)
 
@@ -567,22 +470,24 @@ func (s *ServerRequiredSuite) TestAskGetKeepProxyConnectionError(c *C) {
 }
 
 func (s *NoKeepServerSuite) TestAskGetNoKeepServerError(c *C) {
-       kc := runProxy(c, []string{"keepproxy"}, 29999, false)
-       waitForListener()
+       kc := runProxy(c, nil, false)
        defer closeListener()
 
-       // Ask should result in temporary connection refused error
        hash := fmt.Sprintf("%x", md5.Sum([]byte("foo")))
-       _, _, err := kc.Ask(hash)
-       c.Check(err, NotNil)
-       errNotFound, _ := err.(*keepclient.ErrNotFound)
-       c.Check(errNotFound.Temporary(), Equals, true)
-       c.Assert(strings.Contains(err.Error(), "HTTP 502"), Equals, true)
-
-       // Get should result in temporary connection refused error
-       _, _, _, err = kc.Get(hash)
-       c.Check(err, NotNil)
-       errNotFound, _ = err.(*keepclient.ErrNotFound)
-       c.Check(errNotFound.Temporary(), Equals, true)
-       c.Assert(strings.Contains(err.Error(), "HTTP 502"), Equals, true)
+       for _, f := range []func()error {
+               func() error {
+                       _, _, err := kc.Ask(hash)
+                       return err
+               },
+               func() error {
+                       _, _, _, err := kc.Get(hash)
+                       return err
+               },
+       } {
+               err := f()
+               c.Assert(err, NotNil)
+               errNotFound, _ := err.(*keepclient.ErrNotFound)
+               c.Check(errNotFound.Temporary(), Equals, true)
+               c.Check(err, ErrorMatches, `.*HTTP 502.*`)
+       }
 }
index 3a3069ab7745c1efc0a21fd8c706565f65c525e0..52b59ec1ef797b1e09b25529b20285f5190166df 100644 (file)
@@ -82,6 +82,8 @@ func TestPullWorkerIntegration_GetNonExistingLocator(t *testing.T) {
        }
 
        pullRequest := SetupPullWorkerIntegrationTest(t, testData, false)
+       defer arvadostest.StopAPI()
+       defer arvadostest.StopKeep(2)
 
        performPullWorkerIntegrationTest(testData, pullRequest, t)
 }
@@ -97,6 +99,8 @@ func TestPullWorkerIntegration_GetExistingLocator(t *testing.T) {
        }
 
        pullRequest := SetupPullWorkerIntegrationTest(t, testData, true)
+       defer arvadostest.StopAPI()
+       defer arvadostest.StopKeep(2)
 
        performPullWorkerIntegrationTest(testData, pullRequest, t)
 }
index 9432a0d383b534236ff506c83ac98ed0c196c324..94281fa8bcbb89f5432614adf715376ad666beab 100644 (file)
@@ -470,9 +470,11 @@ func (s *DoMainTestSuite) Test_doMainWithSrcAndDstConfig(c *C) {
        args := []string{"-src", srcConfig.Name(), "-dst", dstConfig.Name()}
        os.Args = append(os.Args, args...)
 
-       // Start keepservers. Since we are not doing any tweaking as in setupRsync func,
-       // kcSrc and kcDst will be the same and no actual copying to dst will happen, but that's ok.
+       // Start keepservers. Since we are not doing any tweaking as
+       // in setupRsync func, kcSrc and kcDst will be the same and no
+       // actual copying to dst will happen, but that's ok.
        arvadostest.StartKeep(2, false)
+       defer arvadostest.StopKeep(2)
 
        err := doMain()
        c.Check(err, IsNil)