5824: Support configuration with a download-only host.
author: Tom Clegg <tom@curoverse.com>
Tue, 10 Nov 2015 06:29:11 +0000 (01:29 -0500)
committer: Tom Clegg <tom@curoverse.com>
Tue, 10 Nov 2015 06:29:11 +0000 (01:29 -0500)
apps/workbench/app/controllers/collections_controller.rb
apps/workbench/config/application.default.yml
apps/workbench/test/controllers/collections_controller_test.rb
doc/install/install-keep-web.html.textile.liquid

index 2228a07e556898d58c4ec9534d228141437a9215..847e5eedab728185f8b438d4ddb8f57f1e6819ba 100644 (file)
@@ -137,7 +137,8 @@ class CollectionsController < ApplicationController
       return
     end
 
-    if Rails.configuration.keep_web_url
+    if Rails.configuration.keep_web_url or
+        Rails.configuration.keep_web_download_url
       opts = {}
       if usable_token == params[:reader_token]
         opts[:path_token] = usable_token
@@ -327,7 +328,37 @@ class CollectionsController < ApplicationController
   def keep_web_url(uuid_or_pdh, file, opts)
     munged_id = uuid_or_pdh.sub('+', '-')
     fmt = {uuid_or_pdh: munged_id}
-    uri = URI.parse(Rails.configuration.keep_web_url % fmt)
+
+    tmpl = Rails.configuration.keep_web_url
+    if Rails.configuration.keep_web_download_url and
+        (!tmpl or opts[:disposition] == 'attachment')
+      # Prefer the attachment-only-host when we want an attachment
+      # (and when there is no preview link configured)
+      tmpl = Rails.configuration.keep_web_download_url
+    else
+      test_uri = URI.parse(tmpl % fmt)
+      if opts[:query_token] and
+          not test_uri.host.start_with?(munged_id + "--") and
+          not test_uri.host.start_with?(munged_id + ".")
+        # We're about to pass a token in the query string, but
+        # keep-web can't accept that safely at a single-origin URL
+        # template (unless it's -attachment-only-host).
+        tmpl = Rails.configuration.keep_web_download_url
+        if not tmpl
+          raise ArgumentError, "Download precluded by site configuration"
+        end
+        logger.warn("Using download link, even though inline content " \
+                    "was requested: #{test_uri.to_s}")
+      end
+    end
+
+    if tmpl == Rails.configuration.keep_web_download_url
+      # This takes us to keep-web's -attachment-only-host so there is
+      # no need to add ?disposition=attachment.
+      opts.delete :disposition
+    end
+
+    uri = URI.parse(tmpl % fmt)
     uri.path += '/' unless uri.path.end_with? '/'
     if opts[:path_token]
       uri.path += 't=' + opts[:path_token] + '/'
@@ -346,17 +377,6 @@ class CollectionsController < ApplicationController
       uri.query = query.to_query
     end
 
-    if query.include? 'api_token' and
-        query['disposition'] != 'attachment' and
-        not uri.host.start_with?(munged_id + "--") and
-        not uri.host.start_with?(munged_id + ".")
-      # keep-web refuses query tokens ("?api_token=X") unless it sees
-      # the collection ID in the hostname, or is running in
-      # attachment-only mode.
-      logger.warn("Single-origin keep_web_url can't serve inline content, " \
-                  "but redirecting anyway: #{uri.to_s}")
-    end
-
     uri.to_s
   end
 
index 0a9ee9f11663f5f830b19c89dd5a4650c3c804ff..dd499772e47d95bb8a9abc8af9addd8ccecdb1be 100644 (file)
@@ -226,17 +226,29 @@ common:
   # https://%{hostname}.webshell.uuid_prefix.arvadosapi.com/
   shell_in_a_box_url: false
 
-  # Format of download/preview links. If false, use Workbench's
-  # download facility.
+  # Format of preview links. If false, use keep_web_download_url
+  # instead, and disable inline preview. If both are false, use
+  # Workbench's built-in download mechanism.
   #
   # Examples:
   # keep_web_url: https://%{uuid_or_pdh}.collections.uuid_prefix.arvadosapi.com
   # keep_web_url: https://%{uuid_or_pdh}--collections.uuid_prefix.arvadosapi.com
   #
-  # Example supporting only public data and collection-sharing links:
-  # keep_web_url: https://collections.uuid_prefix.arvadosapi.com/c=%{uuid_or_pdh}
-  #
-  # Example supporting only download/attachment: (using keep-web
-  # -attachment-only-host collections.uuid_prefix.arvadosapi.com):
+  # Example supporting only public data and collection-sharing links
+  # (other data will be handled as downloads via keep_web_download_url):
   # keep_web_url: https://collections.uuid_prefix.arvadosapi.com/c=%{uuid_or_pdh}
   keep_web_url: false
+
+  # Format of download links. If false, use keep_web_url with
+  # disposition=attachment query param.
+  #
+  # The host here should match the -attachment-only-host argument
+  # given to keep-web.
+  #
+  # If keep_web_download_url is false, and keep_web_url uses a
+  # single-origin form, then Workbench will show an error page
+  # when asked to download or preview private data.
+  #
+  # Example:
+  # keep_web_download_url: https://download.uuid_prefix.arvadosapi.com/c=%{uuid_or_pdh}
+  keep_web_download_url: false
index c6db9de552909bef4a128685ccd13a93e88dcdfc..8c42145962af30718a4b6aa52c6b23e7dcd35307 100644 (file)
@@ -521,8 +521,9 @@ class CollectionsControllerTest < ActionController::TestCase
     assert_not_includes @response.body, '<a href="#Upload"'
   end
 
-  def setup_for_keep_web cfg='https://%{uuid_or_pdh}.collections.zzzzz.example'
+  def setup_for_keep_web cfg='https://%{uuid_or_pdh}.example', dl_cfg=false
     Rails.configuration.keep_web_url = cfg
+    Rails.configuration.keep_web_download_url = dl_cfg
     @controller.expects(:file_enumerator).never
   end
 
@@ -533,7 +534,7 @@ class CollectionsControllerTest < ActionController::TestCase
       id = api_fixture('collections')['w_a_z_file'][id_type]
       get :show_file, {uuid: id, file: "w a z"}, session_for(:active)
       assert_response :redirect
-      assert_equal "https://#{id.sub '+', '-'}.collections.zzzzz.example/_/w+a+z?api_token=#{tok}", @response.redirect_url
+      assert_equal "https://#{id.sub '+', '-'}.example/_/w+a+z?api_token=#{tok}", @response.redirect_url
     end
 
     test "Redirect to keep_web_url via #{id_type} with reader token" do
@@ -542,7 +543,7 @@ class CollectionsControllerTest < ActionController::TestCase
       id = api_fixture('collections')['w_a_z_file'][id_type]
       get :show_file, {uuid: id, file: "w a z", reader_token: tok}, session_for(:expired)
       assert_response :redirect
-      assert_equal "https://#{id.sub '+', '-'}.collections.zzzzz.example/t=#{tok}/_/w+a+z", @response.redirect_url
+      assert_equal "https://#{id.sub '+', '-'}.example/t=#{tok}/_/w+a+z", @response.redirect_url
     end
 
     test "Redirect to keep_web_url via #{id_type} with no token" do
@@ -551,7 +552,7 @@ class CollectionsControllerTest < ActionController::TestCase
       id = api_fixture('collections')['public_text_file'][id_type]
       get :show_file, {uuid: id, file: "Hello World.txt"}
       assert_response :redirect
-      assert_equal "https://#{id.sub '+', '-'}.collections.zzzzz.example/_/Hello+World.txt", @response.redirect_url
+      assert_equal "https://#{id.sub '+', '-'}.example/_/Hello+World.txt", @response.redirect_url
     end
 
     test "Redirect to keep_web_url via #{id_type} with disposition param" do
@@ -564,16 +565,17 @@ class CollectionsControllerTest < ActionController::TestCase
         disposition: 'attachment',
       }
       assert_response :redirect
-      assert_equal "https://#{id.sub '+', '-'}.collections.zzzzz.example/_/Hello+World.txt?disposition=attachment", @response.redirect_url
+      assert_equal "https://#{id.sub '+', '-'}.example/_/Hello+World.txt?disposition=attachment", @response.redirect_url
     end
 
-    test "Redirect to keep_web_url via #{id_type} using -attachment-only-host mode" do
-      setup_for_keep_web 'https://collections.zzzzz.example/c=%{uuid_or_pdh}'
+    test "Redirect to keep_web_download_url via #{id_type}" do
+      setup_for_keep_web('https://collections.example/c=%{uuid_or_pdh}',
+                         'https://download.example/c=%{uuid_or_pdh}')
       tok = api_fixture('api_client_authorizations')['active']['api_token']
       id = api_fixture('collections')['w_a_z_file'][id_type]
       get :show_file, {uuid: id, file: "w a z"}, session_for(:active)
       assert_response :redirect
-      assert_equal "https://collections.zzzzz.example/c=#{id.sub '+', '-'}/_/w+a+z?api_token=#{tok}", @response.redirect_url
+      assert_equal "https://download.example/c=#{id.sub '+', '-'}/_/w+a+z?api_token=#{tok}", @response.redirect_url
     end
   end
 
@@ -585,5 +587,42 @@ class CollectionsControllerTest < ActionController::TestCase
       get :show_file, {uuid: id, file: "w a z"}, session_for(:spectator)
       assert_response 404
     end
+
+    test "Redirect download to keep_web_download_url, anon #{anon}" do
+      config_anonymous anon
+      setup_for_keep_web('https://collections.example/c=%{uuid_or_pdh}',
+                         'https://download.example/c=%{uuid_or_pdh}')
+      tok = api_fixture('api_client_authorizations')['active']['api_token']
+      id = api_fixture('collections')['public_text_file']['uuid']
+      get :show_file, {
+        uuid: id,
+        file: 'Hello world.txt',
+        disposition: 'attachment',
+      }, session_for(:active)
+      assert_response :redirect
+      expect_url = "https://download.example/c=#{id.sub '+', '-'}/_/Hello+world.txt"
+      if not anon
+        expect_url += "?api_token=#{tok}"
+      end
+      assert_equal expect_url, @response.redirect_url
+    end
+  end
+
+  test "Error if file is impossible to retrieve from keep_web_url" do
+    # Cannot pass a session token using a single-origin keep-web URL,
+    # cannot read this collection without a session token.
+    setup_for_keep_web 'https://collections.example/c=%{uuid_or_pdh}', false
+    id = api_fixture('collections')['w_a_z_file']['uuid']
+    get :show_file, {uuid: id, file: "w a z"}, session_for(:active)
+    assert_response 422
+  end
+
+  test "Redirect preview to keep_web_download_url when preview is disabled" do
+    setup_for_keep_web false, 'https://download.example/c=%{uuid_or_pdh}'
+    tok = api_fixture('api_client_authorizations')['active']['api_token']
+    id = api_fixture('collections')['w_a_z_file']['uuid']
+    get :show_file, {uuid: id, file: "w a z"}, session_for(:active)
+    assert_response :redirect
+    assert_equal "https://download.example/c=#{id.sub '+', '-'}/_/w+a+z?api_token=#{tok}", @response.redirect_url
   end
 end
index 11a425d3476d5ae69ba74f0e81ab8dcd14d219fa..dedce5e17e717232004b3e008de3657bdb59ba46 100644 (file)
@@ -6,14 +6,15 @@ title: Install the keep-web server
 
 The keep-web server provides read-only HTTP access to files stored in Keep. It serves public data to unauthenticated clients, and serves private data to clients that supply Arvados API tokens. It can be installed anywhere with access to Keep services, typically behind a web proxy that provides SSL support. See the "godoc page":http://godoc.org/github.com/curoverse/arvados/services/keep-web for more detail.
 
-By convention, we use the following hostname for the keep-web service:
+By convention, we use the following hostnames for the keep-web service:
 
 <notextile>
-<pre><code>collections.<span class="userinput">uuid_prefix</span>.your.domain
+<pre><code>download.<span class="userinput">uuid_prefix</span>.your.domain
+collections.<span class="userinput">uuid_prefix</span>.your.domain
 </code></pre>
 </notextile>
 
-This hostname should resolve from anywhere on the internet.
+The above hostnames should resolve from anywhere on the internet.
 
 h2. Install keep-web
 
@@ -59,7 +60,11 @@ We recommend running @keep-web@ under "runit":https://packages.debian.org/search
 <notextile>
 <pre><code>export ARVADOS_API_HOST=<span class="userinput">uuid_prefix</span>.your.domain
 export ARVADOS_API_TOKEN="<span class="userinput">hoShoomoo2bai3Ju1xahg6aeng1siquuaZ1yae2gi2Uhaeng2r</span>"
-exec sudo -u nobody keep-web -listen=<span class="userinput">:9002</span> -allow-anonymous 2&gt;&amp;1
+exec sudo -u nobody keep-web \
+ -listen=<span class="userinput">:9002</span> \
+ -attachment-only-host=<span class="userinput">download.uuid_prefix.your.domain</span> \
+ -allow-anonymous \
+ 2&gt;&amp;1
 </code></pre>
 </notextile>
 
@@ -84,7 +89,10 @@ upstream keep-web {
 
 server {
   listen                <span class="userinput">[your public IP address]</span>:443 ssl;
-  server_name           collections.<span class="userinput">uuid_prefix</span>.your.domain *.collections.<span class="userinput">uuid_prefix</span>.your.domain ~.*--collections.<span class="userinput">uuid_prefix</span>.your.domain;
+  server_name           download.<span class="userinput">uuid_prefix</span>.your.domain
+                        collections.<span class="userinput">uuid_prefix</span>.your.domain
+                        *.collections.<span class="userinput">uuid_prefix</span>.your.domain
+                        ~.*--collections.<span class="userinput">uuid_prefix</span>.your.domain;
 
   proxy_connect_timeout 90s;
   proxy_read_timeout    300s;
@@ -104,17 +112,27 @@ server {
 h3. Configure DNS
 
 Configure your DNS servers so the following names resolve to your Nginx proxy's public IP address.
-* @*--collections.uuid_prefix.your.domain@, if your DNS server allows this without interfering with other DNS names; or
-* @*.collections.uuid_prefix.your.domain@, if you have a wildcard SSL certificate valid for these names; or
-* @collections.uuid_prefix.your.domain@, if neither of the above options is feasible. In this case, only unauthenticated requests will be served, i.e., public data and collection sharing links.
+* @download.uuid_prefix.your.domain@
+* @collections.uuid_prefix.your.domain@
+* @*--collections.uuid_prefix.your.domain@, if you have a wildcard SSL certificate valid for @*.uuid_prefix.your.domain@ and your DNS server allows this without interfering with other DNS names.
+* @*.collections.uuid_prefix.your.domain@, if you have a wildcard SSL certificate valid for these names.
+
+If neither of the above wildcard options is feasible, only unauthenticated requests (public data and collection sharing links) will be served as web content at @collections.uuid_prefix.your.domain@. The @download@ name will be used to serve authenticated content, but only as file downloads.
 
 h3. Tell Workbench about the keep-web service
 
-Add *one* of the following entries to your Workbench configuration file (@/etc/arvados/workbench/application.yml@), depending on your DNS setup:
+Add the following entry to your Workbench configuration file (@/etc/arvados/workbench/application.yml@):
+
+<notextile>
+<pre><code>keep_web_download_url: https://download.<span class="userinput">uuid_prefix</span>.your.domain/c=%{uuid_or_pdh}
+</code></pre>
+</notextile>
+
+Additionally, add *one* of the following entries to your Workbench configuration file, depending on your DNS setup:
 
 <notextile>
 <pre><code>keep_web_url: https://%{uuid_or_pdh}--collections.<span class="userinput">uuid_prefix</span>.your.domain
 keep_web_url: https://%{uuid_or_pdh}.collections.<span class="userinput">uuid_prefix</span>.your.domain
-keep_web_url: https://collections.<span class="userinput">uuid_prefix</span>.your.domain
+keep_web_url: https://collections.<span class="userinput">uuid_prefix</span>.your.domain/c=%{uuid_or_pdh}
 </code></pre>
 </notextile>