-// Keep-web provides read-only HTTP access to files stored in Keep. It
-// serves public data to anonymous and unauthenticated clients, and
-// serves private data to clients that supply Arvados API tokens. It
-// can be installed anywhere with access to Keep services, typically
-// behind a web proxy that supports TLS.
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+// Keep-web provides read/write HTTP (WebDAV) access to files stored
+// in Keep. It serves public data to anonymous and unauthenticated
+// clients, and serves private data to clients that supply Arvados API
+// tokens. It can be installed anywhere with access to Keep services,
+// typically behind a web proxy that supports TLS.
//
// See http://doc.arvados.org/install/install-keep-web.html.
//
-// Run "keep-web -help" to show all supported options.
+// Configuration
+//
+// The default configuration file location is
+// /etc/arvados/keep-web/keep-web.yml.
+//
+// Example configuration file
+//
+// Client:
+// APIHost: "zzzzz.arvadosapi.com:443"
+// AuthToken: ""
+// Insecure: false
+// Listen: :1234
+// AnonymousTokens:
+// - xxxxxxxxxxxxxxxxxxxx
+// AttachmentOnlyHost: ""
+// TrustAllContent: false
//
// Starting the server
//
-// Serve HTTP requests at port 1234 on all interfaces:
+// Start a server using the default config file
+// /etc/arvados/keep-web/keep-web.yml:
//
-// keep-web -address=:1234
+// keep-web
//
-// Serve HTTP requests at port 1234 on the interface with IP address 1.2.3.4:
+// Start a server using the config file /path/to/keep-web.yml:
//
-// keep-web -address=1.2.3.4:1234
+// keep-web -config /path/to/keep-web.yml
//
// Proxy configuration
//
-// Keep-web does not support SSL natively. Typically, it is installed
+// Keep-web does not support TLS natively. Typically, it is installed
// behind a proxy like nginx.
//
// Here is an example nginx configuration.
// }
// server {
// listen *:443 ssl;
-// server_name dl.example.com *.dl.example.com ~.*--dl.example.com;
+// server_name collections.example.com *.collections.example.com ~.*--collections.example.com;
// ssl_certificate /root/wildcard.example.com.crt;
// ssl_certificate_key /root/wildcard.example.com.key;
// location / {
// proxy. However, TLS is not used between nginx and keep-web, so
// intervening networks must be secured by other means.
//
+// Anonymous downloads
+//
+// The "AnonymousTokens" configuration entry is an array of tokens to
+// use when processing anonymous requests, i.e., whenever a web client
+// does not supply its own Arvados API token via path, query string,
+// cookie, or request header.
+//
+// "AnonymousTokens":["xxxxxxxxxxxxxxxxxxxxxxx"]
+//
+// See http://doc.arvados.org/install/install-keep-web.html for examples.
+//
// Download URLs
//
// The following "same origin" URL patterns are supported for public
-// collections (i.e., collections which can be served by keep-web
-// without making use of any credentials supplied by the client). See
-// "Same-origin URLs" below.
+// collections and collections shared anonymously via secret links
+// (i.e., collections which can be served by keep-web without making
+// use of any implicit credentials like cookies). See "Same-origin
+// URLs" below.
//
-// http://dl.example.com/c=uuid_or_pdh/path/file.txt
-// http://dl.example.com/c=uuid_or_pdh/t=TOKEN/path/file.txt
+// http://collections.example.com/c=uuid_or_pdh/path/file.txt
+// http://collections.example.com/c=uuid_or_pdh/t=TOKEN/path/file.txt
//
// The following "multiple origin" URL patterns are supported for all
// collections:
//
-// http://uuid_or_pdh--dl.example.com/path/file.txt
-// http://uuid_or_pdh--dl.example.com/t=TOKEN/path/file.txt
+// http://uuid_or_pdh--collections.example.com/path/file.txt
+// http://uuid_or_pdh--collections.example.com/t=TOKEN/path/file.txt
//
// In the "multiple origin" form, the string "--" can be replaced with
-// "." with identical results (assuming the upstream proxy is
+// "." with identical results (assuming the downstream proxy is
// configured accordingly). These two are equivalent:
//
-// http://uuid_or_pdh--dl.example.com/path/file.txt
-// http://uuid_or_pdh.dl.example.com/path/file.txt
+// http://uuid_or_pdh--collections.example.com/path/file.txt
+// http://uuid_or_pdh.collections.example.com/path/file.txt
//
-// The first form ("uuid_or_pdh--dl.example.com") minimizes the cost
-// and effort of deploying a wildcard TLS certificate for
-// *.dl.example.com. The second form is likely to be easier to
-// configure, and more efficient to run, on an upstream proxy.
+// The first form (with "--" instead of ".") avoids the cost and
+// effort of deploying a wildcard TLS certificate for
+// *.collections.example.com at sites that already have a wildcard
+// certificate for *.example.com. The second form is likely to be
+// easier to configure, and more efficient to run, on a downstream
+// proxy.
//
-// In all of the above forms, the "dl.example.com" part can be
-// anything at all: keep-web itself ignores everything after the first
-// "." or "--". (Of course, in order for clients to connect at all,
-// DNS and any relevant proxies must be configured accordingly.)
+// In all of the above forms, the "collections.example.com" part can
+// be anything at all: keep-web itself ignores everything after the
+// first "." or "--". (Of course, in order for clients to connect at
+// all, DNS and any relevant proxies must be configured accordingly.)
//
// In all of the above forms, the "uuid_or_pdh" part can be either a
// collection UUID or a portable data hash with the "+" character
-// optionally replaced by "-". (Replacing "+" with "-" is mandatory
-// when "uuid_or_pdh" appears in the domain name only because "+" is
+// optionally replaced by "-". (When "uuid_or_pdh" appears in the
+// domain name, replacing "+" with "-" is mandatory, because "+" is
// not a valid character in a domain name.)
//
// In all of the above forms, a top level directory called "_" is
// 1f4b0bc7583c2a7f9102c395f4ffc5e3+45, the following URLs are
// interchangeable:
//
-// http://zzzzz-4zz18-znfnqtbbv4spc3w.dl.example.com/foo/bar.txt
-// http://zzzzz-4zz18-znfnqtbbv4spc3w.dl.example.com/_/foo/bar.txt
-// http://zzzzz-4zz18-znfnqtbbv4spc3w--dl.example.com/_/foo/bar.txt
+// http://zzzzz-4zz18-znfnqtbbv4spc3w.collections.example.com/foo/bar.txt
+// http://zzzzz-4zz18-znfnqtbbv4spc3w.collections.example.com/_/foo/bar.txt
+// http://zzzzz-4zz18-znfnqtbbv4spc3w--collections.example.com/_/foo/bar.txt
+//
+// The following URLs are read-only, but otherwise interchangeable
+// with the above:
+//
// http://1f4b0bc7583c2a7f9102c395f4ffc5e3-45--foo.example.com/foo/bar.txt
// http://1f4b0bc7583c2a7f9102c395f4ffc5e3-45--.invalid/foo/bar.txt
+// http://collections.example.com/by_id/1f4b0bc7583c2a7f9102c395f4ffc5e3%2B45/foo/bar.txt
+// http://collections.example.com/by_id/zzzzz-4zz18-znfnqtbbv4spc3w/foo/bar.txt
+//
+// If the collection is named "MyCollection" and located in a project
+// called "MyProject" which is in the home project of a user with
+// username "bob", the following read-only URL is also available
+// when authenticating as bob:
+//
+// http://collections.example.com/users/bob/MyProject/MyCollection/foo/bar.txt
//
// An additional form is supported specifically to make it more
// convenient to maintain support for existing Workbench download
// links:
//
-// http://dl.example.com/collections/download/uuid_or_pdh/TOKEN/foo/bar.txt
+// http://collections.example.com/collections/download/uuid_or_pdh/TOKEN/foo/bar.txt
//
// A regular Workbench "download" link is also accepted, but
// credentials passed via cookie, header, etc. are ignored. Only
// public data can be served this way:
//
-// http://dl.example.com/collections/uuid_or_pdh/foo/bar.txt
+// http://collections.example.com/collections/uuid_or_pdh/foo/bar.txt
+//
+// Collections can also be accessed (read-only) via "/by_id/X" where X
+// is a UUID or portable data hash.
//
// Authorization mechanisms
//
// the token stripped from the query string and added to a cookie
// instead.
//
+// Indexes
+//
+// Keep-web returns a generic HTML index listing when a directory is
+// requested with the GET method. It does not serve a default file
+// like "index.html". Directory listings are also returned for WebDAV
+// PROPFIND requests.
+//
// Compatibility
//
// Client-provided authorization tokens are ignored if the client does
// In such cases -- for example, a site which is not reachable from
// the internet, where some data is world-readable from Arvados's
// perspective but is intended to be available only to users within
-// the local network -- the upstream proxy should configured to return
-// 401 for all paths beginning with "/c=".
+// the local network -- the downstream proxy should be configured to
+// return 401 for all paths beginning with "/c=".
//
// Same-origin URLs
//
// current viewer's credentials to download additional data from
// collection Y -- data which is accessible to the current viewer, but
// not to the author of collection X -- from the same origin
-// (``https://dl.example.com/'') and upload it to some other site
-// chosen by the author of collection X.
+// (``https://collections.example.com/'') and upload it to some other
+// site chosen by the author of collection X.
//
// Attachment-Only host
//
// will be accepted and all responses will have a
// "Content-Disposition: attachment" header. This behavior is invoked
// only when the designated origin matches exactly the Host header
-// provided by the client or upstream proxy.
+// provided by the client or downstream proxy.
//
-// keep-web -address :9999 -attachment-only-host domain.example:9999
+// "AttachmentOnlyHost":"domain.example:9999"
//
// Trust All Content mode
//
-// In "trust all content" mode, Keep-web will accept credentials (API
+// In TrustAllContent mode, Keep-web will accept credentials (API
// tokens) and serve any collection X at
-// "https://dl.example.com/collections/X/path/file.ext". This is
+// "https://collections.example.com/c=X/path/file.ext". This is
// UNSAFE except in the special case where everyone who is able write
// ANY data to Keep, and every JavaScript and HTML file written to
// Keep, is also trusted to read ALL of the data in Keep.
//
// In such cases you can enable trust-all-content mode.
//
-// keep-web -address :9999 -trust-all-content
+// "TrustAllContent":true
//
-// When using trust-all-content mode, the only effect of the
-// -attachment-only-host option is to add a "Content-Disposition:
+// When TrustAllContent is enabled, the only effect of the
+// AttachmentOnlyHost setting is to add a "Content-Disposition:
// attachment" header.
//
-// keep-web -address :9999 -attachment-only-host domain.example:9999 -trust-all-content
+// "AttachmentOnlyHost":"domain.example:9999",
+// "TrustAllContent":true
+//
+// Depending on your site configuration, you might also want to enable
+// the "trust all content" setting in Workbench. Normally, Workbench
+// avoids redirecting requests to keep-web if they depend on
+// TrustAllContent being enabled.
//
package main