Merge branch 'master' into 7661-fuse-by-pdh
author: radhika <radhika@curoverse.com>
Mon, 9 Nov 2015 15:43:13 +0000 (10:43 -0500)
committer: radhika <radhika@curoverse.com>
Mon, 9 Nov 2015 15:43:13 +0000 (10:43 -0500)
49 files changed:
doc/_config.yml
doc/install/install-compute-node.html.textile.liquid
doc/install/install-crunch-dispatch.html.textile.liquid
doc/install/install-keep-web.html.textile.liquid [new file with mode: 0644]
doc/install/install-keepproxy.html.textile.liquid
sdk/cli/bin/crunch-job
sdk/cwl/arvados_cwl/__init__.py
sdk/cwl/bin/arvados-cwl-runner [new file with mode: 0755]
sdk/cwl/setup.py
sdk/go/arvadosclient/arvadosclient.go
sdk/go/arvadosclient/arvadosclient_test.go
sdk/go/arvadostest/fixtures.go [new file with mode: 0644]
sdk/go/auth/auth.go
sdk/go/keepclient/collectionreader.go [new file with mode: 0644]
sdk/go/keepclient/collectionreader_test.go [new file with mode: 0644]
sdk/go/keepclient/keepclient.go
sdk/go/keepclient/keepclient_test.go
sdk/go/keepclient/support.go
sdk/go/manifest/manifest.go
sdk/go/manifest/manifest_test.go
sdk/python/arvados/commands/run.py
sdk/python/tests/run_test_server.py
services/api/lib/create_superuser_token.rb [new file with mode: 0755]
services/api/script/create_superuser_token.rb
services/api/test/fixtures/api_client_authorizations.yml
services/api/test/fixtures/collections.yml
services/api/test/unit/create_superuser_token_test.rb [new file with mode: 0644]
services/crunchstat/crunchstat_test.go
services/datamanager/datamanager_test.go
services/datamanager/keep/keep.go
services/dockercleaner/arvados_docker/cleaner.py
services/dockercleaner/tests/test_cleaner.py
services/keep-web/.gitignore [new file with mode: 0644]
services/keep-web/anonymous.go [new file with mode: 0644]
services/keep-web/doc.go [new file with mode: 0644]
services/keep-web/handler.go [new file with mode: 0644]
services/keep-web/handler_test.go [new file with mode: 0644]
services/keep-web/main.go [new file with mode: 0644]
services/keep-web/server.go [new file with mode: 0644]
services/keep-web/server_test.go [new file with mode: 0644]
services/keepproxy/keepproxy.go
services/keepproxy/keepproxy_test.go
services/keepstore/keepstore.go
services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
services/nodemanager/arvnodeman/computenode/dispatch/slurm.py
services/nodemanager/arvnodeman/daemon.py
services/nodemanager/tests/test_computenode_dispatch.py
services/nodemanager/tests/test_daemon.py
tools/keep-rsync/keep-rsync_test.go

index 75cb997d78b0fe0d80d869023185a11dbfa0bfbd..2f37f5af0803c360f2c0cce2d08389d64b21249f 100644 (file)
@@ -155,6 +155,7 @@ navbar:
       - install/install-keepstore.html.textile.liquid
       - install/configure-azure-blob-storage.html.textile.liquid
       - install/install-keepproxy.html.textile.liquid
+      #- install/install-keep-web.html.textile.liquid
       - install/install-crunch-dispatch.html.textile.liquid
       - install/install-compute-node.html.textile.liquid
     - Helpful hints:
index 250d1dcc40363bb4fc73c116ee6edb345bd3a280..aa4f37d639704f33dc10b2f9e71db1c36a6c129a 100644 (file)
@@ -78,6 +78,10 @@ h2. Configure the Docker cleaner
 
 The arvados-docker-cleaner program removes least recently used docker images as needed to keep disk usage below a configured limit.
 
+{% include 'notebox_begin' %}
+This also removes all containers as soon as they exit, as if they were run with `docker run --rm`. If you need to debug or inspect containers after they stop, temporarily stop arvados-docker-cleaner or run it with `--remove-stopped-containers never`.
+{% include 'notebox_end' %}
+
 On Debian-based systems, install runit:
 
 <notextile>
index 370a6e7c3d04fce578ccc1c231e82ec0e44960f6..d632f9bbd61974966499090bed0c71949633edac 100644 (file)
@@ -185,7 +185,7 @@ export RAILS_ENV=production
 export CRUNCH_JOB_DOCKER_BIN=<span class="userinput">docker.io</span>
 
 fuser -TERM -k $CRUNCH_DISPATCH_LOCKFILE || true
-cd /var/www/arvados-api/services/api
+cd /var/www/arvados-api/current
 exec $rvmexec bundle exec ./script/crunch-dispatch.rb 2>&1
 </code></pre>
 </notextile>
diff --git a/doc/install/install-keep-web.html.textile.liquid b/doc/install/install-keep-web.html.textile.liquid
new file mode 100644 (file)
index 0000000..11a425d
--- /dev/null
@@ -0,0 +1,120 @@
+---
+layout: default
+navsection: installguide
+title: Install the keep-web server
+...
+
+The keep-web server provides read-only HTTP access to files stored in Keep. It serves public data to unauthenticated clients, and serves private data to clients that supply Arvados API tokens. It can be installed anywhere with access to Keep services, typically behind a web proxy that provides SSL support. See the "godoc page":http://godoc.org/github.com/curoverse/arvados/services/keep-web for more detail.
+
+By convention, we use the following hostname for the keep-web service:
+
+<notextile>
+<pre><code>collections.<span class="userinput">uuid_prefix</span>.your.domain
+</code></pre>
+</notextile>
+
+This hostname should resolve from anywhere on the internet.
+
+h2. Install keep-web
+
+On Debian-based systems:
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo apt-get install keep-web</span>
+</code></pre>
+</notextile>
+
+On Red Hat-based systems:
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo yum install keep-web</span>
+</code></pre>
+</notextile>
+
+Verify that @keep-web@ is functional:
+
+<notextile>
+<pre><code>~$ <span class="userinput">keep-web -h</span>
+Usage of keep-web:
+  -allow-anonymous
+        Serve public data to anonymous clients. Try the token supplied in the ARVADOS_API_TOKEN environment variable when none of the tokens provided in an HTTP request succeed in reading the desired collection. (default false)
+  -attachment-only-host string
+        Accept credentials, and add "Content-Disposition: attachment" response headers, for requests at this hostname:port. Prohibiting inline display makes it possible to serve untrusted and non-public content from a single origin, i.e., without wildcard DNS or SSL.
+  -listen string
+        Address to listen on: "host:port", or ":port" to listen on all interfaces. (default ":80")
+  -trust-all-content
+        Serve non-public content from a single origin. Dangerous: read docs before using!
+</code></pre>
+</notextile>
+
+If you intend to use Keep-web to serve public data to anonymous clients, configure it with an anonymous token. You can use the same one you used when you set up your Keepproxy server, or use the following command on the <strong>API server</strong> to create another:
+
+<notextile>
+<pre><code>/var/www/arvados-api/current/script$ <span class="userinput">RAILS_ENV=production bundle exec ./get_anonymous_user_token.rb</span>
+hoShoomoo2bai3Ju1xahg6aeng1siquuaZ1yae2gi2Uhaeng2r
+</code></pre></notextile>
+
+We recommend running @keep-web@ under "runit":https://packages.debian.org/search?keywords=runit or a similar supervisor. The basic command to start @keep-web@ is:
+
+<notextile>
+<pre><code>export ARVADOS_API_HOST=<span class="userinput">uuid_prefix</span>.your.domain
+export ARVADOS_API_TOKEN="<span class="userinput">hoShoomoo2bai3Ju1xahg6aeng1siquuaZ1yae2gi2Uhaeng2r</span>"
+exec sudo -u nobody keep-web -listen=<span class="userinput">:9002</span> -allow-anonymous 2&gt;&amp;1
+</code></pre>
+</notextile>
+
+Omit the @-allow-anonymous@ argument if you do not want to serve public data.
+
+Set @ARVADOS_API_HOST_INSECURE=1@ if your API server's SSL certificate is not signed by a recognized CA.
+
+h3. Set up a reverse proxy with SSL support
+
+The keep-web service will be accessible from anywhere on the internet, so we recommend using SSL for transport encryption.
+
+This is best achieved by putting a reverse proxy with SSL support in front of keep-web, running on port 443 and passing requests to keep-web on port 9002 (or whatever port you chose in your run script).
+
+Note: A wildcard SSL certificate is required in order to proxy keep-web effectively.
+
+For example, using Nginx:
+
+<notextile><pre>
+upstream keep-web {
+  server                127.0.0.1:<span class="userinput">9002</span>;
+}
+
+server {
+  listen                <span class="userinput">[your public IP address]</span>:443 ssl;
+  server_name           collections.<span class="userinput">uuid_prefix</span>.your.domain *.collections.<span class="userinput">uuid_prefix</span>.your.domain ~.*--collections.<span class="userinput">uuid_prefix</span>.your.domain;
+
+  proxy_connect_timeout 90s;
+  proxy_read_timeout    300s;
+
+  ssl                   on;
+  ssl_certificate       <span class="userinput">/YOUR/PATH/TO/cert.pem</span>;
+  ssl_certificate_key   <span class="userinput">/YOUR/PATH/TO/cert.key</span>;
+
+  location / {
+    proxy_pass          http://keep-web;
+    proxy_set_header    Host            $host;
+    proxy_set_header    X-Forwarded-For $proxy_add_x_forwarded_for;
+  }
+}
+</pre></notextile>
+
+h3. Configure DNS
+
+Configure your DNS servers so the following names resolve to your Nginx proxy's public IP address.
+* @*--collections.uuid_prefix.your.domain@, if your DNS server allows this without interfering with other DNS names; or
+* @*.collections.uuid_prefix.your.domain@, if you have a wildcard SSL certificate valid for these names; or
+* @collections.uuid_prefix.your.domain@, if neither of the above options is feasible. In this case, only unauthenticated requests will be served, i.e., public data and collection sharing links.
+
+h3. Tell Workbench about the keep-web service
+
+Add *one* of the following entries to your Workbench configuration file (@/etc/arvados/workbench/application.yml@), depending on your DNS setup:
+
+<notextile>
+<pre><code>keep_web_url: https://%{uuid_or_pdh}--collections.<span class="userinput">uuid_prefix</span>.your.domain
+keep_web_url: https://%{uuid_or_pdh}.collections.<span class="userinput">uuid_prefix</span>.your.domain
+keep_web_url: https://collections.<span class="userinput">uuid_prefix</span>.your.domain
+</code></pre>
+</notextile>
index 6a531a37848d2c2eaa5af34e20fb63c32351f4bf..5a5b66aaaef98c1ee2e42525df2c881655baf3ec 100644 (file)
@@ -4,9 +4,12 @@ navsection: installguide
 title: Install Keepproxy server
 ...
 
-The Keepproxy server is a gateway into your Keep storage. Unlike the Keepstore servers, which are only accessible on the local LAN, Keepproxy is designed to provide secure access into Keep from anywhere on the internet.
+The Keepproxy server is a gateway into your Keep storage. Unlike the Keepstore servers, which are only accessible on the local LAN, Keepproxy is suitable for clients located elsewhere on the internet. Specifically, in contrast to Keepstore:
+* A client writing through Keepproxy generates less network traffic: the client sends a single copy of a data block, and Keepproxy sends copies to the appropriate Keepstore servers.
+* A client can write through Keepproxy without precomputing content hashes. Notably, the browser-based upload feature in Workbench requires Keepproxy.
+* Keepproxy checks API token validity before processing requests. (Clients that can connect directly to Keepstore can use it as scratch space even without a valid API token.)
 
-By convention, we use the following hostname for the Keepproxy:
+By convention, we use the following hostname for the Keepproxy server:
 
 <div class="offset1">
 table(table table-bordered table-condensed).
@@ -36,12 +39,13 @@ Verify that Keepproxy is functional:
 
 <notextile>
 <pre><code>~$ <span class="userinput">keepproxy -h</span>
-Usage of default:
+Usage of keepproxy:
   -default-replicas=2: Default number of replicas to write if not specified by the client.
   -listen=":25107": Interface on which to listen for requests, in the format ipaddr:port. e.g. -listen=10.0.1.24:8000. Use -listen=:port to listen on all network interfaces.
   -no-get=false: If set, disable GET operations
   -no-put=false: If set, disable PUT operations
   -pid="": Path to write pid file
+  -timeout=15: Timeout on requests to internal Keep services (default 15 seconds)
 </code></pre>
 </notextile>
 
@@ -52,7 +56,7 @@ The Keepproxy server needs a token to talk to the API server.
 On the <strong>API server</strong>, use the following command to create the token:
 
 <notextile>
-<pre><code>~/arvados/services/api/script$ <span class="userinput">RAILS_ENV=production bundle exec ./get_anonymous_user_token.rb</span>
+<pre><code>/var/www/arvados-api/current/script$ <span class="userinput">RAILS_ENV=production bundle exec ./get_anonymous_user_token.rb</span>
 hoShoomoo2bai3Ju1xahg6aeng1siquuaZ1yae2gi2Uhaeng2r
 </code></pre></notextile>
 
index a8de76bec210febd0720102279b0db8cce250335..e2a4e264c3697e916ca8c88f98cf3a88d80e0e9a 100755 (executable)
@@ -467,7 +467,7 @@ fi
     }
     srun(["srun", "--nodelist=" . $node[0]],
          ["/bin/sh", "-ec",
-          "a=`$docker_bin run --rm $try_user_arg $docker_hash id --user` && " .
+          "a=`$docker_bin run $try_user_arg $docker_hash id --user` && " .
           " test \$a -ne 0"],
          {fork => 1});
     if ($? == 0) {
@@ -896,9 +896,10 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
     $command .= "&& exec arv-mount --by-pdh --allow-other $ENV{TASK_KEEPMOUNT} --exec ";
     if ($docker_hash)
     {
-      my $cidfile = "$ENV{CRUNCH_TMP}/$Jobstep->{arvados_task}->{uuid}-$Jobstep->{failures}.cid";
+      my $containername = "$Jobstep->{arvados_task}->{uuid}-$Jobstep->{failures}";
+      my $cidfile = "$ENV{CRUNCH_TMP}/$containername.cid";
       $command .= "crunchstat -cgroup-root=/sys/fs/cgroup -cgroup-parent=docker -cgroup-cid=$cidfile -poll=10000 ";
-      $command .= "$docker_bin run --rm=true --attach=stdout --attach=stderr --attach=stdin -i \Q$dockeruserarg\E --cidfile=$cidfile --sig-proxy ";
+      $command .= "$docker_bin run --name=$containername --attach=stdout --attach=stderr --attach=stdin -i \Q$dockeruserarg\E --cidfile=$cidfile --sig-proxy ";
       # We only set memory limits if Docker lets us limit both memory and swap.
       # Memory limits alone have been supported longer, but subprocesses tend
       # to get SIGKILL if they exceed that without any swap limit set.
index f3298ec4fa30627be96216f82b5609bc9619cd07..4198c34482ccd0f6fa54daa9e5a9d1d143db2ee1 100644 (file)
@@ -14,6 +14,7 @@ import fnmatch
 import logging
 import re
 import os
+
 from cwltool.process import get_feature
 
 logger = logging.getLogger('arvados.cwl-runner')
@@ -40,24 +41,37 @@ def arv_docker_get_image(api_client, dockerRequirement, pull_image):
 
     return dockerRequirement["dockerImageId"]
 
-class CollectionFsAccess(cwltool.draft2tool.StdFsAccess):
+
+class CollectionFsAccess(cwltool.process.StdFsAccess):
     def __init__(self, basedir):
         self.collections = {}
         self.basedir = basedir
 
     def get_collection(self, path):
         p = path.split("/")
-        if arvados.util.keep_locator_pattern.match(p[0]):
-            if p[0] not in self.collections:
-                self.collections[p[0]] = arvados.collection.CollectionReader(p[0])
-            return (self.collections[p[0]], "/".join(p[1:]))
+        if p[0].startswith("keep:") and arvados.util.keep_locator_pattern.match(p[0][5:]):
+            pdh = p[0][5:]
+            if pdh not in self.collections:
+                self.collections[pdh] = arvados.collection.CollectionReader(pdh)
+            return (self.collections[pdh], "/".join(p[1:]))
         else:
             return (None, path)
 
     def _match(self, collection, patternsegments, parent):
+        if not patternsegments:
+            return []
+
+        if not isinstance(collection, arvados.collection.RichCollectionBase):
+            return []
+
         ret = []
+        # iterate over the files and subcollections in 'collection'
         for filename in collection:
-            if fnmatch.fnmatch(filename, patternsegments[0]):
+            if patternsegments[0] == '.':
+                # Pattern contains something like "./foo" so just shift
+                # past the "./"
+                ret.extend(self._match(collection, patternsegments[1:], parent))
+            elif fnmatch.fnmatch(filename, patternsegments[0]):
                 cur = os.path.join(parent, filename)
                 if len(patternsegments) == 1:
                     ret.append(cur)
@@ -68,7 +82,7 @@ class CollectionFsAccess(cwltool.draft2tool.StdFsAccess):
     def glob(self, pattern):
         collection, rest = self.get_collection(pattern)
         patternsegments = rest.split("/")
-        return self._match(collection, patternsegments, collection.manifest_locator())
+        return self._match(collection, patternsegments, "keep:" + collection.manifest_locator())
 
     def open(self, fn, mode):
         collection, rest = self.get_collection(fn)
@@ -97,15 +111,17 @@ class ArvadosJob(object):
 
         if self.generatefiles:
             vwd = arvados.collection.Collection()
+            script_parameters["task.vwd"] = {}
             for t in self.generatefiles:
                 if isinstance(self.generatefiles[t], dict):
-                    src, rest = self.arvrunner.fs_access.get_collection(self.generatefiles[t]["path"][6:])
+                    src, rest = self.arvrunner.fs_access.get_collection(self.generatefiles[t]["path"].replace("$(task.keep)/", "keep:"))
                     vwd.copy(rest, t, source_collection=src)
                 else:
                     with vwd.open(t, "w") as f:
                         f.write(self.generatefiles[t])
             vwd.save_new()
-            script_parameters["task.vwd"] = vwd.portable_data_hash()
+            for t in self.generatefiles:
+                script_parameters["task.vwd"][t] = "$(task.keep)/%s/%s" % (vwd.portable_data_hash(), t)
 
         script_parameters["task.env"] = {"TMPDIR": "$(task.tmpdir)"}
         if self.environment:
@@ -120,22 +136,26 @@ class ArvadosJob(object):
         (docker_req, docker_is_req) = get_feature(self, "DockerRequirement")
         if docker_req and kwargs.get("use_container") is not False:
             runtime_constraints["docker_image"] = arv_docker_get_image(self.arvrunner.api, docker_req, pull_image)
-            runtime_constraints["arvados_sdk_version"] = "master"
 
-        response = self.arvrunner.api.jobs().create(body={
-            "script": "run-command",
-            "repository": "arvados",
-            "script_version": "master",
-            "script_parameters": script_parameters,
-            "runtime_constraints": runtime_constraints
-        }, find_or_create=kwargs.get("enable_reuse", True)).execute()
+        try:
+            response = self.arvrunner.api.jobs().create(body={
+                "script": "crunchrunner",
+                "repository": kwargs["repository"],
+                "script_version": "master",
+                "script_parameters": {"tasks": [script_parameters]},
+                "runtime_constraints": runtime_constraints
+            }, find_or_create=kwargs.get("enable_reuse", True)).execute()
+
+            self.arvrunner.jobs[response["uuid"]] = self
 
-        self.arvrunner.jobs[response["uuid"]] = self
+            logger.info("Job %s is %s", response["uuid"], response["state"])
 
-        logger.info("Job %s is %s", response["uuid"], response["state"])
+            if response["state"] in ("Complete", "Failed", "Cancelled"):
+                self.done(response)
+        except Exception as e:
+            logger.error("Got error %s" % str(e))
+            self.output_callback({}, "permanentFail")
 
-        if response["state"] in ("Complete", "Failed", "Cancelled"):
-            self.done(response)
 
     def done(self, record):
         try:
@@ -146,27 +166,28 @@ class ArvadosJob(object):
 
             try:
                 outputs = {}
-                outputs = self.collect_outputs(record["output"])
+                outputs = self.collect_outputs("keep:" + record["output"])
             except Exception as e:
-                logger.warn(str(e))
+                logger.exception("Got exception while collecting job outputs:")
                 processStatus = "permanentFail"
 
             self.output_callback(outputs, processStatus)
         finally:
             del self.arvrunner.jobs[record["uuid"]]
 
+
 class ArvPathMapper(cwltool.pathmapper.PathMapper):
     def __init__(self, arvrunner, referenced_files, basedir, **kwargs):
-        self._pathmap = {}
+        self._pathmap = arvrunner.get_uploaded()
         uploadfiles = []
 
-        pdh_path = re.compile(r'^[0-9a-f]{32}\+\d+/.+')
+        pdh_path = re.compile(r'^keep:[0-9a-f]{32}\+\d+/.+')
 
         for src in referenced_files:
             if isinstance(src, basestring) and pdh_path.match(src):
-                self._pathmap[src] = (src, "/keep/%s" % src)
-            else:
-                ab = src if os.path.isabs(src) else os.path.join(basedir, src)
+                self._pathmap[src] = (src, "$(task.keep)/%s" % src[5:])
+            if src not in self._pathmap:
+                ab = cwltool.pathmapper.abspath(src, basedir)
                 st = arvados.commands.run.statfile("", ab)
                 if kwargs.get("conformance_test"):
                     self._pathmap[src] = (src, ab)
@@ -178,16 +199,21 @@ class ArvPathMapper(cwltool.pathmapper.PathMapper):
                     raise cwltool.workflow.WorkflowException("Input file path '%s' is invalid" % st)
 
         if uploadfiles:
-            arvados.commands.run.uploadfiles([u[2] for u in uploadfiles], arvrunner.api, dry_run=kwargs.get("dry_run"), num_retries=3)
+            arvados.commands.run.uploadfiles([u[2] for u in uploadfiles],
+                                             arvrunner.api,
+                                             dry_run=kwargs.get("dry_run"),
+                                             num_retries=3,
+                                             fnPattern="$(task.keep)/%s/%s")
 
         for src, ab, st in uploadfiles:
+            arvrunner.add_uploaded(src, (ab, st.fn))
             self._pathmap[src] = (ab, st.fn)
 
 
 
 class ArvadosCommandTool(cwltool.draft2tool.CommandLineTool):
     def __init__(self, arvrunner, toolpath_object, **kwargs):
-        super(ArvadosCommandTool, self).__init__(toolpath_object, **kwargs)
+        super(ArvadosCommandTool, self).__init__(toolpath_object, outdir="$(task.outdir)", tmpdir="$(task.tmpdir)", **kwargs)
         self.arvrunner = arvrunner
 
     def makeJobRunner(self):
@@ -204,6 +230,7 @@ class ArvCwlRunner(object):
         self.lock = threading.Lock()
         self.cond = threading.Condition(self.lock)
         self.final_output = None
+        self.uploaded = {}
 
     def arvMakeTool(self, toolpath_object, **kwargs):
         if "class" in toolpath_object and toolpath_object["class"] == "CommandLineTool":
@@ -234,6 +261,12 @@ class ArvCwlRunner(object):
                         finally:
                             self.cond.release()
 
+    def get_uploaded(self):
+        return self.uploaded.copy()
+
+    def add_uploaded(self, src, pair):
+        self.uploaded[src] = pair
+
     def arvExecutor(self, tool, job_order, input_basedir, args, **kwargs):
         events = arvados.events.subscribe(arvados.api('v1'), [["object_uuid", "is_a", "arvados#job"]], self.on_message)
 
@@ -241,6 +274,7 @@ class ArvCwlRunner(object):
 
         kwargs["fs_access"] = self.fs_access
         kwargs["enable_reuse"] = args.enable_reuse
+        kwargs["repository"] = args.repository
 
         if kwargs.get("conformance_test"):
             return cwltool.main.single_job_executor(tool, job_order, input_basedir, args, **kwargs)
@@ -282,7 +316,7 @@ class ArvCwlRunner(object):
 
 def main(args, stdout, stderr, api_client=None):
     runner = ArvCwlRunner(api_client=arvados.api('v1'))
-    args.append("--leave-outputs")
+    args.insert(0, "--leave-outputs")
     parser = cwltool.main.arg_parser()
     exgroup = parser.add_mutually_exclusive_group()
     exgroup.add_argument("--enable-reuse", action="store_true",
@@ -292,4 +326,6 @@ def main(args, stdout, stderr, api_client=None):
                         default=False, dest="enable_reuse",
                         help="")
 
+    parser.add_argument('--repository', type=str, default="peter/crunchrunner", help="Repository containing the 'crunchrunner' program.")
+
     return cwltool.main.main(args, executor=runner.arvExecutor, makeTool=runner.arvMakeTool, parser=parser)
diff --git a/sdk/cwl/bin/arvados-cwl-runner b/sdk/cwl/bin/arvados-cwl-runner
new file mode 100755 (executable)
index 0000000..f31aefd
--- /dev/null
@@ -0,0 +1,7 @@
+#!/usr/bin/env python
+
+import sys
+
+from arvados_cwl import main
+
+sys.exit(main(sys.argv[1:], sys.stdout, sys.stderr))
index 4812252e0c1ec49c612668b69f0fbcb4ca3c6b26..bcf6b963830aca8570545045ab112ee79aa8216d 100644 (file)
@@ -26,11 +26,12 @@ setup(name='arvados-cwl-runner',
       license='Apache 2.0',
       packages=find_packages(),
       scripts=[
-          'bin/cwl-runner'
+          'bin/cwl-runner',
+          'bin/arvados-cwl-runner'
       ],
       install_requires=[
-          'cwltool==1.0.20150722144138',
-          'arvados-python-client'
+          'cwltool>=1.0.20151026181844',
+          'arvados-python-client>=0.1.20151023214338'
       ],
       zip_safe=True,
       cmdclass={'egg_info': tagger},
index 1cce0a7fc92d24e21fa694add86c75c63952eb46..18e1074bf6f6c801c21593bc57586a902807f8b4 100644 (file)
@@ -14,6 +14,7 @@ import (
        "os"
        "regexp"
        "strings"
+       "time"
 )
 
 type StringMatcher func(string) bool
@@ -25,6 +26,12 @@ var MissingArvadosApiHost = errors.New("Missing required environment variable AR
 var MissingArvadosApiToken = errors.New("Missing required environment variable ARVADOS_API_TOKEN")
 var ErrInvalidArgument = errors.New("Invalid argument")
 
+// A common failure mode is to reuse a keepalive connection that has been
+// terminated (in a way that we can't detect) for being idle too long.
+// POST and DELETE are not safe to retry automatically, so we minimize
+// such failures by always using a new or recently active socket.
+var MaxIdleConnectionDuration = 30 * time.Second
+
 // Indicates an error that was returned by the API server.
 type APIServerError struct {
        // Address of server returning error, of the form "host:port".
@@ -76,6 +83,8 @@ type ArvadosClient struct {
 
        // Discovery document
        DiscoveryDoc Dict
+
+       lastClosedIdlesAt time.Time
 }
 
 // Create a new ArvadosClient, initialized with standard Arvados environment
@@ -101,6 +110,8 @@ func MakeArvadosClient() (ac ArvadosClient, err error) {
                return ac, MissingArvadosApiToken
        }
 
+       ac.lastClosedIdlesAt = time.Now()
+
        return ac, err
 }
 
@@ -158,6 +169,15 @@ func (c ArvadosClient) CallRaw(method string, resourceType string, uuid string,
                req.Header.Add("X-External-Client", "1")
        }
 
+       // POST and DELETE are not safe to retry automatically, so we minimize
+       // such failures by always using a new or recently active socket
+       if method == "POST" || method == "DELETE" {
+               if time.Since(c.lastClosedIdlesAt) > MaxIdleConnectionDuration {
+                       c.lastClosedIdlesAt = time.Now()
+                       c.Client.Transport.(*http.Transport).CloseIdleConnections()
+               }
+       }
+
        // Make the request
        var resp *http.Response
        if resp, err = c.Client.Do(req); err != nil {
index 2c508dcb4a1100cf4609fc3ff3387ac089c6ab26..75af3ca51960a1b47a8ce9406ad3200823aac5db 100644 (file)
@@ -6,6 +6,7 @@ import (
        "net/http"
        "os"
        "testing"
+       "time"
 )
 
 // Gocheck boilerplate
@@ -102,39 +103,50 @@ func (s *ServerRequiredSuite) TestInvalidResourceType(c *C) {
 func (s *ServerRequiredSuite) TestCreatePipelineTemplate(c *C) {
        arv, err := MakeArvadosClient()
 
-       getback := make(Dict)
-       err = arv.Create("pipeline_templates",
-               Dict{"pipeline_template": Dict{
-                       "name": "tmp",
-                       "components": Dict{
-                               "c1": map[string]string{"script": "script1"},
-                               "c2": map[string]string{"script": "script2"}}}},
-               &getback)
-       c.Assert(err, Equals, nil)
-       c.Assert(getback["name"], Equals, "tmp")
-       c.Assert(getback["components"].(map[string]interface{})["c2"].(map[string]interface{})["script"], Equals, "script2")
-
-       uuid := getback["uuid"].(string)
-
-       getback = make(Dict)
-       err = arv.Get("pipeline_templates", uuid, nil, &getback)
-       c.Assert(err, Equals, nil)
-       c.Assert(getback["name"], Equals, "tmp")
-       c.Assert(getback["components"].(map[string]interface{})["c1"].(map[string]interface{})["script"], Equals, "script1")
-
-       getback = make(Dict)
-       err = arv.Update("pipeline_templates", uuid,
-               Dict{
-                       "pipeline_template": Dict{"name": "tmp2"}},
-               &getback)
-       c.Assert(err, Equals, nil)
-       c.Assert(getback["name"], Equals, "tmp2")
-
-       c.Assert(getback["uuid"].(string), Equals, uuid)
-       getback = make(Dict)
-       err = arv.Delete("pipeline_templates", uuid, nil, &getback)
-       c.Assert(err, Equals, nil)
-       c.Assert(getback["name"], Equals, "tmp2")
+       for _, idleConnections := range []bool{
+               false,
+               true,
+       } {
+               if idleConnections {
+                       arv.lastClosedIdlesAt = time.Now().Add(-time.Minute)
+               } else {
+                       arv.lastClosedIdlesAt = time.Now()
+               }
+
+               getback := make(Dict)
+               err = arv.Create("pipeline_templates",
+                       Dict{"pipeline_template": Dict{
+                               "name": "tmp",
+                               "components": Dict{
+                                       "c1": map[string]string{"script": "script1"},
+                                       "c2": map[string]string{"script": "script2"}}}},
+                       &getback)
+               c.Assert(err, Equals, nil)
+               c.Assert(getback["name"], Equals, "tmp")
+               c.Assert(getback["components"].(map[string]interface{})["c2"].(map[string]interface{})["script"], Equals, "script2")
+
+               uuid := getback["uuid"].(string)
+
+               getback = make(Dict)
+               err = arv.Get("pipeline_templates", uuid, nil, &getback)
+               c.Assert(err, Equals, nil)
+               c.Assert(getback["name"], Equals, "tmp")
+               c.Assert(getback["components"].(map[string]interface{})["c1"].(map[string]interface{})["script"], Equals, "script1")
+
+               getback = make(Dict)
+               err = arv.Update("pipeline_templates", uuid,
+                       Dict{
+                               "pipeline_template": Dict{"name": "tmp2"}},
+                       &getback)
+               c.Assert(err, Equals, nil)
+               c.Assert(getback["name"], Equals, "tmp2")
+
+               c.Assert(getback["uuid"].(string), Equals, uuid)
+               getback = make(Dict)
+               err = arv.Delete("pipeline_templates", uuid, nil, &getback)
+               c.Assert(err, Equals, nil)
+               c.Assert(getback["name"], Equals, "tmp2")
+       }
 }
 
 func (s *ServerRequiredSuite) TestErrorResponse(c *C) {
diff --git a/sdk/go/arvadostest/fixtures.go b/sdk/go/arvadostest/fixtures.go
new file mode 100644 (file)
index 0000000..3256ec2
--- /dev/null
@@ -0,0 +1,26 @@
+package arvadostest
+
+// IDs of API server's test fixtures
+const (
+       SpectatorToken        = "zw2f4gwx8hw8cjre7yp6v1zylhrhn3m5gvjq73rtpwhmknrybu"
+       ActiveToken           = "3kg6k6lzmp9kj5cpkcoxie963cmvjahbt2fod9zru30k1jqdmi"
+       AdminToken            = "4axaw8zxe0qm22wa6urpp5nskcne8z88cvbupv653y1njyi05h"
+       AnonymousToken        = "4kg6k6lzmp9kj4cpkcoxie964cmvjahbt4fod9zru44k4jqdmi"
+       DataManagerToken      = "320mkve8qkswstz7ff61glpk3mhgghmg67wmic7elw4z41pke1"
+       FooCollection         = "zzzzz-4zz18-fy296fx3hot09f7"
+       NonexistentCollection = "zzzzz-4zz18-totallynotexist"
+       HelloWorldCollection  = "zzzzz-4zz18-4en62shvi99lxd4"
+       FooBarDirCollection   = "zzzzz-4zz18-foonbarfilesdir"
+       FooPdh                = "1f4b0bc7583c2a7f9102c395f4ffc5e3+45"
+       HelloWorldPdh         = "55713e6a34081eb03609e7ad5fcad129+62"
+)
+
+// A valid manifest designed to test various edge cases and parsing
+// requirements
+const PathologicalManifest = ". acbd18db4cc2f85cedef654fccc4a4d8+3 37b51d194a7513e45b56f6524f2d51f2+3 73feffa4b7f6bb68e44cf984c85f6e88+3+Z+K@xyzzy acbd18db4cc2f85cedef654fccc4a4d8+3 0:0:zero@0 0:1:f 1:0:zero@1 1:4:ooba 4:0:zero@4 5:1:r 5:4:rbaz 9:0:zero@9\n" +
+       "./overlapReverse acbd18db4cc2f85cedef654fccc4a4d8+3 acbd18db4cc2f85cedef654fccc4a4d8+3 5:1:o 4:2:oo 2:4:ofoo\n" +
+       "./segmented acbd18db4cc2f85cedef654fccc4a4d8+3 37b51d194a7513e45b56f6524f2d51f2+3 0:1:frob 5:1:frob 1:1:frob 1:2:oof 0:1:oof 5:0:frob 3:1:frob\n" +
+       `./foo\040b\141r acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:baz` + "\n" +
+       `./foo\040b\141r acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:b\141z\040w\141z` + "\n" +
+       "./foo acbd18db4cc2f85cedef654fccc4a4d8+3 0:0:zero 0:3:foo\n" +
+       ". acbd18db4cc2f85cedef654fccc4a4d8+3 0:0:foo/zero 0:3:foo/foo\n"
index 4a719e922dd8a98bdc88445f413ea88788b01a53..ca4eb948b1220c982ef25fa03c9175f40148a797 100644 (file)
@@ -1,6 +1,7 @@
 package auth
 
 import (
+       "encoding/base64"
        "net/http"
        "net/url"
        "strings"
@@ -20,6 +21,15 @@ func NewCredentialsFromHTTPRequest(r *http.Request) *Credentials {
        return c
 }
 
+// EncodeTokenCookie accepts a token (as a byte slice) and returns an
+// encoded string suitable for use as a cookie value, such that it
+// will be decoded correctly by LoadTokensFromHTTPRequest.
+var EncodeTokenCookie func([]byte) string = base64.URLEncoding.EncodeToString
+
+// DecodeTokenCookie accepts a cookie value and returns the decoded
+// token.
+var DecodeTokenCookie func(string) ([]byte, error) = base64.URLEncoding.DecodeString
+
 // LoadTokensFromHttpRequest loads all tokens it can find in the
 // headers and query string of an http query.
 func (a *Credentials) LoadTokensFromHTTPRequest(r *http.Request) {
@@ -51,10 +61,24 @@ func (a *Credentials) LoadTokensFromHTTPRequest(r *http.Request) {
                a.Tokens = append(a.Tokens, val...)
        }
 
+       a.loadTokenFromCookie(r)
+
        // TODO: Load token from Rails session cookie (if Rails site
        // secret is known)
 }
 
+func (a *Credentials) loadTokenFromCookie(r *http.Request) {
+       cookie, err := r.Cookie("arvados_api_token")
+       if err != nil || len(cookie.Value) == 0 {
+               return
+       }
+       token, err := DecodeTokenCookie(cookie.Value)
+       if err != nil {
+               return
+       }
+       a.Tokens = append(a.Tokens, string(token))
+}
+
 // TODO: LoadTokensFromHttpRequestBody(). We can't assume in
 // LoadTokensFromHttpRequest() that [or how] we should read and parse
 // the request body. This has to be requested explicitly by the
diff --git a/sdk/go/keepclient/collectionreader.go b/sdk/go/keepclient/collectionreader.go
new file mode 100644 (file)
index 0000000..b532a16
--- /dev/null
@@ -0,0 +1,252 @@
+package keepclient
+
+import (
+       "errors"
+       "io"
+       "os"
+
+       "git.curoverse.com/arvados.git/sdk/go/manifest"
+)
+
+// ReadCloserWithLen extends io.ReadCloser with a Len() method that
+// returns the total number of bytes available to read.
+type ReadCloserWithLen interface {
+       io.ReadCloser
+       Len() uint64
+}
+
+const (
+       // After reading a data block from Keep, cfReader slices it up
+       // and sends the slices to a buffered channel to be consumed
+       // by the caller via Read().
+       //
+       // dataSliceSize is the maximum size of the slices, and
+       // therefore the maximum number of bytes that will be returned
+       // by a single call to Read().
+       dataSliceSize = 1 << 20
+)
+
+// ErrNoManifest indicates the given collection has no manifest
+// information (e.g., manifest_text was excluded by a "select"
+// parameter when retrieving the collection record).
+var ErrNoManifest = errors.New("Collection has no manifest")
+
+// CollectionFileReader returns a ReadCloserWithLen that reads file
+// content from a collection. The filename must be given relative to
+// the root of the collection, without a leading "./".
+func (kc *KeepClient) CollectionFileReader(collection map[string]interface{}, filename string) (ReadCloserWithLen, error) {
+       mText, ok := collection["manifest_text"].(string)
+       if !ok {
+               return nil, ErrNoManifest
+       }
+       m := manifest.Manifest{Text: mText}
+       rdrChan := make(chan *cfReader)
+       go kc.queueSegmentsToGet(m, filename, rdrChan)
+       r, ok := <-rdrChan
+       if !ok {
+               return nil, os.ErrNotExist
+       }
+       return r, nil
+}
+
+// Send segments for the specified file to r.toGet. Send a *cfReader
+// to rdrChan if the specified file is found (even if it's empty).
+// Then, close rdrChan.
+func (kc *KeepClient) queueSegmentsToGet(m manifest.Manifest, filename string, rdrChan chan *cfReader) {
+       defer close(rdrChan)
+
+       // q is a queue of FileSegments that we have received but
+       // haven't yet been able to send to toGet.
+       var q []*manifest.FileSegment
+       var r *cfReader
+       for seg := range m.FileSegmentIterByName(filename) {
+               if r == nil {
+                       // We've just discovered that the requested
+                       // filename does appear in the manifest, so we
+                       // can return a real reader (not nil) from
+                       // CollectionFileReader().
+                       r = newCFReader(kc)
+                       rdrChan <- r
+               }
+               q = append(q, seg)
+               r.totalSize += uint64(seg.Len)
+               // Send toGet as many segments as we can until it
+               // blocks.
+       Q:
+               for len(q) > 0 {
+                       select {
+                       case r.toGet <- q[0]:
+                               q = q[1:]
+                       default:
+                               break Q
+                       }
+               }
+       }
+       if r == nil {
+               // File not found.
+               return
+       }
+       close(r.countDone)
+       for _, seg := range q {
+               r.toGet <- seg
+       }
+       close(r.toGet)
+}
+
+type cfReader struct {
+       keepClient *KeepClient
+
+       // doGet() reads FileSegments from toGet, gets the data from
+       // Keep, and sends byte slices to toRead to be consumed by
+       // Read().
+       toGet chan *manifest.FileSegment
+
+       // toRead is a buffered channel, sized to fit one full Keep
+       // block. This lets us verify checksums without having a
+       // store-and-forward delay between blocks: by the time the
+       // caller starts receiving data from block N, cfReader is
+       // starting to fetch block N+1. A larger buffer would be
+       // useful for a caller whose read speed varies a lot.
+       toRead chan []byte
+
+       // bytes ready to send next time someone calls Read()
+       buf []byte
+
+       // Total size of the file being read. Not safe to read this
+       // until countDone is closed.
+       totalSize uint64
+       countDone chan struct{}
+
+       // First error encountered.
+       err error
+
+       // errNotNil is closed IFF err contains a non-nil error.
+       // Receiving from it will block until an error occurs.
+       errNotNil chan struct{}
+
+       // rdrClosed is closed IFF the reader's Close() method has
+       // been called. Any goroutines associated with the reader will
+       // stop and free up resources when they notice this channel is
+       // closed.
+       rdrClosed chan struct{}
+}
+
+func (r *cfReader) Read(outbuf []byte) (int, error) {
+       if r.Error() != nil {
+               // Short circuit: the caller might as well find out
+               // now that we hit an error, even if there's buffered
+               // data we could return.
+               return 0, r.Error()
+       }
+       for len(r.buf) == 0 {
+               // Private buffer was emptied out by the last Read()
+               // (or this is the first Read() and r.buf is nil).
+               // Read from r.toRead until we get a non-empty slice
+               // or hit an error.
+               var ok bool
+               r.buf, ok = <-r.toRead
+               if r.Error() != nil {
+                       // Error encountered while waiting for bytes
+                       return 0, r.Error()
+               } else if !ok {
+                       // No more bytes to read, no error encountered
+                       return 0, io.EOF
+               }
+       }
+       // Copy as much as possible from our private buffer to the
+       // caller's buffer
+       n := len(r.buf)
+       if len(r.buf) > len(outbuf) {
+               n = len(outbuf)
+       }
+       copy(outbuf[:n], r.buf[:n])
+
+       // Next call to Read() will continue where we left off
+       r.buf = r.buf[n:]
+
+       return n, nil
+}
+
+// Close releases resources. It returns a non-nil error if an error
+// was encountered by the reader.
+func (r *cfReader) Close() error {
+       close(r.rdrClosed)
+       return r.Error()
+}
+
+// Error returns an error if one has been encountered, otherwise
+// nil. It is safe to call from any goroutine.
+func (r *cfReader) Error() error {
+       select {
+       case <-r.errNotNil:
+               return r.err
+       default:
+               return nil
+       }
+}
+
+// Len returns the total number of bytes in the file being read. If
+// necessary, it waits for manifest parsing to finish.
+func (r *cfReader) Len() uint64 {
+       // Wait for all segments to be counted
+       <-r.countDone
+       return r.totalSize
+}
+
+func (r *cfReader) doGet() {
+       defer close(r.toRead)
+GET:
+       for fs := range r.toGet {
+               rdr, _, _, err := r.keepClient.Get(fs.Locator)
+               if err != nil {
+                       r.err = err
+                       close(r.errNotNil)
+                       return
+               }
+               var buf = make([]byte, fs.Offset+fs.Len)
+               _, err = io.ReadFull(rdr, buf)
+               if err != nil {
+                       r.err = err
+                       close(r.errNotNil)
+                       return
+               }
+               for bOff, bLen := fs.Offset, dataSliceSize; bOff < fs.Offset+fs.Len && bLen > 0; bOff += bLen {
+                       if bOff+bLen > fs.Offset+fs.Len {
+                               bLen = fs.Offset + fs.Len - bOff
+                       }
+                       select {
+                       case r.toRead <- buf[bOff : bOff+bLen]:
+                       case <-r.rdrClosed:
+                               // Reader is closed: no point sending
+                               // anything more to toRead.
+                               break GET
+                       }
+               }
+               // It is possible that r.rdrClosed is closed but we
+               // never noticed because r.toRead was also ready in
+               // every select{} above. Here we check before wasting
+               // a keepclient.Get() call.
+               select {
+               case <-r.rdrClosed:
+                       break GET
+               default:
+               }
+       }
+       // In case we exited the above loop early: before returning,
+       // drain the toGet channel so its sender doesn't sit around
+       // blocking forever.
+       for _ = range r.toGet {
+       }
+}
+
+func newCFReader(kc *KeepClient) (r *cfReader) {
+       r = new(cfReader)
+       r.keepClient = kc
+       r.rdrClosed = make(chan struct{})
+       r.errNotNil = make(chan struct{})
+       r.toGet = make(chan *manifest.FileSegment, 2)
+       r.toRead = make(chan []byte, (BLOCKSIZE+dataSliceSize-1)/dataSliceSize)
+       r.countDone = make(chan struct{})
+       go r.doGet()
+       return
+}
diff --git a/sdk/go/keepclient/collectionreader_test.go b/sdk/go/keepclient/collectionreader_test.go
new file mode 100644 (file)
index 0000000..58a047c
--- /dev/null
@@ -0,0 +1,223 @@
+package keepclient
+
+import (
+       "crypto/md5"
+       "crypto/rand"
+       "fmt"
+       "io"
+       "io/ioutil"
+       "net/http"
+       "os"
+       "strconv"
+       "strings"
+
+       "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
+       "git.curoverse.com/arvados.git/sdk/go/arvadostest"
+       check "gopkg.in/check.v1"
+)
+
+var _ = check.Suite(&CollectionReaderUnit{})
+
+type CollectionReaderUnit struct {
+       arv     arvadosclient.ArvadosClient
+       kc      *KeepClient
+       handler SuccessHandler
+}
+
+func (s *CollectionReaderUnit) SetUpTest(c *check.C) {
+       var err error
+       s.arv, err = arvadosclient.MakeArvadosClient()
+       c.Assert(err, check.IsNil)
+       s.arv.ApiToken = arvadostest.ActiveToken
+
+       s.kc, err = MakeKeepClient(&s.arv)
+       c.Assert(err, check.IsNil)
+
+       s.handler = SuccessHandler{
+               disk: make(map[string][]byte),
+               lock: make(chan struct{}, 1),
+               ops:  new(int),
+       }
+       localRoots := make(map[string]string)
+       for i, k := range RunSomeFakeKeepServers(s.handler, 4) {
+               localRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
+       }
+       s.kc.SetServiceRoots(localRoots, localRoots, nil)
+}
+
+type SuccessHandler struct {
+       disk map[string][]byte
+       lock chan struct{} // channel with buffer==1: full when an operation is in progress.
+       ops  *int          // number of operations completed
+}
+
+func (h SuccessHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
+       switch req.Method {
+       case "PUT":
+               buf, err := ioutil.ReadAll(req.Body)
+               if err != nil {
+                       resp.WriteHeader(500)
+                       return
+               }
+               pdh := fmt.Sprintf("%x+%d", md5.Sum(buf), len(buf))
+               h.lock <- struct{}{}
+               h.disk[pdh] = buf
+               if h.ops != nil {
+                       (*h.ops)++
+               }
+               <-h.lock
+               resp.Write([]byte(pdh))
+       case "GET":
+               pdh := req.URL.Path[1:]
+               h.lock <- struct{}{}
+               buf, ok := h.disk[pdh]
+               if h.ops != nil {
+                       (*h.ops)++
+               }
+               <-h.lock
+               if !ok {
+                       resp.WriteHeader(http.StatusNotFound)
+               } else {
+                       resp.Write(buf)
+               }
+       default:
+               resp.WriteHeader(http.StatusMethodNotAllowed)
+       }
+}
+
+type rdrTest struct {
+       mt   string      // manifest text
+       f    string      // filename
+       want interface{} // error or string to expect
+}
+
+func (s *CollectionReaderUnit) TestCollectionReaderContent(c *check.C) {
+       s.kc.PutB([]byte("foo"))
+       s.kc.PutB([]byte("bar"))
+       s.kc.PutB([]byte("Hello world\n"))
+       s.kc.PutB([]byte(""))
+
+       mt := arvadostest.PathologicalManifest
+
+       for _, testCase := range []rdrTest{
+               {mt: mt, f: "zzzz", want: os.ErrNotExist},
+               {mt: mt, f: "frob", want: os.ErrNotExist},
+               {mt: mt, f: "/segmented/frob", want: os.ErrNotExist},
+               {mt: mt, f: "./segmented/frob", want: os.ErrNotExist},
+               {mt: mt, f: "/f", want: os.ErrNotExist},
+               {mt: mt, f: "./f", want: os.ErrNotExist},
+               {mt: mt, f: "foo bar//baz", want: os.ErrNotExist},
+               {mt: mt, f: "foo/zero", want: ""},
+               {mt: mt, f: "zero@0", want: ""},
+               {mt: mt, f: "zero@1", want: ""},
+               {mt: mt, f: "zero@4", want: ""},
+               {mt: mt, f: "zero@9", want: ""},
+               {mt: mt, f: "f", want: "f"},
+               {mt: mt, f: "ooba", want: "ooba"},
+               {mt: mt, f: "overlapReverse/o", want: "o"},
+               {mt: mt, f: "overlapReverse/oo", want: "oo"},
+               {mt: mt, f: "overlapReverse/ofoo", want: "ofoo"},
+               {mt: mt, f: "foo bar/baz", want: "foo"},
+               {mt: mt, f: "segmented/frob", want: "frob"},
+               {mt: mt, f: "segmented/oof", want: "oof"},
+       } {
+               rdr, err := s.kc.CollectionFileReader(map[string]interface{}{"manifest_text": testCase.mt}, testCase.f)
+               switch want := testCase.want.(type) {
+               case error:
+                       c.Check(rdr, check.IsNil)
+                       c.Check(err, check.Equals, want)
+               case string:
+                       buf := make([]byte, len(want))
+                       n, err := io.ReadFull(rdr, buf)
+                       c.Check(err, check.IsNil)
+                       for i := 0; i < 4; i++ {
+                               c.Check(string(buf), check.Equals, want)
+                               n, err = rdr.Read(buf)
+                               c.Check(n, check.Equals, 0)
+                               c.Check(err, check.Equals, io.EOF)
+                       }
+                       c.Check(rdr.Close(), check.Equals, nil)
+               }
+       }
+}
+
+func (s *CollectionReaderUnit) TestCollectionReaderManyBlocks(c *check.C) {
+       h := md5.New()
+       buf := make([]byte, 4096)
+       locs := make([]string, len(buf))
+       filesize := 0
+       for i := 0; i < len(locs); i++ {
+               _, err := io.ReadFull(rand.Reader, buf[:i])
+               c.Assert(err, check.IsNil)
+               h.Write(buf[:i])
+               locs[i], _, err = s.kc.PutB(buf[:i])
+               c.Assert(err, check.IsNil)
+               filesize += i
+       }
+       manifest := "./random " + strings.Join(locs, " ") + " 0:" + strconv.Itoa(filesize) + ":bytes.bin\n"
+       dataMD5 := h.Sum(nil)
+
+       checkMD5 := md5.New()
+       rdr, err := s.kc.CollectionFileReader(map[string]interface{}{"manifest_text": manifest}, "random/bytes.bin")
+       c.Check(err, check.IsNil)
+       _, err = io.Copy(checkMD5, rdr)
+       c.Check(err, check.IsNil)
+       _, err = rdr.Read(make([]byte, 1))
+       c.Check(err, check.Equals, io.EOF)
+       c.Check(checkMD5.Sum(nil), check.DeepEquals, dataMD5)
+}
+
+func (s *CollectionReaderUnit) TestCollectionReaderCloseEarly(c *check.C) {
+       s.kc.PutB([]byte("foo"))
+
+       mt := ". "
+       for i := 0; i < 1000; i++ {
+               mt += "acbd18db4cc2f85cedef654fccc4a4d8+3 "
+       }
+       mt += "0:3000:foo1000.txt\n"
+
+       // Grab the stub server's lock, ensuring our cfReader doesn't
+       // get anything back from its first call to kc.Get() before we
+       // have a chance to call Close().
+       s.handler.lock <- struct{}{}
+       opsBeforeRead := *s.handler.ops
+
+       rdr, err := s.kc.CollectionFileReader(map[string]interface{}{"manifest_text": mt}, "foo1000.txt")
+       c.Assert(err, check.IsNil)
+
+       firstReadDone := make(chan struct{})
+       go func() {
+               rdr.Read(make([]byte, 6))
+               firstReadDone <- struct{}{}
+       }()
+       err = rdr.Close()
+       c.Assert(err, check.IsNil)
+       c.Assert(rdr.(*cfReader).Error(), check.IsNil)
+
+       // Release the stub server's lock. The first GET operation will proceed.
+       <-s.handler.lock
+
+       // Make sure our first read operation consumes the data
+       // received from the first GET.
+       <-firstReadDone
+
+       // doGet() should close toRead before sending any more bufs to it.
+       if what, ok := <-rdr.(*cfReader).toRead; ok {
+               c.Errorf("Got %q, expected toRead to be closed", what)
+       }
+
+       // Stub should have handled exactly one GET request.
+       c.Assert(*s.handler.ops, check.Equals, opsBeforeRead+1)
+}
+
+func (s *CollectionReaderUnit) TestCollectionReaderDataError(c *check.C) {
+       manifest := ". ffffffffffffffffffffffffffffffff+1 0:1:notfound.txt\n"
+       buf := make([]byte, 1)
+       rdr, err := s.kc.CollectionFileReader(map[string]interface{}{"manifest_text": manifest}, "notfound.txt")
+       c.Check(err, check.IsNil)
+       for i := 0; i < 2; i++ {
+               _, err = io.ReadFull(rdr, buf)
+               c.Check(err, check.NotNil)
+               c.Check(err, check.Not(check.Equals), io.EOF)
+       }
+}
index 2f809b32560b7e6072fd6242963e18b2ab9430df..0e6fadcc3548c99d2e85c30219a74ffdce42bf72 100644 (file)
@@ -200,7 +200,7 @@ func (kc *KeepClient) getOrHead(method string, locator string) (io.ReadCloser, i
                                retryList = append(retryList, host)
                        } else if resp.StatusCode != http.StatusOK {
                                var respbody []byte
-                               respbody, _ = ioutil.ReadAll(&io.LimitedReader{resp.Body, 4096})
+                               respbody, _ = ioutil.ReadAll(&io.LimitedReader{R: resp.Body, N: 4096})
                                resp.Body.Close()
                                errs = append(errs, fmt.Sprintf("%s: HTTP %d %q",
                                        url, resp.StatusCode, bytes.TrimSpace(respbody)))
index df4638619f488bc86dd3626cf2579a60fda62bed..4615ebc92ef2f8b59bc70ac1d8320833faee8a41 100644 (file)
@@ -143,10 +143,9 @@ func (s *StandaloneSuite) TestUploadToStubKeepServer(c *C) {
                make(chan string)}
 
        UploadToStubHelper(c, st,
-               func(kc *KeepClient, url string, reader io.ReadCloser,
-                       writer io.WriteCloser, upload_status chan uploadStatus) {
+               func(kc *KeepClient, url string, reader io.ReadCloser, writer io.WriteCloser, upload_status chan uploadStatus) {
 
-                       go kc.uploadToKeepServer(url, st.expectPath, reader, upload_status, int64(len("foo")), "TestUploadToStubKeepServer")
+                       go kc.uploadToKeepServer(url, st.expectPath, reader, upload_status, int64(len("foo")), 0)
 
                        writer.Write([]byte("foo"))
                        writer.Close()
@@ -178,7 +177,7 @@ func (s *StandaloneSuite) TestUploadToStubKeepServerBufferReader(c *C) {
 
                        br1 := tr.MakeStreamReader()
 
-                       go kc.uploadToKeepServer(url, st.expectPath, br1, upload_status, 3, "TestUploadToStubKeepServerBufferReader")
+                       go kc.uploadToKeepServer(url, st.expectPath, br1, upload_status, 3, 0)
 
                        writer.Write([]byte("foo"))
                        writer.Close()
@@ -238,7 +237,7 @@ func (s *StandaloneSuite) TestFailedUploadToStubKeepServer(c *C) {
                func(kc *KeepClient, url string, reader io.ReadCloser,
                        writer io.WriteCloser, upload_status chan uploadStatus) {
 
-                       go kc.uploadToKeepServer(url, hash, reader, upload_status, 3, "TestFailedUploadToStubKeepServer")
+                       go kc.uploadToKeepServer(url, hash, reader, upload_status, 3, 0)
 
                        writer.Write([]byte("foo"))
                        writer.Close()
index bd9b2e35f79be1fe5c6c1fc125b4449875f97f2c..8414afab1eace8b00125ca88f4f37279b95fc558 100644 (file)
@@ -9,12 +9,18 @@ import (
        "io"
        "io/ioutil"
        "log"
+       "math/rand"
        "net"
        "net/http"
        "strings"
        "time"
 )
 
+// DebugPrintf is the function used to emit debug messages. The
+// easiest way to enable keepclient debug messages in your
+// application is to assign log.Printf to DebugPrintf.
+var DebugPrintf = func(string, ...interface{}) {}
+
 type keepService struct {
        Uuid     string `json:"uuid"`
        Hostname string `json:"service_host"`
@@ -169,14 +175,14 @@ type uploadStatus struct {
        response        string
 }
 
-func (this KeepClient) uploadToKeepServer(host string, hash string, body io.ReadCloser,
-       upload_status chan<- uploadStatus, expectedLength int64, requestId string) {
+func (this *KeepClient) uploadToKeepServer(host string, hash string, body io.ReadCloser,
+       upload_status chan<- uploadStatus, expectedLength int64, requestID int32) {
 
        var req *http.Request
        var err error
        var url = fmt.Sprintf("%s/%s", host, hash)
        if req, err = http.NewRequest("PUT", url, nil); err != nil {
-               log.Printf("[%v] Error creating request PUT %v error: %v", requestId, url, err.Error())
+               log.Printf("[%08x] Error creating request PUT %v error: %v", requestID, url, err.Error())
                upload_status <- uploadStatus{err, url, 0, 0, ""}
                body.Close()
                return
@@ -201,7 +207,7 @@ func (this KeepClient) uploadToKeepServer(host string, hash string, body io.Read
 
        var resp *http.Response
        if resp, err = this.Client.Do(req); err != nil {
-               log.Printf("[%v] Upload failed %v error: %v", requestId, url, err.Error())
+               log.Printf("[%08x] Upload failed %v error: %v", requestID, url, err.Error())
                upload_status <- uploadStatus{err, url, 0, 0, ""}
                return
        }
@@ -214,28 +220,28 @@ func (this KeepClient) uploadToKeepServer(host string, hash string, body io.Read
        defer resp.Body.Close()
        defer io.Copy(ioutil.Discard, resp.Body)
 
-       respbody, err2 := ioutil.ReadAll(&io.LimitedReader{resp.Body, 4096})
+       respbody, err2 := ioutil.ReadAll(&io.LimitedReader{R: resp.Body, N: 4096})
        response := strings.TrimSpace(string(respbody))
        if err2 != nil && err2 != io.EOF {
-               log.Printf("[%v] Upload %v error: %v response: %v", requestId, url, err2.Error(), response)
+               log.Printf("[%08x] Upload %v error: %v response: %v", requestID, url, err2.Error(), response)
                upload_status <- uploadStatus{err2, url, resp.StatusCode, rep, response}
        } else if resp.StatusCode == http.StatusOK {
-               log.Printf("[%v] Upload %v success", requestId, url)
+               log.Printf("[%08x] Upload %v success", requestID, url)
                upload_status <- uploadStatus{nil, url, resp.StatusCode, rep, response}
        } else {
-               log.Printf("[%v] Upload %v error: %v response: %v", requestId, url, resp.StatusCode, response)
+               log.Printf("[%08x] Upload %v error: %v response: %v", requestID, url, resp.StatusCode, response)
                upload_status <- uploadStatus{errors.New(resp.Status), url, resp.StatusCode, rep, response}
        }
 }
 
-func (this KeepClient) putReplicas(
+func (this *KeepClient) putReplicas(
        hash string,
        tr *streamer.AsyncStream,
        expectedLength int64) (locator string, replicas int, err error) {
 
-       // Take the hash of locator and timestamp in order to identify this
-       // specific transaction in log statements.
-       requestId := fmt.Sprintf("%x", md5.Sum([]byte(hash+time.Now().String())))[0:8]
+       // Generate an arbitrary ID to identify this specific
+       // transaction in debug logs.
+       requestID := rand.Int31()
 
        // Calculate the ordering for uploading to servers
        sv := NewRootSorter(this.WritableLocalRoots(), hash).GetSortedRoots()
@@ -280,8 +286,8 @@ func (this KeepClient) putReplicas(
                        for active*replicasPerThread < remaining_replicas {
                                // Start some upload requests
                                if next_server < len(sv) {
-                                       log.Printf("[%v] Begin upload %s to %s", requestId, hash, sv[next_server])
-                                       go this.uploadToKeepServer(sv[next_server], hash, tr.MakeStreamReader(), upload_status, expectedLength, requestId)
+                                       log.Printf("[%08x] Begin upload %s to %s", requestID, hash, sv[next_server])
+                                       go this.uploadToKeepServer(sv[next_server], hash, tr.MakeStreamReader(), upload_status, expectedLength, requestID)
                                        next_server += 1
                                        active += 1
                                } else {
@@ -292,8 +298,8 @@ func (this KeepClient) putReplicas(
                                        }
                                }
                        }
-                       log.Printf("[%v] Replicas remaining to write: %v active uploads: %v",
-                               requestId, remaining_replicas, active)
+                       log.Printf("[%08x] Replicas remaining to write: %v active uploads: %v",
+                               requestID, remaining_replicas, active)
 
                        // Now wait for something to happen.
                        if active > 0 {
index 4e816cd73b30abbb7afce9f4b51d3767a897cfa8..f104d9a1035127c3f75502854e176c72a4017e1d 100644 (file)
 package manifest
 
 import (
+       "errors"
+       "fmt"
        "git.curoverse.com/arvados.git/sdk/go/blockdigest"
        "log"
+       "regexp"
+       "strconv"
        "strings"
 )
 
+var ErrInvalidToken = errors.New("Invalid token")
+
+var LocatorPattern = regexp.MustCompile(
+       "^[0-9a-fA-F]{32}\\+[0-9]+(\\+[A-Z][A-Za-z0-9@_-]+)*$")
+
 type Manifest struct {
        Text string
 }
 
+type BlockLocator struct {
+       Digest blockdigest.BlockDigest
+       Size   int
+       Hints  []string
+}
+
+type DataSegment struct {
+       BlockLocator
+       Locator      string
+       StreamOffset uint64
+}
+
+// FileSegment is a portion of a file that is contained within a
+// single block.
+type FileSegment struct {
+       Locator string
+       // Offset (within this block) of this data segment
+       Offset int
+       Len    int
+}
+
 // Represents a single line from a manifest.
 type ManifestStream struct {
        StreamName string
        Blocks     []string
-       Files      []string
+       FileTokens []string
+}
+
+var escapeSeq = regexp.MustCompile(`\\([0-9]{3}|\\)`)
+
+func unescapeSeq(seq string) string {
+       if seq == `\\` {
+               return `\`
+       }
+       i, err := strconv.ParseUint(seq[1:], 8, 8)
+       if err != nil {
+               // Invalid escape sequence: can't unescape.
+               return seq
+       }
+       return string([]byte{byte(i)})
+}
+
+func UnescapeName(s string) string {
+       return escapeSeq.ReplaceAllStringFunc(s, unescapeSeq)
+}
+
+func ParseBlockLocator(s string) (b BlockLocator, err error) {
+       if !LocatorPattern.MatchString(s) {
+               err = fmt.Errorf("String \"%s\" does not match BlockLocator pattern "+
+                       "\"%s\".",
+                       s,
+                       LocatorPattern.String())
+       } else {
+               tokens := strings.Split(s, "+")
+               var blockSize int64
+               var blockDigest blockdigest.BlockDigest
+               // We expect both of the following to succeed since LocatorPattern
+               // restricts the strings appropriately.
+               blockDigest, err = blockdigest.FromString(tokens[0])
+               if err != nil {
+                       return
+               }
+               blockSize, err = strconv.ParseInt(tokens[1], 10, 0)
+               if err != nil {
+                       return
+               }
+               b.Digest = blockDigest
+               b.Size = int(blockSize)
+               b.Hints = tokens[2:]
+       }
+       return
+}
+
+func parseFileToken(tok string) (segPos, segLen uint64, name string, err error) {
+       parts := strings.SplitN(tok, ":", 3)
+       if len(parts) != 3 {
+               err = ErrInvalidToken
+               return
+       }
+       segPos, err = strconv.ParseUint(parts[0], 10, 64)
+       if err != nil {
+               return
+       }
+       segLen, err = strconv.ParseUint(parts[1], 10, 64)
+       if err != nil {
+               return
+       }
+       name = UnescapeName(parts[2])
+       return
+}
+
+func (s *ManifestStream) FileSegmentIterByName(filepath string) <-chan *FileSegment {
+       ch := make(chan *FileSegment)
+       go func() {
+               s.sendFileSegmentIterByName(filepath, ch)
+               close(ch)
+       }()
+       return ch
+}
+
+func (s *ManifestStream) sendFileSegmentIterByName(filepath string, ch chan<- *FileSegment) {
+       blockLens := make([]int, 0, len(s.Blocks))
+       // This is what streamName+"/"+fileName will look like:
+       target := "./" + filepath
+       for _, fTok := range s.FileTokens {
+               wantPos, wantLen, name, err := parseFileToken(fTok)
+               if err != nil {
+                       // Skip (!) invalid file tokens.
+                       continue
+               }
+               if s.StreamName+"/"+name != target {
+                       continue
+               }
+               if wantLen == 0 {
+                       ch <- &FileSegment{Locator: "d41d8cd98f00b204e9800998ecf8427e+0", Offset: 0, Len: 0}
+                       continue
+               }
+               // Linear search for blocks containing data for this
+               // file
+               var blockPos uint64 = 0 // position of block in stream
+               for i, loc := range s.Blocks {
+                       if blockPos >= wantPos+wantLen {
+                               break
+                       }
+                       if len(blockLens) <= i {
+                               blockLens = blockLens[:i+1]
+                               b, err := ParseBlockLocator(loc)
+                               if err != nil {
+                                       // Unparseable locator -> unusable
+                                       // stream.
+                                       ch <- nil
+                                       return
+                               }
+                               blockLens[i] = b.Size
+                       }
+                       blockLen := uint64(blockLens[i])
+                       if blockPos+blockLen <= wantPos {
+                               blockPos += blockLen
+                               continue
+                       }
+                       fseg := FileSegment{
+                               Locator: loc,
+                               Offset:  0,
+                               Len:     blockLens[i],
+                       }
+                       if blockPos < wantPos {
+                               fseg.Offset = int(wantPos - blockPos)
+                               fseg.Len -= fseg.Offset
+                       }
+                       if blockPos+blockLen > wantPos+wantLen {
+                               fseg.Len = int(wantPos+wantLen-blockPos) - fseg.Offset
+                       }
+                       ch <- &fseg
+                       blockPos += blockLen
+               }
+       }
 }
 
 func parseManifestStream(s string) (m ManifestStream) {
        tokens := strings.Split(s, " ")
-       m.StreamName = tokens[0]
+       m.StreamName = UnescapeName(tokens[0])
        tokens = tokens[1:]
        var i int
        for i = range tokens {
@@ -32,7 +192,7 @@ func parseManifestStream(s string) (m ManifestStream) {
                }
        }
        m.Blocks = tokens[:i]
-       m.Files = tokens[i:]
+       m.FileTokens = tokens[i:]
        return
 }
 
@@ -58,6 +218,20 @@ func (m *Manifest) StreamIter() <-chan ManifestStream {
        return ch
 }
 
+func (m *Manifest) FileSegmentIterByName(filepath string) <-chan *FileSegment {
+       ch := make(chan *FileSegment)
+       go func() {
+               for stream := range m.StreamIter() {
+                       if !strings.HasPrefix("./"+filepath, stream.StreamName+"/") {
+                               continue
+                       }
+                       stream.sendFileSegmentIterByName(filepath, ch)
+               }
+               close(ch)
+       }()
+       return ch
+}
+
 // Blocks may appear multiple times within the same manifest if they
 // are used by multiple files. In that case this Iterator will output
 // the same block multiple times.
index 8cfe3d907721e4f7d33048dfa16ef91e38dec12d..364648d643cea64dadc706eb1f5c42e227ccbb39 100644 (file)
@@ -1,10 +1,13 @@
 package manifest
 
 import (
-       "git.curoverse.com/arvados.git/sdk/go/blockdigest"
        "io/ioutil"
+       "reflect"
        "runtime"
        "testing"
+
+       "git.curoverse.com/arvados.git/sdk/go/arvadostest"
+       "git.curoverse.com/arvados.git/sdk/go/blockdigest"
 )
 
 func getStackTrace() string {
@@ -60,7 +63,7 @@ func expectStringSlicesEqual(t *testing.T, actual []string, expected []string) {
 func expectManifestStream(t *testing.T, actual ManifestStream, expected ManifestStream) {
        expectEqual(t, actual.StreamName, expected.StreamName)
        expectStringSlicesEqual(t, actual.Blocks, expected.Blocks)
-       expectStringSlicesEqual(t, actual.Files, expected.Files)
+       expectStringSlicesEqual(t, actual.FileTokens, expected.FileTokens)
 }
 
 func expectBlockLocator(t *testing.T, actual blockdigest.BlockLocator, expected blockdigest.BlockLocator) {
@@ -72,8 +75,19 @@ func expectBlockLocator(t *testing.T, actual blockdigest.BlockLocator, expected
 func TestParseManifestStreamSimple(t *testing.T) {
        m := parseManifestStream(". 365f83f5f808896ec834c8b595288735+2310+K@qr1hi+Af0c9a66381f3b028677411926f0be1c6282fe67c@542b5ddf 0:2310:qr1hi-8i9sb-ienvmpve1a0vpoi.log.txt")
        expectManifestStream(t, m, ManifestStream{StreamName: ".",
-               Blocks: []string{"365f83f5f808896ec834c8b595288735+2310+K@qr1hi+Af0c9a66381f3b028677411926f0be1c6282fe67c@542b5ddf"},
-               Files:  []string{"0:2310:qr1hi-8i9sb-ienvmpve1a0vpoi.log.txt"}})
+               Blocks:     []string{"365f83f5f808896ec834c8b595288735+2310+K@qr1hi+Af0c9a66381f3b028677411926f0be1c6282fe67c@542b5ddf"},
+               FileTokens: []string{"0:2310:qr1hi-8i9sb-ienvmpve1a0vpoi.log.txt"}})
+}
+
+func TestParseBlockLocatorSimple(t *testing.T) {
+       b, err := ParseBlockLocator("365f83f5f808896ec834c8b595288735+2310+K@qr1hi+Af0c9a66381f3b028677411926f0be1c6282fe67c@542b5ddf")
+       if err != nil {
+               t.Fatalf("Unexpected error parsing block locator: %v", err)
+       }
+       expectBlockLocator(t, b, BlockLocator{Digest: blockdigest.AssertFromString("365f83f5f808896ec834c8b595288735"),
+               Size: 2310,
+               Hints: []string{"K@qr1hi",
+                       "Af0c9a66381f3b028677411926f0be1c6282fe67c@542b5ddf"}})
 }
 
 func TestStreamIterShortManifestWithBlankStreams(t *testing.T) {
@@ -88,8 +102,8 @@ func TestStreamIterShortManifestWithBlankStreams(t *testing.T) {
        expectManifestStream(t,
                firstStream,
                ManifestStream{StreamName: ".",
-                       Blocks: []string{"b746e3d2104645f2f64cd3cc69dd895d+15693477+E2866e643690156651c03d876e638e674dcd79475@5441920c"},
-                       Files:  []string{"0:15893477:chr10_band0_s0_e3000000.fj"}})
+                       Blocks:     []string{"b746e3d2104645f2f64cd3cc69dd895d+15693477+E2866e643690156651c03d876e638e674dcd79475@5441920c"},
+                       FileTokens: []string{"0:15893477:chr10_band0_s0_e3000000.fj"}})
 
        received, ok := <-streamIter
        if ok {
@@ -126,3 +140,58 @@ func TestBlockIterLongManifest(t *testing.T) {
                        Size:  31367794,
                        Hints: []string{"E53f903684239bcc114f7bf8ff9bd6089f33058db@5441920c"}})
 }
+
+func TestUnescape(t *testing.T) {
+       for _, testCase := range [][]string{
+               {`\040`, ` `},
+               {`\009`, `\009`},
+               {`\\\040\\`, `\ \`},
+               {`\\040\`, `\040\`},
+       } {
+               in := testCase[0]
+               expect := testCase[1]
+               got := UnescapeName(in)
+               if expect != got {
+                       t.Errorf("For '%s' got '%s' instead of '%s'", in, got, expect)
+               }
+       }
+}
+
+type fsegtest struct {
+       mt   string        // manifest text
+       f    string        // filename
+       want []FileSegment // segments should be received on channel
+}
+
+func TestFileSegmentIterByName(t *testing.T) {
+       mt := arvadostest.PathologicalManifest
+       for _, testCase := range []fsegtest{
+               {mt: mt, f: "zzzz", want: nil},
+               // This case is too sensitive: it would be acceptable
+               // (even preferable) to return only one empty segment.
+               {mt: mt, f: "foo/zero", want: []FileSegment{{"d41d8cd98f00b204e9800998ecf8427e+0", 0, 0}, {"d41d8cd98f00b204e9800998ecf8427e+0", 0, 0}}},
+               {mt: mt, f: "zero@0", want: []FileSegment{{"d41d8cd98f00b204e9800998ecf8427e+0", 0, 0}}},
+               {mt: mt, f: "zero@1", want: []FileSegment{{"d41d8cd98f00b204e9800998ecf8427e+0", 0, 0}}},
+               {mt: mt, f: "zero@4", want: []FileSegment{{"d41d8cd98f00b204e9800998ecf8427e+0", 0, 0}}},
+               {mt: mt, f: "zero@9", want: []FileSegment{{"d41d8cd98f00b204e9800998ecf8427e+0", 0, 0}}},
+               {mt: mt, f: "f", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 0, 1}}},
+               {mt: mt, f: "ooba", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 1, 2}, {"37b51d194a7513e45b56f6524f2d51f2+3", 0, 2}}},
+               {mt: mt, f: "overlapReverse/o", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 2, 1}}},
+               {mt: mt, f: "overlapReverse/oo", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 1, 2}}},
+               {mt: mt, f: "overlapReverse/ofoo", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 2, 1}, {"acbd18db4cc2f85cedef654fccc4a4d8+3", 0, 3}}},
+               {mt: mt, f: "foo bar/baz", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 0, 3}}},
+               // This case is too sensitive: it would be better to
+               // omit the empty segment.
+               {mt: mt, f: "segmented/frob", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 0, 1}, {"37b51d194a7513e45b56f6524f2d51f2+3", 2, 1}, {"acbd18db4cc2f85cedef654fccc4a4d8+3", 1, 1}, {"d41d8cd98f00b204e9800998ecf8427e+0", 0, 0}, {"37b51d194a7513e45b56f6524f2d51f2+3", 0, 1}}},
+               {mt: mt, f: "segmented/oof", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 1, 2}, {"acbd18db4cc2f85cedef654fccc4a4d8+3", 0, 1}}},
+       } {
+               m := Manifest{Text: testCase.mt}
+               var got []FileSegment
+               for fs := range m.FileSegmentIterByName(testCase.f) {
+                       got = append(got, *fs)
+               }
+               if !reflect.DeepEqual(got, testCase.want) {
+                       t.Errorf("For %#v:\n got  %#v\n want %#v", testCase.f, got, testCase.want)
+               }
+       }
+}
index 8df945a063f3c76999c882b8f27dc20649c07d8c..8815565460b6ab842c1a42c20c00c905c9b26998 100644 (file)
@@ -101,7 +101,7 @@ def statfile(prefix, fn):
 
     return prefix+fn
 
-def uploadfiles(files, api, dry_run=False, num_retries=0, project=None):
+def uploadfiles(files, api, dry_run=False, num_retries=0, project=None, fnPattern="$(file %s/%s)"):
     # Find the smallest path prefix that includes all the files that need to be uploaded.
     # This starts at the root and iteratively removes common parent directory prefixes
     # until all file paths no longer have a common parent.
@@ -153,7 +153,7 @@ def uploadfiles(files, api, dry_run=False, num_retries=0, project=None):
         logger.info("Uploaded to %s", item["uuid"])
 
     for c in files:
-        c.fn = "$(file %s/%s)" % (pdh, c.fn)
+        c.fn = fnPattern % (pdh, c.fn)
 
     os.chdir(orgdir)
 
index d325b4eb6ecb086d15effa34bc26db3e95c9ad15..972b7f9d5172077b740e157cdb3f59b360ca824f 100644 (file)
@@ -333,7 +333,7 @@ def run_keep(blob_signing_key=None, enforce_permissions=False, num_servers=2):
         keep_args['-enforce-permissions'] = 'true'
     with open(os.path.join(TEST_TMPDIR, "keep.data-manager-token-file"), "w") as f:
         keep_args['-data-manager-token-file'] = f.name
-        f.write(os.environ['ARVADOS_API_TOKEN'])
+        f.write(auth_token('data_manager'))
     keep_args['-never-delete'] = 'false'
 
     api = arvados.api(
diff --git a/services/api/lib/create_superuser_token.rb b/services/api/lib/create_superuser_token.rb
new file mode 100755 (executable)
index 0000000..54faa9a
--- /dev/null
@@ -0,0 +1,48 @@
+# Install the supplied string (or a randomly generated token, if none
+# is given) as an API token that authenticates to the system user account.
+
+module CreateSuperUserToken
+  require File.dirname(__FILE__) + '/../config/boot'
+  require File.dirname(__FILE__) + '/../config/environment'
+
+  include ApplicationHelper
+
+  def create_superuser_token supplied_token=nil
+    act_as_system_user do
+      # If token is supplied, verify that it indeed is a superuser token
+      if supplied_token
+        api_client_auth = ApiClientAuthorization.
+          where(api_token: supplied_token).
+          first
+        if api_client_auth && !api_client_auth.user.uuid.match(/-000000000000000$/)
+          raise "Token already exists but is not a superuser token."
+        end
+      end
+
+      # need to create a token
+      if !api_client_auth
+        # Get (or create) trusted api client
+        apiClient =  ApiClient.find_or_create_by_url_prefix_and_is_trusted("ssh://root@localhost/", true)
+
+        # Check if there is an unexpired superuser token corresponding to this api client
+        api_client_auth = ApiClientAuthorization.where(
+                'user_id = (?) AND
+                 api_client_id = (?) AND
+                 (expires_at IS NULL OR expires_at > CURRENT_TIMESTAMP)',
+               system_user.id, apiClient.id).first
+
+        # none exist; create one with the supplied token
+        if !api_client_auth
+          api_client_auth = ApiClientAuthorization.
+            new(user: system_user,
+              api_client_id: apiClient.id,
+              created_by_ip_address: '::1',
+              api_token: supplied_token)
+          api_client_auth.save!
+        end
+      end
+
+      api_client_auth.api_token
+    end
+  end
+end
index d119f8a887a1244f64e58013da4352fd591b635e..7a2682af46f062b47f59a54dd12303a3e887bca6 100755 (executable)
@@ -6,30 +6,10 @@
 #
 # Print the token on stdout.
 
-supplied_token = ARGV[0]
-
-require File.dirname(__FILE__) + '/../config/boot'
-require File.dirname(__FILE__) + '/../config/environment'
-
-include ApplicationHelper
-act_as_system_user
+require './lib/create_superuser_token'
+include CreateSuperUserToken
 
-if supplied_token
-  api_client_auth = ApiClientAuthorization.
-    where(api_token: supplied_token).
-    first
-  if api_client_auth && !api_client_auth.user.uuid.match(/-000000000000000$/)
-    raise ActiveRecord::RecordNotUnique("Token already exists but is not a superuser token.")
-  end
-end
-
-if !api_client_auth
-  api_client_auth = ApiClientAuthorization.
-    new(user: system_user,
-        api_client_id: 0,
-        created_by_ip_address: '::1',
-        api_token: supplied_token)
-  api_client_auth.save!
-end
+supplied_token = ARGV[0]
 
-puts api_client_auth.api_token
+token = CreateSuperUserToken.create_superuser_token supplied_token
+puts token
index 9199d178f6bcdfec3c8536d8da9f7e6b22613898..b9ea29c314792909d3fc48e3f2fdea32175bd70b 100644 (file)
@@ -18,6 +18,18 @@ admin_trustedclient:
   api_token: 1a9ffdcga2o7cw8q12dndskomgs1ygli3ns9k2o9hgzgmktc78
   expires_at: 2038-01-01 00:00:00
 
+data_manager:
+  api_client: untrusted
+  user: system_user
+  api_token: 320mkve8qkswstz7ff61glpk3mhgghmg67wmic7elw4z41pke1
+  expires_at: 2038-01-01 00:00:00
+  scopes:
+    - GET /arvados/v1/collections
+    - GET /arvados/v1/keep_services
+    - GET /arvados/v1/keep_services/accessible
+    - GET /arvados/v1/users/current
+    - POST /arvados/v1/logs
+
 miniadmin:
   api_client: untrusted
   user: miniadmin
index d7f6f92f186341d9da550ff6b4e95f132787f04f..4ba5a3b52ddce951dad70f465e60ab7aa236969a 100644 (file)
@@ -299,7 +299,7 @@ collection_with_files_in_subdir:
   modified_by_user_uuid: zzzzz-tpzed-user1withloadab
   modified_at: 2014-02-03T17:22:54Z
   updated_at: 2014-02-03T17:22:54Z
-  manifest_text: ". 85877ca2d7e05498dd3d109baf2df106+95+A3a4e26a366ee7e4ed3e476ccf05354761be2e4ae@545a9920 0:95:file_in_subdir1\n./subdir2/subdir3 2bbc341c702df4d8f42ec31f16c10120+64+A315d7e7bad2ce937e711fc454fae2d1194d14d64@545a9920 0:32:file1_in_subdir3.txt 32:32:file2_in_subdir3.txt\n./subdir2/subdir3/subdir4 2bbc341c702df4d8f42ec31f16c10120+64+A315d7e7bad2ce937e711fc454fae2d1194d14d64@545a9920 0:32:file1_in_subdir4.txt 32:32:file2_in_subdir4.txt"
+  manifest_text: ". 85877ca2d7e05498dd3d109baf2df106+95 0:95:file_in_subdir1\n./subdir2/subdir3 2bbc341c702df4d8f42ec31f16c10120+64 0:32:file1_in_subdir3.txt 32:32:file2_in_subdir3.txt\n./subdir2/subdir3/subdir4 2bbc341c702df4d8f42ec31f16c10120+64 0:32:file1_in_subdir4.txt 32:32:file2_in_subdir4.txt"
 
 graph_test_collection1:
   uuid: zzzzz-4zz18-bv31uwvy3neko22
@@ -481,7 +481,7 @@ collection_with_repeated_filenames_and_contents_in_two_dirs_1:
   modified_at: 2014-02-03T17:22:54Z
   updated_at: 2014-02-03T17:22:54Z
   name: collection_with_repeated_filenames_and_contents_in_two_dirs_1
-  manifest_text: "./dir1 92b53930db60fe94be2a73fc771ba921+34+Af966b611a1e6a7df18e0f20ac742a255c27744b7@550a3f11 0:12:alice 12:12:alice.txt 24:10:bob.txt\n./dir2 56ac2557b1ded11ccab7293dc47d1e88+44+A1780092551dadcb9c74190a793a779cea84d632d@550a3f11 0:27:alice.txt\n"
+  manifest_text: "./dir1 92b53930db60fe94be2a73fc771ba921+34 0:12:alice 12:12:alice.txt 24:10:bob.txt\n./dir2 56ac2557b1ded11ccab7293dc47d1e88+44 0:27:alice.txt\n"
 
 collection_with_repeated_filenames_and_contents_in_two_dirs_2:
   uuid: zzzzz-4zz18-duplicatenames2
@@ -493,7 +493,7 @@ collection_with_repeated_filenames_and_contents_in_two_dirs_2:
   modified_at: 2014-02-03T17:22:54Z
   updated_at: 2014-02-03T17:22:54Z
   name: collection_with_repeated_filenames_and_contents_in_two_dirs_2
-  manifest_text: "./dir1 92b53930db60fe94be2a73fc771ba921+34+Af966b611a1e6a7df18e0f20ac742a255c27744b7@550a3f11 0:12:alice 12:12:alice.txt 24:10:carol.txt\n./dir2 56ac2557b1ded11ccab7293dc47d1e88+44+A1780092551dadcb9c74190a793a779cea84d632d@550a3f11 0:27:alice.txt\n"
+  manifest_text: "./dir1 92b53930db60fe94be2a73fc771ba921+34 0:12:alice 12:12:alice.txt 24:10:carol.txt\n./dir2 56ac2557b1ded11ccab7293dc47d1e88+44 0:27:alice.txt\n"
 
 foo_and_bar_files_in_dir:
   uuid: zzzzz-4zz18-foonbarfilesdir
@@ -505,7 +505,7 @@ foo_and_bar_files_in_dir:
   modified_at: 2014-02-03T17:22:54Z
   updated_at: 2014-02-03T17:22:54Z
   name: foo_file_in_dir
-  manifest_text: "./dir1 a84b928ebdbae3f658518c711beaec02+28+A0cff02249e70e8cd6e55dba49fef4afa3f5bfdfb@550acd28 0:3:bar 12:16:foo\n"
+  manifest_text: "./dir1 3858f62230ac3c915f300c664312c63f+6 3:3:bar 0:3:foo\n"
 
 multi_level_to_combine:
   uuid: zzzzz-4zz18-pyw8yp9g3ujh45f
diff --git a/services/api/test/unit/create_superuser_token_test.rb b/services/api/test/unit/create_superuser_token_test.rb
new file mode 100644 (file)
index 0000000..d5ca3f9
--- /dev/null
@@ -0,0 +1,78 @@
+require 'test_helper'
+require 'create_superuser_token'
+
+class CreateSuperUserTokenTest < ActiveSupport::TestCase
+  include CreateSuperUserToken
+
+  test "create superuser token, then create again with no token and expect same results" do
+    # Create a token with some string
+    token1 = create_superuser_token 'atesttoken'
+    assert_not_nil token1
+    assert_equal token1, 'atesttoken'
+
+    # Create token again; this time, we should get the one created earlier
+    token2 = create_superuser_token
+    assert_not_nil token2
+    assert_equal token1, token2
+  end
+
+  test "create superuser token with two different inputs and expect the first both times" do
+    # Create a token with some string
+    token1 = create_superuser_token 'atesttoken'
+    assert_not_nil token1
+    assert_equal token1, 'atesttoken'
+
+    # Create token again with some other string and expect the existing superuser token back
+    token2 = create_superuser_token 'someothertokenstring'
+    assert_not_nil token2
+    assert_equal token1, token2
+  end
+
+  test "create superuser token twice and expect same results" do
+    # Create a token with some string
+    token1 = create_superuser_token 'atesttoken'
+    assert_not_nil token1
+    assert_equal token1, 'atesttoken'
+
+    # Create token again with that same superuser token and expect it back
+    token2 = create_superuser_token 'atesttoken'
+    assert_not_nil token2
+    assert_equal token1, token2
+  end
+
+  test "create superuser token and invoke again with some other valid token" do
+    # Create a token with some string
+    token1 = create_superuser_token 'atesttoken'
+    assert_not_nil token1
+    assert_equal token1, 'atesttoken'
+
+    su_token = api_client_authorizations("system_user").api_token
+    token2 = create_superuser_token su_token
+    assert_equal token2, su_token
+  end
+
+  test "create superuser token, expire it, and create again" do
+    # Create a token with some string
+    token1 = create_superuser_token 'atesttoken'
+    assert_not_nil token1
+    assert_equal token1, 'atesttoken'
+
+    # Expire this token and call create again; expect a new token created
+    apiClientAuth = ApiClientAuthorization.where(api_token: token1).first
+    Thread.current[:user] = users(:admin)
+    apiClientAuth.update_attributes expires_at: '2000-10-10'
+
+    token2 = create_superuser_token
+    assert_not_nil token2
+    assert_not_equal token1, token2
+  end
+
+  test "invoke create superuser token with an invalid non-superuser token and expect error" do
+    active_user_token = api_client_authorizations("active").api_token
+    e = assert_raises RuntimeError do
+      create_superuser_token active_user_token
+    end
+    assert_not_nil e
+    assert_equal "Token already exists but is not a superuser token.", e.message
+  end
+end
index 13f4dc60f71db9567e26f534c2379a63c601289b..69f31afbc9589ce6cd6c9de2a731d5093e2c80cd 100644 (file)
@@ -101,7 +101,7 @@ func TestCopyPipeToChildLogLongLines(t *testing.T) {
        }
 
        if after, err := rcv.ReadBytes('\n'); err != nil || string(after) != "after\n" {
-               t.Fatal("\"after\n\" not received (got \"%s\", %s)", after, err)
+               t.Fatalf("\"after\n\" not received (got \"%s\", %s)", after, err)
        }
 
        select {
index c2cb762d52b625b625634f24d385ddbf9ad4e7d8..685f94c88f3a35c33f6aa986e85701a5853e5d32 100644 (file)
@@ -16,11 +16,6 @@ import (
        "time"
 )
 
-const (
-       ActiveUserToken = "3kg6k6lzmp9kj5cpkcoxie963cmvjahbt2fod9zru30k1jqdmi"
-       AdminToken      = "4axaw8zxe0qm22wa6urpp5nskcne8z88cvbupv653y1njyi05h"
-)
-
 var arv arvadosclient.ArvadosClient
 var keepClient *keepclient.KeepClient
 var keepServers []string
@@ -34,6 +29,7 @@ func SetupDataManagerTest(t *testing.T) {
        arvadostest.StartKeep(2, false)
 
        arv = makeArvadosClient()
+       arv.ApiToken = arvadostest.DataManagerToken
 
        // keep client
        keepClient = &keepclient.KeepClient{
@@ -124,7 +120,18 @@ func getFirstLocatorFromCollection(t *testing.T, uuid string) string {
        return match[1] + "+" + match[2]
 }
 
+func switchToken(t string) func() {
+       orig := arv.ApiToken
+       restore := func() {
+               arv.ApiToken = orig
+       }
+       arv.ApiToken = t
+       return restore
+}
+
 func getCollection(t *testing.T, uuid string) Dict {
+       defer switchToken(arvadostest.AdminToken)()
+
        getback := make(Dict)
        err := arv.Get("collections", uuid, nil, &getback)
        if err != nil {
@@ -138,6 +145,8 @@ func getCollection(t *testing.T, uuid string) Dict {
 }
 
 func updateCollection(t *testing.T, uuid string, paramName string, paramValue string) {
+       defer switchToken(arvadostest.AdminToken)()
+
        err := arv.Update("collections", uuid, arvadosclient.Dict{
                "collection": arvadosclient.Dict{
                        paramName: paramValue,
@@ -152,6 +161,8 @@ func updateCollection(t *testing.T, uuid string, paramName string, paramValue st
 type Dict map[string]interface{}
 
 func deleteCollection(t *testing.T, uuid string) {
+       defer switchToken(arvadostest.AdminToken)()
+
        getback := make(Dict)
        err := arv.Delete("collections", uuid, nil, &getback)
        if err != nil {
@@ -175,7 +186,7 @@ func getBlockIndexesForServer(t *testing.T, i int) []string {
        path := keepServers[i] + "/index"
        client := http.Client{}
        req, err := http.NewRequest("GET", path, nil)
-       req.Header.Add("Authorization", "OAuth2 "+AdminToken)
+       req.Header.Add("Authorization", "OAuth2 "+arvadostest.DataManagerToken)
        req.Header.Add("Content-Type", "application/octet-stream")
        resp, err := client.Do(req)
        defer resp.Body.Close()
@@ -297,7 +308,7 @@ func backdateBlocks(t *testing.T, oldUnusedBlockLocators []string) {
 func getStatus(t *testing.T, path string) interface{} {
        client := http.Client{}
        req, err := http.NewRequest("GET", path, nil)
-       req.Header.Add("Authorization", "OAuth2 "+AdminToken)
+       req.Header.Add("Authorization", "OAuth2 "+arvadostest.DataManagerToken)
        req.Header.Add("Content-Type", "application/octet-stream")
        resp, err := client.Do(req)
        if err != nil {
@@ -504,7 +515,7 @@ func TestRunDatamanagerAsNonAdminUser(t *testing.T) {
        defer TearDownDataManagerTest(t)
        SetupDataManagerTest(t)
 
-       arv.ApiToken = ActiveUserToken
+       arv.ApiToken = arvadostest.ActiveToken
 
        err := singlerun(arv)
        if err == nil {
index 86c2b089aa13088d8da8f524ab21dcc8dafc9641..3a9c21a43855a472c4f5b43aa9b651fd85f506d4 100644 (file)
@@ -23,7 +23,7 @@ import (
 
 // ServerAddress struct
 type ServerAddress struct {
-       SSL         bool   `json:service_ssl_flag`
+       SSL         bool   `json:"service_ssl_flag"`
        Host        string `json:"service_host"`
        Port        int    `json:"service_port"`
        UUID        string `json:"uuid"`
index 191cb55601053d1f58a284ce08847149d04d2522..2435e6de806043b4e014071e8f9e05f06b159c4e 100755 (executable)
@@ -177,9 +177,10 @@ class DockerImageUseRecorder(DockerEventListener):
 class DockerImageCleaner(DockerImageUseRecorder):
     event_handlers = DockerImageUseRecorder.event_handlers.copy()
 
-    def __init__(self, images, docker_client, events):
+    def __init__(self, images, docker_client, events, remove_containers_onexit=False):
         super().__init__(images, docker_client, events)
         self.logged_unknown = set()
+        self.remove_containers_onexit = remove_containers_onexit
 
     def new_container(self, event, container_hash):
         container_image_id = container_hash['Image']
@@ -188,6 +189,29 @@ class DockerImageCleaner(DockerImageUseRecorder):
             self.images.add_image(image_hash)
         return super().new_container(event, container_hash)
 
+    def _remove_container(self, cid):
+        try:
+            self.docker_client.remove_container(cid)
+        except docker.errors.APIError as error:
+            logger.warning("Failed to remove container %s: %s", cid, error)
+        else:
+            logger.info("Removed container %s", cid)
+
+    @event_handlers.on('die')
+    def clean_container(self, event=None):
+        if self.remove_containers_onexit:
+            self._remove_container(event['id'])
+
+    def check_stopped_containers(self, remove=False):
+        logger.info("Checking for stopped containers")
+        for c in self.docker_client.containers(filters={'status': 'exited'}):
+            logger.info("Container %s %s", c['Id'], c['Status'])
+            if c['Status'][:6] != 'Exited':
+                logger.error("Unexpected status %s for container %s",
+                             c['Status'], c['Id'])
+            elif remove:
+                self._remove_container(c['Id'])
+
     @event_handlers.on('destroy')
     def clean_images(self, event=None):
         for image_id in self.images.should_delete():
@@ -225,6 +249,12 @@ def parse_arguments(arguments):
     parser.add_argument(
         '--quota', action='store', type=human_size, required=True,
         help="space allowance for Docker images, suffixed with K/M/G/T")
+    parser.add_argument(
+        '--remove-stopped-containers', type=str, default='always',
+        choices=['never', 'onexit', 'always'],
+        help="""when to remove stopped containers (default: always, i.e., remove
+        stopped containers found at startup, and remove containers as
+        soon as they exit)""")
     parser.add_argument(
         '--verbose', '-v', action='count', default=0,
         help="log more information")
@@ -246,9 +276,13 @@ def run(args, docker_client):
         images, docker_client, docker_client.events(since=1, until=start_time))
     use_recorder.run()
     cleaner = DockerImageCleaner(
-        images, docker_client, docker_client.events(since=start_time))
-    logger.info("Starting cleanup loop")
+        images, docker_client, docker_client.events(since=start_time),
+        remove_containers_onexit=args.remove_stopped_containers != 'never')
+    cleaner.check_stopped_containers(
+        remove=args.remove_stopped_containers == 'always')
+    logger.info("Checking image quota at startup")
     cleaner.clean_images()
+    logger.info("Listening for docker events")
     cleaner.run()
 
 def main(arguments):
index fd959de7762dd2b0a54f5b08b1107e4aedc45d5a..43abe4f636dcb1076ee65e90f7a549f6616af834 100644 (file)
@@ -223,13 +223,14 @@ class DockerImagesTestCase(unittest.TestCase):
 
 class DockerImageUseRecorderTestCase(unittest.TestCase):
     TEST_CLASS = cleaner.DockerImageUseRecorder
+    TEST_CLASS_INIT_KWARGS = {}
 
     def setUp(self):
         self.images = mock.MagicMock(name='images')
         self.docker_client = mock.MagicMock(name='docker_client')
         self.events = []
         self.recorder = self.TEST_CLASS(self.images, self.docker_client,
-                                        self.encoded_events)
+                                        self.encoded_events, **self.TEST_CLASS_INIT_KWARGS)
 
     @property
     def encoded_events(self):
@@ -310,6 +311,23 @@ class DockerImageCleanerTestCase(DockerImageUseRecorderTestCase):
         self.assertFalse(self.images.del_image.called)
 
 
+class DockerContainerCleanerTestCase(DockerImageUseRecorderTestCase):
+    TEST_CLASS = cleaner.DockerImageCleaner
+    TEST_CLASS_INIT_KWARGS = {'remove_containers_onexit': True}
+
+    @mock.patch('arvados_docker.cleaner.logger')
+    def test_failed_container_deletion_handling(self, mockLogger):
+        cid = MockDockerId()
+        self.docker_client.remove_container.side_effect = MockException(500)
+        self.events.append(MockEvent('die', docker_id=cid))
+        self.recorder.run()
+        self.docker_client.remove_container.assert_called_with(cid)
+        self.assertEqual("Failed to remove container %s: %s",
+                         mockLogger.warning.call_args[0][0])
+        self.assertEqual(cid,
+                         mockLogger.warning.call_args[0][1])
+
+
 class HumanSizeTestCase(unittest.TestCase):
     def check(self, human_str, count, exp):
         self.assertEqual(count * (1024 ** exp),
@@ -354,3 +372,57 @@ class RunTestCase(unittest.TestCase):
         self.assertLessEqual(test_start_time, event_kwargs[0]['until'])
         self.assertIn('since', event_kwargs[1])
         self.assertEqual(event_kwargs[0]['until'], event_kwargs[1]['since'])
+
+
+class ContainerRemovalTestCase(unittest.TestCase):
+    LIFECYCLE = ['create', 'attach', 'start', 'resize', 'die', 'destroy']
+
+    def setUp(self):
+        self.args = mock.MagicMock(name='args')
+        self.docker_client = mock.MagicMock(name='docker_client')
+        self.existingCID = MockDockerId()
+        self.docker_client.containers.return_value = [{
+            'Id': self.existingCID,
+            'Status': 'Exited (0) 6 weeks ago',
+        }, {
+            # If docker_client.containers() returns non-exited
+            # containers for some reason, do not remove them.
+            'Id': MockDockerId(),
+            'Status': 'Running',
+        }]
+        self.newCID = MockDockerId()
+        self.docker_client.events.return_value = [
+            MockEvent(e, docker_id=self.newCID).encoded()
+            for e in self.LIFECYCLE]
+
+    def test_remove_onexit(self):
+        self.args.remove_stopped_containers = 'onexit'
+        cleaner.run(self.args, self.docker_client)
+        self.docker_client.remove_container.assert_called_once_with(self.newCID)
+
+    def test_remove_always(self):
+        self.args.remove_stopped_containers = 'always'
+        cleaner.run(self.args, self.docker_client)
+        self.docker_client.remove_container.assert_any_call(self.existingCID)
+        self.docker_client.remove_container.assert_any_call(self.newCID)
+        self.assertEqual(2, self.docker_client.remove_container.call_count)
+
+    def test_remove_never(self):
+        self.args.remove_stopped_containers = 'never'
+        cleaner.run(self.args, self.docker_client)
+        self.assertEqual(0, self.docker_client.remove_container.call_count)
+
+    def test_container_exited_between_subscribe_events_and_check_existing(self):
+        self.args.remove_stopped_containers = 'always'
+        self.docker_client.events.return_value = [
+            MockEvent(e, docker_id=self.existingCID).encoded()
+            for e in ['die', 'destroy']]
+        cleaner.run(self.args, self.docker_client)
+        # Subscribed to events before getting the list of existing
+        # exited containers?
+        self.docker_client.assert_has_calls([
+            mock.call.events(since=mock.ANY),
+            mock.call.containers(filters={'status':'exited'})])
+        # Asked to delete the container twice?
+        self.docker_client.remove_container.assert_has_calls([mock.call(self.existingCID)] * 2)
+        self.assertEqual(2, self.docker_client.remove_container.call_count)
diff --git a/services/keep-web/.gitignore b/services/keep-web/.gitignore
new file mode 100644 (file)
index 0000000..53997c2
--- /dev/null
@@ -0,0 +1 @@
+keep-web
diff --git a/services/keep-web/anonymous.go b/services/keep-web/anonymous.go
new file mode 100644 (file)
index 0000000..15a98c2
--- /dev/null
@@ -0,0 +1,35 @@
+package main
+
+import (
+       "flag"
+       "fmt"
+       "os"
+       "strconv"
+)
+
+var anonymousTokens tokenSet
+
+type tokenSet []string
+
+func (ts *tokenSet) Set(s string) error {
+       v, err := strconv.ParseBool(s)
+       if v && len(*ts) == 0 {
+               *ts = append(*ts, os.Getenv("ARVADOS_API_TOKEN"))
+       } else if !v {
+               *ts = (*ts)[:0]
+       }
+       return err
+}
+
+func (ts *tokenSet) String() string {
+       return fmt.Sprintf("%v", len(*ts) > 0)
+}
+
+func (ts *tokenSet) IsBoolFlag() bool {
+       return true
+}
+
+func init() {
+       flag.Var(&anonymousTokens, "allow-anonymous",
+               "Serve public data to anonymous clients. Try the token supplied in the ARVADOS_API_TOKEN environment variable when none of the tokens provided in an HTTP request succeed in reading the desired collection.")
+}
diff --git a/services/keep-web/doc.go b/services/keep-web/doc.go
new file mode 100644 (file)
index 0000000..4207d7b
--- /dev/null
@@ -0,0 +1,235 @@
+// Keep-web provides read-only HTTP access to files stored in Keep. It
+// serves public data to anonymous and unauthenticated clients, and
+// serves private data to clients that supply Arvados API tokens. It
+// can be installed anywhere with access to Keep services, typically
+// behind a web proxy that supports TLS.
+//
+// See http://doc.arvados.org/install/install-keep-web.html.
+//
+// Run "keep-web -help" to show all supported options.
+//
+// Starting the server
+//
+// Serve HTTP requests at port 1234 on all interfaces:
+//
+//   keep-web -listen=:1234
+//
+// Serve HTTP requests at port 1234 on the interface with IP address 1.2.3.4:
+//
+//   keep-web -listen=1.2.3.4:1234
+//
+// Proxy configuration
+//
+// Keep-web does not support SSL natively. Typically, it is installed
+// behind a proxy like nginx.
+//
+// Here is an example nginx configuration.
+//
+//     http {
+//       upstream keep-web {
+//         server localhost:1234;
+//       }
+//       server {
+//         listen *:443 ssl;
+//         server_name collections.example.com *.collections.example.com ~.*--collections.example.com;
+//         ssl_certificate /root/wildcard.example.com.crt;
+//         ssl_certificate_key /root/wildcard.example.com.key;
+//         location  / {
+//           proxy_pass http://keep-web;
+//           proxy_set_header Host $host;
+//           proxy_set_header X-Forwarded-For $remote_addr;
+//         }
+//       }
+//     }
+//
+// It is not necessary to run keep-web on the same host as the nginx
+// proxy. However, TLS is not used between nginx and keep-web, so
+// intervening networks must be secured by other means.
+//
+// Anonymous downloads
+//
+// Use the -allow-anonymous flag with an ARVADOS_API_TOKEN environment
+// variable to specify a token to use when clients try to retrieve
+// files without providing their own Arvados API token.
+//
+//   export ARVADOS_API_TOKEN=zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
+//   keep-web [...] -allow-anonymous
+//
+// See http://doc.arvados.org/install/install-keep-web.html for examples.
+//
+// Download URLs
+//
+// The following "same origin" URL patterns are supported for public
+// collections and collections shared anonymously via secret links
+// (i.e., collections which can be served by keep-web without making
+// use of any implicit credentials like cookies). See "Same-origin
+// URLs" below.
+//
+//   http://collections.example.com/c=uuid_or_pdh/path/file.txt
+//   http://collections.example.com/c=uuid_or_pdh/t=TOKEN/path/file.txt
+//
+// The following "multiple origin" URL patterns are supported for all
+// collections:
+//
+//   http://uuid_or_pdh--collections.example.com/path/file.txt
+//   http://uuid_or_pdh--collections.example.com/t=TOKEN/path/file.txt
+//
+// In the "multiple origin" form, the string "--" can be replaced with
+// "." with identical results (assuming the downstream proxy is
+// configured accordingly). These two are equivalent:
+//
+//   http://uuid_or_pdh--collections.example.com/path/file.txt
+//   http://uuid_or_pdh.collections.example.com/path/file.txt
+//
+// The first form (with "--" instead of ".") avoids the cost and
+// effort of deploying a wildcard TLS certificate for
+// *.collections.example.com at sites that already have a wildcard
+// certificate for *.example.com. The second form is likely to be
+// easier to configure, and more efficient to run, on a downstream
+// proxy.
+//
+// In all of the above forms, the "collections.example.com" part can
+// be anything at all: keep-web itself ignores everything after the
+// first "." or "--". (Of course, in order for clients to connect at
+// all, DNS and any relevant proxies must be configured accordingly.)
+//
+// In all of the above forms, the "uuid_or_pdh" part can be either a
+// collection UUID or a portable data hash with the "+" character
+// optionally replaced by "-". (When "uuid_or_pdh" appears in the
+// domain name, replacing "+" with "-" is mandatory, because "+" is
+// not a valid character in a domain name.)
+//
+// In all of the above forms, a top level directory called "_" is
+// skipped. In cases where the "path/file.txt" part might start with
+// "t=" or "c=" or "_/", links should be constructed with a leading
+// "_/" to ensure the top level directory is not interpreted as a
+// token or collection ID.
+//
+// Assuming there is a collection with UUID
+// zzzzz-4zz18-znfnqtbbv4spc3w and portable data hash
+// 1f4b0bc7583c2a7f9102c395f4ffc5e3+45, the following URLs are
+// interchangeable:
+//
+//   http://zzzzz-4zz18-znfnqtbbv4spc3w.collections.example.com/foo/bar.txt
+//   http://zzzzz-4zz18-znfnqtbbv4spc3w.collections.example.com/_/foo/bar.txt
+//   http://zzzzz-4zz18-znfnqtbbv4spc3w--collections.example.com/_/foo/bar.txt
+//   http://1f4b0bc7583c2a7f9102c395f4ffc5e3-45--foo.example.com/foo/bar.txt
+//   http://1f4b0bc7583c2a7f9102c395f4ffc5e3-45--.invalid/foo/bar.txt
+//
+// An additional form is supported specifically to make it more
+// convenient to maintain support for existing Workbench download
+// links:
+//
+//   http://collections.example.com/collections/download/uuid_or_pdh/TOKEN/foo/bar.txt
+//
+// A regular Workbench "download" link is also accepted, but
+// credentials passed via cookie, header, etc. are ignored. Only
+// public data can be served this way:
+//
+//   http://collections.example.com/collections/uuid_or_pdh/foo/bar.txt
+//
+// Authorization mechanisms
+//
+// A token can be provided in an Authorization header:
+//
+//   Authorization: OAuth2 o07j4px7RlJK4CuMYp7C0LDT4CzR1J1qBE5Avo7eCcUjOTikxK
+//
+// A base64-encoded token can be provided in a cookie named "api_token":
+//
+//   Cookie: api_token=bzA3ajRweDdSbEpLNEN1TVlwN0MwTERUNEN6UjFKMXFCRTVBdm83ZUNjVWpPVGlreEs=
+//
+// A token can be provided in a URL-encoded query string:
+//
+//   GET /foo/bar.txt?api_token=o07j4px7RlJK4CuMYp7C0LDT4CzR1J1qBE5Avo7eCcUjOTikxK
+//
+// A suitably encoded token can be provided in a POST body if the
+// request has a content type of application/x-www-form-urlencoded or
+// multipart/form-data:
+//
+//   POST /foo/bar.txt
+//   Content-Type: application/x-www-form-urlencoded
+//   [...]
+//   api_token=o07j4px7RlJK4CuMYp7C0LDT4CzR1J1qBE5Avo7eCcUjOTikxK
+//
+// If a token is provided in a query string or in a POST request, the
+// response is an HTTP 303 redirect to an equivalent GET request, with
+// the token stripped from the query string and added to a cookie
+// instead.
+//
+// Indexes
+//
+// Currently, keep-web does not generate HTML index listings, nor does
+// it serve a default file like "index.html" when a directory is
+// requested. These features are likely to be added in future
+// versions. Until then, keep-web responds with 404 if a directory
+// name (or any path ending with "/") is requested.
+//
+// Compatibility
+//
+// Client-provided authorization tokens are ignored if the client does
+// not provide a Host header.
+//
+// In order to use the query string or a POST form authorization
+// mechanisms, the client must follow 303 redirects; the client must
+// accept cookies with a 303 response and send those cookies when
+// performing the redirect; and either the client or an intervening
+// proxy must resolve a relative URL ("//host/path") if given in a
+// response Location header.
+//
+// Intranet mode
+//
+// Normally, Keep-web accepts requests for multiple collections using
+// the same host name, provided the client's credentials are not being
+// used. This provides insufficient XSS protection in an installation
+// where the "anonymously accessible" data is not truly public, but
+// merely protected by network topology.
+//
+// In such cases -- for example, a site which is not reachable from
+// the internet, where some data is world-readable from Arvados's
+// perspective but is intended to be available only to users within
+// the local network -- the downstream proxy should be configured to
+// return 401 for all paths beginning with "/c=".
+//
+// Same-origin URLs
+//
+// Without the same-origin protection outlined above, a web page
+// stored in collection X could execute JavaScript code that uses the
+// current viewer's credentials to download additional data from
+// collection Y -- data which is accessible to the current viewer, but
+// not to the author of collection X -- from the same origin
+// (``https://collections.example.com/'') and upload it to some other
+// site chosen by the author of collection X.
+//
+// Attachment-Only host
+//
+// It is possible to serve untrusted content and accept user
+// credentials at the same origin as long as the content is only
+// downloaded, never executed by browsers. A single origin (hostname
+// and port) can be designated as an "attachment-only" origin: cookies
+// will be accepted and all responses will have a
+// "Content-Disposition: attachment" header. This behavior is invoked
+// only when the designated origin matches exactly the Host header
+// provided by the client or downstream proxy.
+//
+//   keep-web -listen :9999 -attachment-only-host domain.example:9999
+//
+// Trust All Content mode
+//
+// In "trust all content" mode, Keep-web will accept credentials (API
+// tokens) and serve any collection X at
+// "https://collections.example.com/collections/X/path/file.ext".
+// This is UNSAFE except in the special case where everyone who is
+// able to write ANY data to Keep, and every JavaScript and HTML file
+// written to Keep, is also trusted to read ALL of the data in Keep.
+//
+// In such cases you can enable trust-all-content mode.
+//
+//   keep-web -listen :9999 -trust-all-content
+//
+// When using trust-all-content mode, the only effect of the
+// -attachment-only-host option is to add a "Content-Disposition:
+// attachment" header.
+//
+//   keep-web -listen :9999 -attachment-only-host domain.example:9999 -trust-all-content
+//
+package main
diff --git a/services/keep-web/handler.go b/services/keep-web/handler.go
new file mode 100644 (file)
index 0000000..3e38cc3
--- /dev/null
@@ -0,0 +1,316 @@
+package main
+
+import (
+       "flag"
+       "fmt"
+       "html"
+       "io"
+       "mime"
+       "net/http"
+       "net/url"
+       "os"
+       "strings"
+
+       "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
+       "git.curoverse.com/arvados.git/sdk/go/auth"
+       "git.curoverse.com/arvados.git/sdk/go/httpserver"
+       "git.curoverse.com/arvados.git/sdk/go/keepclient"
+)
+
+type handler struct{}
+
+var (
+       clientPool         = arvadosclient.MakeClientPool()
+       trustAllContent    = false
+       attachmentOnlyHost = ""
+)
+
+func init() {
+       flag.StringVar(&attachmentOnlyHost, "attachment-only-host", "",
+               "Accept credentials, and add \"Content-Disposition: attachment\" response headers, for requests at this hostname:port. Prohibiting inline display makes it possible to serve untrusted and non-public content from a single origin, i.e., without wildcard DNS or SSL.")
+       flag.BoolVar(&trustAllContent, "trust-all-content", false,
+               "Serve non-public content from a single origin. Dangerous: read docs before using!")
+}
+
+// return a UUID or PDH if s begins with a UUID or URL-encoded PDH;
+// otherwise return "".
+func parseCollectionIDFromDNSName(s string) string {
+       // Strip domain.
+       if i := strings.IndexRune(s, '.'); i >= 0 {
+               s = s[:i]
+       }
+       // Names like {uuid}--collections.example.com serve the same
+       // purpose as {uuid}.collections.example.com but can reduce
+       // cost/effort of using [additional] wildcard certificates.
+       if i := strings.Index(s, "--"); i >= 0 {
+               s = s[:i]
+       }
+       if arvadosclient.UUIDMatch(s) {
+               return s
+       }
+       if pdh := strings.Replace(s, "-", "+", 1); arvadosclient.PDHMatch(pdh) {
+               return pdh
+       }
+       return ""
+}
+
+var urlPDHDecoder = strings.NewReplacer(" ", "+", "-", "+")
+
+// return a UUID or PDH if s is a UUID or a PDH (even if it is a PDH
+// with "+" replaced by " " or "-"); otherwise return "".
+func parseCollectionIDFromURL(s string) string {
+       if arvadosclient.UUIDMatch(s) {
+               return s
+       }
+       if pdh := urlPDHDecoder.Replace(s); arvadosclient.PDHMatch(pdh) {
+               return pdh
+       }
+       return ""
+}
+
+func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
+       var statusCode = 0
+       var statusText string
+
+       remoteAddr := r.RemoteAddr
+       if xff := r.Header.Get("X-Forwarded-For"); xff != "" {
+               remoteAddr = xff + "," + remoteAddr
+       }
+
+       w := httpserver.WrapResponseWriter(wOrig)
+       defer func() {
+               if statusCode == 0 {
+                       statusCode = w.WroteStatus()
+               } else if w.WroteStatus() == 0 {
+                       w.WriteHeader(statusCode)
+               } else if w.WroteStatus() != statusCode {
+                       httpserver.Log(r.RemoteAddr, "WARNING",
+                               fmt.Sprintf("Our status changed from %d to %d after we sent headers", w.WroteStatus(), statusCode))
+               }
+               if statusText == "" {
+                       statusText = http.StatusText(statusCode)
+               }
+               httpserver.Log(remoteAddr, statusCode, statusText, w.WroteBodyBytes(), r.Method, r.Host, r.URL.Path, r.URL.RawQuery)
+       }()
+
+       if r.Method != "GET" && r.Method != "POST" {
+               statusCode, statusText = http.StatusMethodNotAllowed, r.Method
+               return
+       }
+
+       arv := clientPool.Get()
+       if arv == nil {
+               statusCode, statusText = http.StatusInternalServerError, "Pool failed: "+clientPool.Err().Error()
+               return
+       }
+       defer clientPool.Put(arv)
+
+       pathParts := strings.Split(r.URL.Path[1:], "/")
+
+       var targetID string
+       var targetPath []string
+       var tokens []string
+       var reqTokens []string
+       var pathToken bool
+       var attachment bool
+       credentialsOK := trustAllContent
+
+       if r.Host != "" && r.Host == attachmentOnlyHost {
+               credentialsOK = true
+               attachment = true
+       } else if r.FormValue("disposition") == "attachment" {
+               attachment = true
+       }
+
+       if targetID = parseCollectionIDFromDNSName(r.Host); targetID != "" {
+               // http://ID.collections.example/PATH...
+               credentialsOK = true
+               targetPath = pathParts
+       } else if len(pathParts) >= 2 && strings.HasPrefix(pathParts[0], "c=") {
+               // /c=ID/PATH...
+               targetID = parseCollectionIDFromURL(pathParts[0][2:])
+               targetPath = pathParts[1:]
+       } else if len(pathParts) >= 3 && pathParts[0] == "collections" {
+               if len(pathParts) >= 5 && pathParts[1] == "download" {
+                       // /collections/download/ID/TOKEN/PATH...
+                       targetID = pathParts[2]
+                       tokens = []string{pathParts[3]}
+                       targetPath = pathParts[4:]
+                       pathToken = true
+               } else {
+                       // /collections/ID/PATH...
+                       targetID = pathParts[1]
+                       tokens = anonymousTokens
+                       targetPath = pathParts[2:]
+               }
+       } else {
+               statusCode = http.StatusNotFound
+               return
+       }
+       if t := r.FormValue("api_token"); t != "" {
+               // The client provided an explicit token in the query
+               // string, or a form in POST body. We must put the
+               // token in an HttpOnly cookie, and redirect to the
+               // same URL with the query param redacted and method =
+               // GET.
+
+               if !credentialsOK {
+                       // It is not safe to copy the provided token
+                       // into a cookie unless the current vhost
+                       // (origin) serves only a single collection or
+                       // we are in trustAllContent mode.
+                       statusCode = http.StatusBadRequest
+                       return
+               }
+
+               // The HttpOnly flag is necessary to prevent
+               // JavaScript code (included in, or loaded by, a page
+               // in the collection being served) from employing the
+               // user's token beyond reading other files in the same
+               // domain, i.e., same collection.
+               //
+               // The 303 redirect is necessary in the case of a GET
+               // request to avoid exposing the token in the Location
+               // bar, and in the case of a POST request to avoid
+               // raising warnings when the user refreshes the
+               // resulting page.
+
+               http.SetCookie(w, &http.Cookie{
+                       Name:     "arvados_api_token",
+                       Value:    auth.EncodeTokenCookie([]byte(t)),
+                       Path:     "/",
+                       HttpOnly: true,
+               })
+               redir := (&url.URL{Host: r.Host, Path: r.URL.Path}).String()
+
+               w.Header().Add("Location", redir)
+               statusCode, statusText = http.StatusSeeOther, redir
+               w.WriteHeader(statusCode)
+               io.WriteString(w, `<A href="`)
+               io.WriteString(w, html.EscapeString(redir))
+               io.WriteString(w, `">Continue</A>`)
+               return
+       }
+
+       if tokens == nil && strings.HasPrefix(targetPath[0], "t=") {
+               // http://ID.example/t=TOKEN/PATH...
+               // /c=ID/t=TOKEN/PATH...
+               //
+               // This form must only be used to pass scoped tokens
+               // that give permission for a single collection. See
+               // FormValue case above.
+               tokens = []string{targetPath[0][2:]}
+               pathToken = true
+               targetPath = targetPath[1:]
+       }
+
+       if tokens == nil {
+               if credentialsOK {
+                       reqTokens = auth.NewCredentialsFromHTTPRequest(r).Tokens
+               }
+               tokens = append(reqTokens, anonymousTokens...)
+       }
+
+       if len(targetPath) > 0 && targetPath[0] == "_" {
+               // If a collection has a directory called "t=foo" or
+               // "_", it can be served at
+               // //collections.example/_/t=foo/ or
+               // //collections.example/_/_/ respectively:
+               // //collections.example/t=foo/ won't work because
+               // t=foo will be interpreted as a token "foo".
+               targetPath = targetPath[1:]
+       }
+
+       tokenResult := make(map[string]int)
+       collection := make(map[string]interface{})
+       found := false
+       for _, arv.ApiToken = range tokens {
+               err := arv.Get("collections", targetID, nil, &collection)
+               if err == nil {
+                       // Success
+                       found = true
+                       break
+               }
+               if srvErr, ok := err.(arvadosclient.APIServerError); ok {
+                       switch srvErr.HttpStatusCode {
+                       case 404, 401:
+                               // Token broken or insufficient to
+                               // retrieve collection
+                               tokenResult[arv.ApiToken] = srvErr.HttpStatusCode
+                               continue
+                       }
+               }
+               // Something more serious is wrong
+               statusCode, statusText = http.StatusInternalServerError, err.Error()
+               return
+       }
+       if !found {
+               if pathToken || !credentialsOK {
+                       // Either the URL is a "secret sharing link"
+                       // that didn't work out (and asking the client
+                       // for additional credentials would just be
+                       // confusing), or we don't even accept
+                       // credentials at this path.
+                       statusCode = http.StatusNotFound
+                       return
+               }
+               for _, t := range reqTokens {
+                       if tokenResult[t] == 404 {
+                               // The client provided valid token(s), but the
+                               // collection was not found.
+                               statusCode = http.StatusNotFound
+                               return
+                       }
+               }
+               // The client's token was invalid (e.g., expired), or
+               // the client didn't even provide one.  Propagate the
+               // 401 to encourage the client to use a [different]
+               // token.
+               //
+               // TODO(TC): This response would be confusing to
+               // someone trying (anonymously) to download public
+               // data that has been deleted.  Allow a referrer to
+               // provide this context somehow?
+               w.Header().Add("WWW-Authenticate", "Basic realm=\"collections\"")
+               statusCode = http.StatusUnauthorized
+               return
+       }
+
+       filename := strings.Join(targetPath, "/")
+       kc, err := keepclient.MakeKeepClient(arv)
+       if err != nil {
+               statusCode, statusText = http.StatusInternalServerError, err.Error()
+               return
+       }
+       rdr, err := kc.CollectionFileReader(collection, filename)
+       if os.IsNotExist(err) {
+               statusCode = http.StatusNotFound
+               return
+       } else if err != nil {
+               statusCode, statusText = http.StatusBadGateway, err.Error()
+               return
+       }
+       defer rdr.Close()
+
+       // One or both of these can be -1 if not found:
+       basenamePos := strings.LastIndex(filename, "/")
+       extPos := strings.LastIndex(filename, ".")
+       if extPos > basenamePos {
+               // Now extPos is safely >= 0.
+               if t := mime.TypeByExtension(filename[extPos:]); t != "" {
+                       w.Header().Set("Content-Type", t)
+               }
+       }
+       if rdr, ok := rdr.(keepclient.ReadCloserWithLen); ok {
+               w.Header().Set("Content-Length", fmt.Sprintf("%d", rdr.Len()))
+       }
+       if attachment {
+               w.Header().Set("Content-Disposition", "attachment")
+       }
+
+       w.WriteHeader(http.StatusOK)
+       _, err = io.Copy(w, rdr)
+       if err != nil {
+               statusCode, statusText = http.StatusBadGateway, err.Error()
+       }
+}
diff --git a/services/keep-web/handler_test.go b/services/keep-web/handler_test.go
new file mode 100644 (file)
index 0000000..392de94
--- /dev/null
@@ -0,0 +1,356 @@
+package main
+
+import (
+       "html"
+       "io/ioutil"
+       "net/http"
+       "net/http/httptest"
+       "net/url"
+       "regexp"
+       "strings"
+
+       "git.curoverse.com/arvados.git/sdk/go/arvadostest"
+       "git.curoverse.com/arvados.git/sdk/go/auth"
+       check "gopkg.in/check.v1"
+)
+
+var _ = check.Suite(&UnitSuite{})
+
+type UnitSuite struct{}
+
+// mustParseURL parses s and panics on failure. Test-fixture helper
+// only: the URLs passed in are compile-time constants, so a parse
+// error indicates a broken test, not a runtime condition.
+func mustParseURL(s string) *url.URL {
+       r, err := url.Parse(s)
+       if err != nil {
+               panic("parse URL: " + s)
+       }
+       return r
+}
+
+// TestVhost404 verifies that vhost-style requests for a nonexistent
+// collection return 404 with an empty body, both with and without a
+// valid token embedded in the path.
+func (s *IntegrationSuite) TestVhost404(c *check.C) {
+       for _, testURL := range []string{
+               arvadostest.NonexistentCollection + ".example.com/theperthcountyconspiracy",
+               arvadostest.NonexistentCollection + ".example.com/t=" + arvadostest.ActiveToken + "/theperthcountyconspiracy",
+       } {
+               resp := httptest.NewRecorder()
+               req := &http.Request{
+                       Method: "GET",
+                       URL:    mustParseURL(testURL),
+               }
+               (&handler{}).ServeHTTP(resp, req)
+               c.Check(resp.Code, check.Equals, http.StatusNotFound)
+               c.Check(resp.Body.String(), check.Equals, "")
+       }
+}
+
+// An authorizer modifies an HTTP request to make use of the given
+// token -- by adding it to a header, cookie, query param, or whatever
+// -- and returns the HTTP status code we should expect from keep-web if
+// the token is invalid.
+type authorizer func(*http.Request, string) int
+
+func (s *IntegrationSuite) TestVhostViaAuthzHeader(c *check.C) {
+       doVhostRequests(c, authzViaAuthzHeader)
+}
+
+// authzViaAuthzHeader supplies the token via "Authorization: OAuth2".
+// A bad header token is retryable with different credentials, so the
+// expected failure code is 401.
+func authzViaAuthzHeader(r *http.Request, tok string) int {
+       r.Header.Add("Authorization", "OAuth2 "+tok)
+       return http.StatusUnauthorized
+}
+
+func (s *IntegrationSuite) TestVhostViaCookieValue(c *check.C) {
+       doVhostRequests(c, authzViaCookieValue)
+}
+
+// authzViaCookieValue supplies the token via the arvados_api_token
+// cookie (encoded the same way the redirect handler encodes it).
+func authzViaCookieValue(r *http.Request, tok string) int {
+       r.AddCookie(&http.Cookie{
+               Name:  "arvados_api_token",
+               Value: auth.EncodeTokenCookie([]byte(tok)),
+       })
+       return http.StatusUnauthorized
+}
+
+func (s *IntegrationSuite) TestVhostViaPath(c *check.C) {
+       doVhostRequests(c, authzViaPath)
+}
+
+// authzViaPath embeds the token in the URL path ("/t=.../"). A bad
+// token in the URL is not retryable at the same URL, so the expected
+// failure code is 404, not 401.
+func authzViaPath(r *http.Request, tok string) int {
+       r.URL.Path = "/t=" + tok + r.URL.Path
+       return http.StatusNotFound
+}
+
+func (s *IntegrationSuite) TestVhostViaQueryString(c *check.C) {
+       doVhostRequests(c, authzViaQueryString)
+}
+
+// authzViaQueryString supplies the token as an api_token query param.
+func authzViaQueryString(r *http.Request, tok string) int {
+       r.URL.RawQuery = "api_token=" + tok
+       return http.StatusUnauthorized
+}
+
+func (s *IntegrationSuite) TestVhostViaPOST(c *check.C) {
+       doVhostRequests(c, authzViaPOST)
+}
+
+// authzViaPOST supplies the token as an api_token field in a POSTed
+// urlencoded form body.
+func authzViaPOST(r *http.Request, tok string) int {
+       r.Method = "POST"
+       r.Header.Add("Content-Type", "application/x-www-form-urlencoded")
+       r.Body = ioutil.NopCloser(strings.NewReader(
+               url.Values{"api_token": {tok}}.Encode()))
+       return http.StatusUnauthorized
+}
+
+// Try some combinations of {url, token} using the given authorization
+// mechanism, and verify the result is correct.
+func doVhostRequests(c *check.C, authz authorizer) {
+       // Each entry is a hostname+path variant that should resolve to
+       // the "foo" file: UUID vhost, UUID--collections vhost (with and
+       // without the "/_/" path prefix), PDH vhost, PDH with "+"
+       // rewritten as "-", and a file inside a subdirectory.
+       for _, hostPath := range []string{
+               arvadostest.FooCollection + ".example.com/foo",
+               arvadostest.FooCollection + "--collections.example.com/foo",
+               arvadostest.FooCollection + "--collections.example.com/_/foo",
+               arvadostest.FooPdh + ".example.com/foo",
+               strings.Replace(arvadostest.FooPdh, "+", "-", -1) + "--collections.example.com/foo",
+               arvadostest.FooBarDirCollection + ".example.com/dir1/foo",
+       } {
+               c.Log("doRequests: ", hostPath)
+               doVhostRequestsWithHostPath(c, authz, hostPath)
+       }
+}
+
+// doVhostRequestsWithHostPath issues one request per token variant
+// (valid, truncated, valid-but-unauthorized, bogus, empty) against
+// hostPath, applying the token with authz, and checks the status code
+// and body for each case.
+func doVhostRequestsWithHostPath(c *check.C, authz authorizer, hostPath string) {
+       for _, tok := range []string{
+               arvadostest.ActiveToken,
+               arvadostest.ActiveToken[:15],
+               arvadostest.SpectatorToken,
+               "bogus",
+               "",
+       } {
+               u := mustParseURL("http://" + hostPath)
+               req := &http.Request{
+                       Method: "GET",
+                       Host:   u.Host,
+                       URL:    u,
+                       Header: http.Header{},
+               }
+               failCode := authz(req, tok)
+               resp := doReq(req)
+               code, body := resp.Code, resp.Body.String()
+               if tok == arvadostest.ActiveToken {
+                       c.Check(code, check.Equals, http.StatusOK)
+                       c.Check(body, check.Equals, "foo")
+               } else {
+                       // All failures must be 4xx (client error), never
+                       // 5xx, and must not leak any response body.
+                       c.Check(code >= 400, check.Equals, true)
+                       c.Check(code < 500, check.Equals, true)
+                       if tok == arvadostest.SpectatorToken {
+                               // Valid token never offers to retry
+                               // with different credentials.
+                               c.Check(code, check.Equals, http.StatusNotFound)
+                       } else {
+                               // Invalid token can ask to retry
+                               // depending on the authz method.
+                               c.Check(code, check.Equals, failCode)
+                       }
+                       c.Check(body, check.Equals, "")
+               }
+       }
+}
+
+// doReq serves req with a fresh handler and, like a browser, follows a
+// 303 See Other response: it re-issues a GET to the Location target,
+// carrying along any cookies set by the redirect response.
+func doReq(req *http.Request) *httptest.ResponseRecorder {
+       resp := httptest.NewRecorder()
+       (&handler{}).ServeHTTP(resp, req)
+       if resp.Code != http.StatusSeeOther {
+               return resp
+       }
+       // Extract Set-Cookie headers via a synthetic http.Response.
+       cookies := (&http.Response{Header: resp.Header()}).Cookies()
+       u, _ := req.URL.Parse(resp.Header().Get("Location"))
+       req = &http.Request{
+               Method: "GET",
+               Host:   u.Host,
+               URL:    u,
+               Header: http.Header{},
+       }
+       for _, c := range cookies {
+               req.AddCookie(c)
+       }
+       // NOTE(review): recursion is unbounded; a redirect loop in the
+       // handler would recurse forever. Acceptable for tests, but worth
+       // confirming the handler can never 303 to itself.
+       return doReq(req)
+}
+
+// TestVhostRedirectQueryTokenToCookie: a query-string token on a vhost
+// URL is accepted (via the token-to-cookie redirect) and yields the
+// file content.
+func (s *IntegrationSuite) TestVhostRedirectQueryTokenToCookie(c *check.C) {
+       s.testVhostRedirectTokenToCookie(c, "GET",
+               arvadostest.FooCollection+".example.com/foo",
+               "?api_token="+arvadostest.ActiveToken,
+               "",
+               "",
+               http.StatusOK,
+               "foo",
+       )
+}
+
+// TestSingleOriginSecretLink: a "/c=.../t=.../path" URL carrying both
+// collection and token in the path works on the shared origin.
+func (s *IntegrationSuite) TestSingleOriginSecretLink(c *check.C) {
+       s.testVhostRedirectTokenToCookie(c, "GET",
+               "example.com/c="+arvadostest.FooCollection+"/t="+arvadostest.ActiveToken+"/foo",
+               "",
+               "",
+               "",
+               http.StatusOK,
+               "foo",
+       )
+}
+
+// Bad token in URL is 404 Not Found because it doesn't make sense to
+// retry the same URL with different authorization.
+func (s *IntegrationSuite) TestSingleOriginSecretLinkBadToken(c *check.C) {
+       s.testVhostRedirectTokenToCookie(c, "GET",
+               "example.com/c="+arvadostest.FooCollection+"/t=bogus/foo",
+               "",
+               "",
+               "",
+               http.StatusNotFound,
+               "",
+       )
+}
+
+// Bad token in a cookie (even if it got there via our own
+// query-string-to-cookie redirect) is, in principle, retryable at the
+// same URL so it's 401 Unauthorized.
+func (s *IntegrationSuite) TestVhostRedirectQueryTokenToBogusCookie(c *check.C) {
+       s.testVhostRedirectTokenToCookie(c, "GET",
+               arvadostest.FooCollection+".example.com/foo",
+               "?api_token=thisisabogustoken",
+               "",
+               "",
+               http.StatusUnauthorized,
+               "",
+       )
+}
+
+// TestVhostRedirectQueryTokenSingleOriginError: without trustAllContent
+// or an attachment-only host, serving inline content from the shared
+// origin with a query token is refused with 400 Bad Request.
+func (s *IntegrationSuite) TestVhostRedirectQueryTokenSingleOriginError(c *check.C) {
+       s.testVhostRedirectTokenToCookie(c, "GET",
+               "example.com/c="+arvadostest.FooCollection+"/foo",
+               "?api_token="+arvadostest.ActiveToken,
+               "",
+               "",
+               http.StatusBadRequest,
+               "",
+       )
+}
+
+// TestVhostRedirectQueryTokenTrustAllContent: with the trustAllContent
+// flag enabled, the shared-origin request that would otherwise be 400
+// (see TestVhostRedirectQueryTokenSingleOriginError) succeeds. The
+// package-level flag is restored via defer so other tests are
+// unaffected.
+func (s *IntegrationSuite) TestVhostRedirectQueryTokenTrustAllContent(c *check.C) {
+       defer func(orig bool) {
+               trustAllContent = orig
+       }(trustAllContent)
+       trustAllContent = true
+       s.testVhostRedirectTokenToCookie(c, "GET",
+               "example.com/c="+arvadostest.FooCollection+"/foo",
+               "?api_token="+arvadostest.ActiveToken,
+               "",
+               "",
+               http.StatusOK,
+               "foo",
+       )
+}
+
+// TestVhostRedirectQueryTokenAttachmentOnlyHost: when attachmentOnlyHost
+// is configured, requests on other hosts still get 400, while requests
+// on the configured host succeed and carry Content-Disposition:
+// attachment. The global is restored via defer.
+func (s *IntegrationSuite) TestVhostRedirectQueryTokenAttachmentOnlyHost(c *check.C) {
+       defer func(orig string) {
+               attachmentOnlyHost = orig
+       }(attachmentOnlyHost)
+       attachmentOnlyHost = "example.com:1234"
+
+       s.testVhostRedirectTokenToCookie(c, "GET",
+               "example.com/c="+arvadostest.FooCollection+"/foo",
+               "?api_token="+arvadostest.ActiveToken,
+               "",
+               "",
+               http.StatusBadRequest,
+               "",
+       )
+
+       resp := s.testVhostRedirectTokenToCookie(c, "GET",
+               "example.com:1234/c="+arvadostest.FooCollection+"/foo",
+               "?api_token="+arvadostest.ActiveToken,
+               "",
+               "",
+               http.StatusOK,
+               "foo",
+       )
+       c.Check(resp.Header().Get("Content-Disposition"), check.Equals, "attachment")
+}
+
+// TestVhostRedirectPOSTFormTokenToCookie: a token POSTed as form data
+// is accepted via the token-to-cookie redirect.
+func (s *IntegrationSuite) TestVhostRedirectPOSTFormTokenToCookie(c *check.C) {
+       s.testVhostRedirectTokenToCookie(c, "POST",
+               arvadostest.FooCollection+".example.com/foo",
+               "",
+               "application/x-www-form-urlencoded",
+               url.Values{"api_token": {arvadostest.ActiveToken}}.Encode(),
+               http.StatusOK,
+               "foo",
+       )
+}
+
+// TestVhostRedirectPOSTFormTokenToCookie404: a valid but unauthorized
+// token POSTed as form data gets 404 (valid tokens are never offered a
+// retry).
+func (s *IntegrationSuite) TestVhostRedirectPOSTFormTokenToCookie404(c *check.C) {
+       s.testVhostRedirectTokenToCookie(c, "POST",
+               arvadostest.FooCollection+".example.com/foo",
+               "",
+               "application/x-www-form-urlencoded",
+               url.Values{"api_token": {arvadostest.SpectatorToken}}.Encode(),
+               http.StatusNotFound,
+               "",
+       )
+}
+
+// TestAnonymousTokenOK: publicly readable data is served with no
+// credentials when a valid anonymous token is configured.
+// NOTE(review): anonymousTokens is a package global and is not restored
+// after this test (unlike the defer pattern used for trustAllContent) —
+// confirm later tests don't depend on its prior value.
+func (s *IntegrationSuite) TestAnonymousTokenOK(c *check.C) {
+       anonymousTokens = []string{arvadostest.AnonymousToken}
+       s.testVhostRedirectTokenToCookie(c, "GET",
+               "example.com/c="+arvadostest.HelloWorldCollection+"/Hello%20world.txt",
+               "",
+               "",
+               "",
+               http.StatusOK,
+               "Hello world\n",
+       )
+}
+
+// TestAnonymousTokenError: an invalid configured anonymous token yields
+// 404 rather than a retry prompt.
+func (s *IntegrationSuite) TestAnonymousTokenError(c *check.C) {
+       anonymousTokens = []string{"anonymousTokenConfiguredButInvalid"}
+       s.testVhostRedirectTokenToCookie(c, "GET",
+               "example.com/c="+arvadostest.HelloWorldCollection+"/Hello%20world.txt",
+               "",
+               "",
+               "",
+               http.StatusNotFound,
+               "",
+       )
+}
+
+// testVhostRedirectTokenToCookie issues method hostPath?queryString
+// (with optional contentType/reqBody for POSTs) against the handler.
+// If the handler responds 303, it verifies the self-referencing href in
+// the interstitial body, then replays a GET at the Location target with
+// the cookies from the redirect. The final response must match
+// expectStatus/expectRespBody (checked in a defer so it applies to
+// whichever response is returned), and is returned to the caller for
+// further header checks.
+func (s *IntegrationSuite) testVhostRedirectTokenToCookie(c *check.C, method, hostPath, queryString, contentType, reqBody string, expectStatus int, expectRespBody string) *httptest.ResponseRecorder {
+       u, _ := url.Parse(`http://` + hostPath + queryString)
+       req := &http.Request{
+               Method: method,
+               Host:   u.Host,
+               URL:    u,
+               Header: http.Header{"Content-Type": {contentType}},
+               Body:   ioutil.NopCloser(strings.NewReader(reqBody)),
+       }
+
+       resp := httptest.NewRecorder()
+       defer func() {
+               c.Check(resp.Code, check.Equals, expectStatus)
+               c.Check(resp.Body.String(), check.Equals, expectRespBody)
+       }()
+
+       (&handler{}).ServeHTTP(resp, req)
+       if resp.Code != http.StatusSeeOther {
+               return resp
+       }
+       c.Check(resp.Body.String(), check.Matches, `.*href="//`+regexp.QuoteMeta(html.EscapeString(hostPath))+`".*`)
+       cookies := (&http.Response{Header: resp.Header()}).Cookies()
+
+       u, _ = u.Parse(resp.Header().Get("Location"))
+       req = &http.Request{
+               Method: "GET",
+               Host:   u.Host,
+               URL:    u,
+               Header: http.Header{},
+       }
+       for _, c := range cookies {
+               req.AddCookie(c)
+       }
+
+       resp = httptest.NewRecorder()
+       (&handler{}).ServeHTTP(resp, req)
+       // The redirected request must be final: no second redirect.
+       c.Check(resp.Header().Get("Location"), check.Equals, "")
+       return resp
+}
diff --git a/services/keep-web/main.go b/services/keep-web/main.go
new file mode 100644 (file)
index 0000000..135f01b
--- /dev/null
@@ -0,0 +1,33 @@
+package main
+
+import (
+       "flag"
+       "log"
+       "os"
+)
+
+func init() {
+       // MakeArvadosClient returns an error if this env var isn't
+       // available as a default token (even if we explicitly set a
+       // different token before doing anything with the client). We
+       // set this dummy value during init so it doesn't clobber the
+       // one used by "run test servers".
+       if os.Getenv("ARVADOS_API_TOKEN") == "" {
+               os.Setenv("ARVADOS_API_TOKEN", "xxx")
+       }
+}
+
+// main parses flags, requires ARVADOS_API_HOST to be set, then starts
+// the keep-web server and blocks until it exits. Any startup or
+// runtime error is fatal.
+func main() {
+       flag.Parse()
+       if os.Getenv("ARVADOS_API_HOST") == "" {
+               log.Fatal("ARVADOS_API_HOST environment variable must be set.")
+       }
+       srv := &server{}
+       if err := srv.Start(); err != nil {
+               log.Fatal(err)
+       }
+       log.Println("Listening at", srv.Addr)
+       if err := srv.Wait(); err != nil {
+               log.Fatal(err)
+       }
+}
diff --git a/services/keep-web/server.go b/services/keep-web/server.go
new file mode 100644 (file)
index 0000000..1009008
--- /dev/null
@@ -0,0 +1,27 @@
+package main
+
+import (
+       "flag"
+       "net/http"
+
+       "git.curoverse.com/arvados.git/sdk/go/httpserver"
+)
+
+// address is the listen address, set by the -listen flag. It is a
+// package global so tests can override it (see SetUpTest in
+// server_test.go).
+var address string
+
+func init() {
+       flag.StringVar(&address, "listen", ":80",
+               "Address to listen on: \"host:port\", or \":port\" to listen on all interfaces.")
+}
+
+// server wraps httpserver.Server, adding keep-web's request handler.
+type server struct {
+       httpserver.Server
+}
+
+// Start installs the keep-web handler at the configured listen address
+// and starts the embedded httpserver.Server.
+func (srv *server) Start() error {
+       mux := http.NewServeMux()
+       mux.Handle("/", &handler{})
+       srv.Handler = mux
+       srv.Addr = address
+       return srv.Server.Start()
+}
diff --git a/services/keep-web/server_test.go b/services/keep-web/server_test.go
new file mode 100644 (file)
index 0000000..cda8b17
--- /dev/null
@@ -0,0 +1,322 @@
+package main
+
+import (
+       "crypto/md5"
+       "fmt"
+       "io"
+       "io/ioutil"
+       "net"
+       "os/exec"
+       "strings"
+       "testing"
+
+       "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
+       "git.curoverse.com/arvados.git/sdk/go/arvadostest"
+       "git.curoverse.com/arvados.git/sdk/go/keepclient"
+       check "gopkg.in/check.v1"
+)
+
+var _ = check.Suite(&IntegrationSuite{})
+
+// IntegrationSuite tests need an API server and a keep-web server
+type IntegrationSuite struct {
+       // testServer is the keep-web instance under test; started on a
+       // random local port in SetUpTest, closed in TearDownTest.
+       testServer *server
+}
+
+// TestNoToken verifies that with no token or a bogus token, collection
+// routes and unknown routes all return 404 with an empty body — no
+// data, and no error detail, is leaked to unauthenticated clients.
+func (s *IntegrationSuite) TestNoToken(c *check.C) {
+       for _, token := range []string{
+               "",
+               "bogustoken",
+       } {
+               hdr, body, _ := s.runCurl(c, token, "collections.example.com", "/collections/"+arvadostest.FooCollection+"/foo")
+               c.Check(hdr, check.Matches, `(?s)HTTP/1.1 404 Not Found\r\n.*`)
+               c.Check(body, check.Equals, "")
+
+               if token != "" {
+                       hdr, body, _ = s.runCurl(c, token, "collections.example.com", "/collections/download/"+arvadostest.FooCollection+"/"+token+"/foo")
+                       c.Check(hdr, check.Matches, `(?s)HTTP/1.1 404 Not Found\r\n.*`)
+                       c.Check(body, check.Equals, "")
+               }
+
+               hdr, body, _ = s.runCurl(c, token, "collections.example.com", "/bad-route")
+               c.Check(hdr, check.Matches, `(?s)HTTP/1.1 404 Not Found\r\n.*`)
+               c.Check(body, check.Equals, "")
+       }
+}
+
+// TODO: Move most cases to functional tests -- at least use Go's own
+// http client instead of forking curl. Just leave enough of an
+// integration test to assure that the documented way of invoking curl
+// really works against the server.
+func (s *IntegrationSuite) Test404(c *check.C) {
+       for _, uri := range []string{
+               // Routing errors (always 404 regardless of what's stored in Keep)
+               "/",
+               "/foo",
+               "/download",
+               "/collections",
+               "/collections/",
+               // Implicit/generated index is not implemented yet;
+               // until then, return 404.
+               "/collections/" + arvadostest.FooCollection,
+               "/collections/" + arvadostest.FooCollection + "/",
+               "/collections/" + arvadostest.FooBarDirCollection + "/dir1",
+               "/collections/" + arvadostest.FooBarDirCollection + "/dir1/",
+               // Non-existent file in collection
+               "/collections/" + arvadostest.FooCollection + "/theperthcountyconspiracy",
+               "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/theperthcountyconspiracy",
+               // Non-existent collection
+               "/collections/" + arvadostest.NonexistentCollection,
+               "/collections/" + arvadostest.NonexistentCollection + "/",
+               "/collections/" + arvadostest.NonexistentCollection + "/theperthcountyconspiracy",
+               "/collections/download/" + arvadostest.NonexistentCollection + "/" + arvadostest.ActiveToken + "/theperthcountyconspiracy",
+       } {
+               hdr, body, _ := s.runCurl(c, arvadostest.ActiveToken, "collections.example.com", uri)
+               c.Check(hdr, check.Matches, "(?s)HTTP/1.1 404 Not Found\r\n.*")
+               c.Check(body, check.Equals, "")
+       }
+}
+
+// Test1GBFile streams a 1 GB file (100 blocks of 10 MB); skipped with
+// "go test -short".
+func (s *IntegrationSuite) Test1GBFile(c *check.C) {
+       if testing.Short() {
+               c.Skip("skipping 1GB integration test in short mode")
+       }
+       s.test100BlockFile(c, 10000000)
+}
+
+// Test300MBFile streams a 300 MB file (100 blocks of 3 MB).
+func (s *IntegrationSuite) Test300MBFile(c *check.C) {
+       s.test100BlockFile(c, 3000000)
+}
+
+// test100BlockFile writes one block of blocksize space characters to
+// Keep, builds a collection whose manifest references that block 100
+// times (total file size blocksize*100 — the "%d00" format string
+// appends two zeros, i.e. multiplies the printed size by 100), and
+// verifies keep-web serves the file with the right Content-Length,
+// content, and total body size.
+func (s *IntegrationSuite) test100BlockFile(c *check.C, blocksize int) {
+       testdata := make([]byte, blocksize)
+       for i := 0; i < blocksize; i++ {
+               testdata[i] = byte(' ')
+       }
+       arv, err := arvadosclient.MakeArvadosClient()
+       c.Assert(err, check.Equals, nil)
+       arv.ApiToken = arvadostest.ActiveToken
+       kc, err := keepclient.MakeKeepClient(&arv)
+       c.Assert(err, check.Equals, nil)
+       loc, _, err := kc.PutB(testdata[:])
+       c.Assert(err, check.Equals, nil)
+       mtext := "."
+       for i := 0; i < 100; i++ {
+               mtext = mtext + " " + loc
+       }
+       mtext = mtext + fmt.Sprintf(" 0:%d00:testdata.bin\n", blocksize)
+       coll := map[string]interface{}{}
+       err = arv.Create("collections",
+               map[string]interface{}{
+                       "collection": map[string]interface{}{
+                               "name":          fmt.Sprintf("testdata blocksize=%d", blocksize),
+                               "manifest_text": mtext,
+                       },
+               }, &coll)
+       c.Assert(err, check.Equals, nil)
+       uuid := coll["uuid"].(string)
+
+       hdr, body, size := s.runCurl(c, arv.ApiToken, uuid+".collections.example.com", "/testdata.bin")
+       c.Check(hdr, check.Matches, `(?s)HTTP/1.1 200 OK\r\n.*`)
+       c.Check(hdr, check.Matches, `(?si).*Content-length: `+fmt.Sprintf("%d00", blocksize)+`\r\n.*`)
+       c.Check([]byte(body)[:1234], check.DeepEquals, testdata[:1234])
+       c.Check(size, check.Equals, int64(blocksize)*100)
+}
+
+// curlCase is one Test200 table entry: optional auth token and host
+// (host defaults to "collections.example.com"), the request path, and
+// the MD5 checksum the response body must hash to.
+type curlCase struct {
+       auth    string
+       host    string
+       path    string
+       dataMD5 string
+}
+
+// Test200 runs curl against a table of URL/credential combinations
+// that must all succeed with 200, verifying each body by MD5 and that
+// .txt files get a text/plain Content-Type. Covers token-in-header,
+// token-in-path, PDH vs UUID hostnames, "+"/"-" PDH rewriting, the
+// "/_/" path prefix, and anonymous access to public data (the
+// anonymousTokens global is set at the top).
+func (s *IntegrationSuite) Test200(c *check.C) {
+       anonymousTokens = []string{arvadostest.AnonymousToken}
+       for _, spec := range []curlCase{
+               // My collection
+               {
+                       auth:    arvadostest.ActiveToken,
+                       host:    arvadostest.FooCollection + "--collections.example.com",
+                       path:    "/foo",
+                       dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+               },
+               {
+                       auth:    arvadostest.ActiveToken,
+                       host:    arvadostest.FooCollection + ".collections.example.com",
+                       path:    "/foo",
+                       dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+               },
+               {
+                       host:    strings.Replace(arvadostest.FooPdh, "+", "-", 1) + ".collections.example.com",
+                       path:    "/t=" + arvadostest.ActiveToken + "/foo",
+                       dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+               },
+               {
+                       path:    "/c=" + arvadostest.FooPdh + "/t=" + arvadostest.ActiveToken + "/foo",
+                       dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+               },
+               {
+                       path:    "/c=" + strings.Replace(arvadostest.FooPdh, "+", "-", 1) + "/t=" + arvadostest.ActiveToken + "/_/foo",
+                       dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+               },
+               {
+                       path:    "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
+                       dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+               },
+               // A token in the download path wins even when the
+               // Authorization header carries a different (or bogus)
+               // token.
+               {
+                       auth:    "tokensobogus",
+                       path:    "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
+                       dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+               },
+               {
+                       auth:    arvadostest.ActiveToken,
+                       path:    "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
+                       dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+               },
+               {
+                       auth:    arvadostest.AnonymousToken,
+                       path:    "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
+                       dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+               },
+
+               // Anonymously accessible data
+               {
+                       path:    "/c=" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
+                       dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+               },
+               {
+                       host:    arvadostest.HelloWorldCollection + ".collections.example.com",
+                       path:    "/Hello%20world.txt",
+                       dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+               },
+               {
+                       host:    arvadostest.HelloWorldCollection + ".collections.example.com",
+                       path:    "/_/Hello%20world.txt",
+                       dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+               },
+               {
+                       path:    "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
+                       dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+               },
+               {
+                       auth:    arvadostest.ActiveToken,
+                       path:    "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
+                       dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+               },
+               {
+                       auth:    arvadostest.SpectatorToken,
+                       path:    "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
+                       dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+               },
+               {
+                       auth:    arvadostest.SpectatorToken,
+                       host:    arvadostest.HelloWorldCollection + "--collections.example.com",
+                       path:    "/Hello%20world.txt",
+                       dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+               },
+               {
+                       auth:    arvadostest.SpectatorToken,
+                       path:    "/collections/download/" + arvadostest.HelloWorldCollection + "/" + arvadostest.SpectatorToken + "/Hello%20world.txt",
+                       dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+               },
+       } {
+               host := spec.host
+               if host == "" {
+                       host = "collections.example.com"
+               }
+               hdr, body, _ := s.runCurl(c, spec.auth, host, spec.path)
+               c.Check(hdr, check.Matches, `(?s)HTTP/1.1 200 OK\r\n.*`)
+               if strings.HasSuffix(spec.path, ".txt") {
+                       c.Check(hdr, check.Matches, `(?s).*\r\nContent-Type: text/plain.*`)
+                       // TODO: Check some types that aren't
+                       // automatically detected by Go's http server
+                       // by sniffing the content.
+               }
+               c.Check(fmt.Sprintf("%x", md5.Sum([]byte(body))), check.Equals, spec.dataMD5)
+       }
+}
+
+// Return header block and body.
+//
+// runCurl forks curl against the test server (using --resolve so the
+// virtual hostname maps to the server's real address), optionally
+// sending token as an OAuth2 Authorization header. It returns the raw
+// response header block, up to the first 256 MiB of the body, and the
+// total body size including any discarded remainder.
+func (s *IntegrationSuite) runCurl(c *check.C, token, host, uri string, args ...string) (hdr, bodyPart string, bodySize int64) {
+       curlArgs := []string{"--silent", "--show-error", "--include"}
+       testHost, testPort, _ := net.SplitHostPort(s.testServer.Addr)
+       curlArgs = append(curlArgs, "--resolve", host+":"+testPort+":"+testHost)
+       if token != "" {
+               curlArgs = append(curlArgs, "-H", "Authorization: OAuth2 "+token)
+       }
+       curlArgs = append(curlArgs, args...)
+       curlArgs = append(curlArgs, "http://"+host+":"+testPort+uri)
+       c.Log(fmt.Sprintf("curlArgs == %#v", curlArgs))
+       cmd := exec.Command("curl", curlArgs...)
+       stdout, err := cmd.StdoutPipe()
+       c.Assert(err, check.Equals, nil)
+       cmd.Stderr = cmd.Stdout
+       // NOTE(review): cmd.Start() already returns without waiting;
+       // wrapping it in "go" is redundant and silently discards
+       // Start's error (e.g. curl not installed) — TODO: call
+       // cmd.Start() directly and assert on its error.
+       go cmd.Start()
+       // 2<<27 bytes == 256 MiB: read (and keep) at most this much of
+       // the body; anything beyond is drained and only counted.
+       buf := make([]byte, 2<<27)
+       n, err := io.ReadFull(stdout, buf)
+       // Discard (but measure size of) anything past 256 MiB.
+       var discarded int64
+       if err == io.ErrUnexpectedEOF {
+               err = nil
+               buf = buf[:n]
+       } else {
+               c.Assert(err, check.Equals, nil)
+               discarded, err = io.Copy(ioutil.Discard, stdout)
+               c.Assert(err, check.Equals, nil)
+       }
+       err = cmd.Wait()
+       // Without "-f", curl exits 0 as long as it gets a valid HTTP
+       // response from the server, even if the response status
+       // indicates that the request failed. In our test suite, we
+       // always expect a valid HTTP response, and we parse the
+       // headers ourselves. If curl exits non-zero, our testing
+       // environment is broken.
+       c.Assert(err, check.Equals, nil)
+       hdrsAndBody := strings.SplitN(string(buf), "\r\n\r\n", 2)
+       c.Assert(len(hdrsAndBody), check.Equals, 2)
+       hdr = hdrsAndBody[0]
+       bodyPart = hdrsAndBody[1]
+       bodySize = int64(len(bodyPart)) + discarded
+       return
+}
+
+// SetUpSuite starts the test API server and two Keep servers, then
+// stores the fixture blocks ("Hello world\n", "foo", "foobar") that
+// the test collections' manifests reference.
+func (s *IntegrationSuite) SetUpSuite(c *check.C) {
+       arvadostest.StartAPI()
+       arvadostest.StartKeep(2, true)
+
+       arv, err := arvadosclient.MakeArvadosClient()
+       c.Assert(err, check.Equals, nil)
+       arv.ApiToken = arvadostest.ActiveToken
+       kc, err := keepclient.MakeKeepClient(&arv)
+       c.Assert(err, check.Equals, nil)
+       kc.PutB([]byte("Hello world\n"))
+       kc.PutB([]byte("foo"))
+       kc.PutB([]byte("foobar"))
+}
+
+// TearDownSuite stops the Keep servers and the API server started in
+// SetUpSuite.
+func (s *IntegrationSuite) TearDownSuite(c *check.C) {
+       arvadostest.StopKeep(2)
+       arvadostest.StopAPI()
+}
+
+// SetUpTest resets the test environment and starts a fresh keep-web
+// server on a random local port. Note it does this by mutating the
+// package-level "address" variable (see server.go) before Start.
+func (s *IntegrationSuite) SetUpTest(c *check.C) {
+       arvadostest.ResetEnv()
+       s.testServer = &server{}
+       var err error
+       address = "127.0.0.1:0"
+       err = s.testServer.Start()
+       c.Assert(err, check.Equals, nil)
+}
+
+// TearDownTest shuts down the per-test keep-web server.
+func (s *IntegrationSuite) TearDownTest(c *check.C) {
+       var err error
+       if s.testServer != nil {
+               err = s.testServer.Close()
+       }
+       c.Check(err, check.Equals, nil)
+}
+
+// Gocheck boilerplate
+func Test(t *testing.T) {
+       check.TestingT(t)
+}
index d3dbeaf89e420d4546d0fedab643d9b15e9849c9..79ed51eb0e00f57eb38d4a31251a87c2bc5e866c 100644 (file)
@@ -37,7 +37,7 @@ func main() {
                pidfile          string
        )
 
-       flagset := flag.NewFlagSet("default", flag.ExitOnError)
+       flagset := flag.NewFlagSet("keepproxy", flag.ExitOnError)
 
        flagset.StringVar(
                &listen,
@@ -84,6 +84,9 @@ func main() {
                log.Fatalf("Error setting up arvados client %s", err.Error())
        }
 
+       if os.Getenv("ARVADOS_DEBUG") != "" {
+               keepclient.DebugPrintf = log.Printf
+       }
        kc, err := keepclient.MakeKeepClient(&arv)
        if err != nil {
                log.Fatalf("Error setting up keep client %s", err.Error())
@@ -201,7 +204,7 @@ func GetRemoteAddress(req *http.Request) string {
        return req.RemoteAddr
 }
 
-func CheckAuthorizationHeader(kc keepclient.KeepClient, cache *ApiTokenCache, req *http.Request) (pass bool, tok string) {
+func CheckAuthorizationHeader(kc *keepclient.KeepClient, cache *ApiTokenCache, req *http.Request) (pass bool, tok string) {
        var auth string
        if auth = req.Header.Get("Authorization"); auth == "" {
                return false, ""
@@ -331,7 +334,7 @@ func (this GetBlockHandler) ServeHTTP(resp http.ResponseWriter, req *http.Reques
 
        var pass bool
        var tok string
-       if pass, tok = CheckAuthorizationHeader(kc, this.ApiTokenCache, req); !pass {
+       if pass, tok = CheckAuthorizationHeader(&kc, this.ApiTokenCache, req); !pass {
                status, err = http.StatusForbidden, BadAuthorizationHeader
                return
        }
@@ -438,7 +441,7 @@ func (this PutBlockHandler) ServeHTTP(resp http.ResponseWriter, req *http.Reques
 
        var pass bool
        var tok string
-       if pass, tok = CheckAuthorizationHeader(kc, this.ApiTokenCache, req); !pass {
+       if pass, tok = CheckAuthorizationHeader(&kc, this.ApiTokenCache, req); !pass {
                err = BadAuthorizationHeader
                status = http.StatusForbidden
                return
@@ -521,7 +524,7 @@ func (handler IndexHandler) ServeHTTP(resp http.ResponseWriter, req *http.Reques
 
        kc := *handler.KeepClient
 
-       ok, token := CheckAuthorizationHeader(kc, handler.ApiTokenCache, req)
+       ok, token := CheckAuthorizationHeader(&kc, handler.ApiTokenCache, req)
        if !ok {
                status, err = http.StatusForbidden, BadAuthorizationHeader
                return
index 997163eca42c965f41109af3ad51de22e65c80d3..88b86e063b11874092deec7595b7014fb84a8277 100644 (file)
@@ -121,7 +121,7 @@ func setupProxyService() {
        }
 }
 
-func runProxy(c *C, args []string, port int, bogusClientToken bool) keepclient.KeepClient {
+func runProxy(c *C, args []string, port int, bogusClientToken bool) *keepclient.KeepClient {
        if bogusClientToken {
                os.Setenv("ARVADOS_API_TOKEN", "bogus-token")
        }
@@ -156,7 +156,7 @@ func runProxy(c *C, args []string, port int, bogusClientToken bool) keepclient.K
                go main()
        }
 
-       return kc
+       return &kc
 }
 
 func (s *ServerRequiredSuite) TestPutAskGet(c *C) {
@@ -467,6 +467,8 @@ func (s *ServerRequiredSuite) TestGetIndex(c *C) {
        _, rep, err = kc.PutB([]byte("some-more-index-data"))
        c.Check(err, Equals, nil)
 
+       kc.Arvados.ApiToken = arvadostest.DataManagerToken
+
        // Invoke GetIndex
        for _, spec := range []struct {
                prefix         string
index 2528f6d6a6c4dbf2f4b509e670c834aa10b9e618..7525441aaec995d4a524a7ff20f954ee06ae50c9 100644 (file)
@@ -33,10 +33,6 @@ const BlockSize = 64 * 1024 * 1024
 // in order to permit writes.
 const MinFreeKilobytes = BlockSize / 1024
 
-// Until #6221 is resolved, never_delete must be true.
-// However, allow it to be false in testing with TestDataManagerToken
-const TestDataManagerToken = "4axaw8zxe0qm22wa6urpp5nskcne8z88cvbupv653y1njyi05h"
-
 // ProcMounts /proc/mounts
 var ProcMounts = "/proc/mounts"
 
@@ -159,8 +155,9 @@ func main() {
                &neverDelete,
                "never-delete",
                true,
-               "If set, nothing will be deleted. HTTP 405 will be returned "+
-                       "for valid DELETE requests.")
+               "If true, nothing will be deleted. "+
+                       "Warning: the relevant features in keepstore and data manager have not been extensively tested. "+
+                       "You should leave this option alone unless you can afford to lose data.")
        flag.StringVar(
                &blobSigningKeyFile,
                "permission-key-file",
@@ -257,8 +254,9 @@ func main() {
                }
        }
 
-       if neverDelete != true && dataManagerToken != TestDataManagerToken {
-               log.Fatal("never_delete must be true, see #6221")
+       if neverDelete != true {
+               log.Print("never-delete is not set. Warning: the relevant features in keepstore and data manager have not " +
+                       "been extensively tested. You should leave this option alone unless you can afford to lose data.")
        }
 
        if blobSigningKeyFile != "" {
index 1c828c13c3b5033aa92353c2b403c179e00120c6..3c708778d0c274b17c0d9c2c42b8c0b768da331b 100644 (file)
@@ -154,6 +154,10 @@ class ComputeNodeShutdownActor(ComputeNodeStateChangeBase):
 
     This actor simply destroys a cloud node, retrying as needed.
     """
+    # Reasons for a shutdown to be cancelled.
+    WINDOW_CLOSED = "shutdown window closed"
+    NODE_BROKEN = "cloud failed to shut down broken node"
+
     def __init__(self, timer_actor, cloud_client, arvados_client, node_monitor,
                  cancellable=True, retry_wait=1, max_retry_wait=180):
         # If a ShutdownActor is cancellable, it will ask the
@@ -167,6 +171,7 @@ class ComputeNodeShutdownActor(ComputeNodeStateChangeBase):
         self._monitor = node_monitor.proxy()
         self.cloud_node = self._monitor.cloud_node.get()
         self.cancellable = cancellable
+        self.cancel_reason = None
         self.success = None
 
     def on_start(self):
@@ -180,7 +185,10 @@ class ComputeNodeShutdownActor(ComputeNodeStateChangeBase):
             self.success = success_flag
         return super(ComputeNodeShutdownActor, self)._finished()
 
-    def cancel_shutdown(self):
+    def cancel_shutdown(self, reason):
+        self.cancel_reason = reason
+        self._logger.info("Cloud node %s shutdown cancelled: %s.",
+                          self.cloud_node.id, reason)
         self._finished(success_flag=False)
 
     def _stop_if_window_closed(orig_func):
@@ -188,10 +196,7 @@ class ComputeNodeShutdownActor(ComputeNodeStateChangeBase):
         def stop_wrapper(self, *args, **kwargs):
             if (self.cancellable and
                   (not self._monitor.shutdown_eligible().get())):
-                self._logger.info(
-                    "Cloud node %s shutdown cancelled - no longer eligible.",
-                    self.cloud_node.id)
-                self._later.cancel_shutdown()
+                self._later.cancel_shutdown(self.WINDOW_CLOSED)
                 return None
             else:
                 return orig_func(self, *args, **kwargs)
@@ -201,8 +206,11 @@ class ComputeNodeShutdownActor(ComputeNodeStateChangeBase):
     @ComputeNodeStateChangeBase._retry()
     def shutdown_node(self):
         if not self._cloud.destroy_node(self.cloud_node):
-            # Force a retry.
-            raise cloud_types.LibcloudError("destroy_node failed")
+            if self._cloud.broken(self.cloud_node):
+                self._later.cancel_shutdown(self.NODE_BROKEN)
+            else:
+                # Force a retry.
+                raise cloud_types.LibcloudError("destroy_node failed")
         self._logger.info("Cloud node %s shut down.", self.cloud_node.id)
         arv_node = self._arvados_node()
         if arv_node is None:
index ec5014e9f9cf1e8848353cf3c755e22875227850..919b57f42c8973bab91de742d1fee48598296f35 100644 (file)
@@ -43,7 +43,7 @@ class ComputeNodeShutdownActor(ShutdownActorBase):
     # error are still being investigated.
 
     @ShutdownActorBase._retry((subprocess.CalledProcessError, OSError))
-    def cancel_shutdown(self):
+    def cancel_shutdown(self, reason):
         if self._nodename:
             if self._get_slurm_state() in self.SLURM_DRAIN_STATES:
                 # Resume from "drng" or "drain"
@@ -52,7 +52,7 @@ class ComputeNodeShutdownActor(ShutdownActorBase):
                 # Node is in a state such as 'idle' or 'alloc' so don't
                 # try to resume it because that will just raise an error.
                 pass
-        return super(ComputeNodeShutdownActor, self).cancel_shutdown()
+        return super(ComputeNodeShutdownActor, self).cancel_shutdown(reason)
 
     @ShutdownActorBase._retry((subprocess.CalledProcessError, OSError))
     @ShutdownActorBase._stop_if_window_closed
index 1d52073ce5ff7a362e0b8e9cc13b12c0f2d3b6a2..a65e9a0705d1cd5941140a20dbd4c6d4e1f5fd57 100644 (file)
@@ -25,6 +25,7 @@ class _BaseNodeTracker(object):
     def __init__(self):
         self.nodes = {}
         self.orphans = {}
+        self._blacklist = set()
 
     # Proxy the methods listed below to self.nodes.
     def _proxy_method(name):
@@ -43,6 +44,9 @@ class _BaseNodeTracker(object):
     def add(self, record):
         self.nodes[self.record_key(record)] = record
 
+    def blacklist(self, key):
+        self._blacklist.add(key)
+
     def update_record(self, key, item):
         setattr(self.nodes[key], self.RECORD_ATTR, item)
 
@@ -50,7 +54,9 @@ class _BaseNodeTracker(object):
         unseen = set(self.nodes.iterkeys())
         for item in response:
             key = self.item_key(item)
-            if key in unseen:
+            if key in self._blacklist:
+                continue
+            elif key in unseen:
                 unseen.remove(key)
                 self.update_record(key, item)
             else:
@@ -346,11 +352,13 @@ class NodeManagerDaemonActor(actor_class):
             self._begin_node_shutdown(record.actor, cancellable=False)
 
     def node_finished_shutdown(self, shutdown_actor):
-        success, cloud_node = self._get_actor_attrs(shutdown_actor, 'success',
-                                                    'cloud_node')
+        cloud_node, success, cancel_reason = self._get_actor_attrs(
+            shutdown_actor, 'cloud_node', 'success', 'cancel_reason')
         shutdown_actor.stop()
         cloud_node_id = cloud_node.id
         if not success:
+            if cancel_reason == self._node_shutdown.NODE_BROKEN:
+                self.cloud_nodes.blacklist(cloud_node_id)
             del self.shutdowns[cloud_node_id]
         elif cloud_node_id in self.booted:
             self.booted.pop(cloud_node_id).actor.stop()
index e718fc134b7a20723f7f33ef5aeefd5095763eb6..0bdb2cba471f4beac0c4b8a90f17c12a93c7bdf8 100644 (file)
@@ -116,11 +116,12 @@ class ComputeNodeSetupActorTestCase(testutil.ActorTestMixin, unittest.TestCase):
 
 class ComputeNodeShutdownActorMixin(testutil.ActorTestMixin):
     def make_mocks(self, cloud_node=None, arvados_node=None,
-                   shutdown_open=True):
+                   shutdown_open=True, node_broken=False):
         self.timer = testutil.MockTimer()
         self.shutdowns = testutil.MockShutdownTimer()
         self.shutdowns._set_state(shutdown_open, 300)
         self.cloud_client = mock.MagicMock(name='cloud_client')
+        self.cloud_client.broken.return_value = node_broken
         self.arvados_client = mock.MagicMock(name='arvados_client')
         self.updates = mock.MagicMock(name='update_mock')
         if cloud_node is None:
@@ -201,6 +202,8 @@ class ComputeNodeShutdownActorTestCase(ComputeNodeShutdownActorMixin,
         self.make_actor()
         self.check_success_flag(False, 2)
         self.assertFalse(self.cloud_client.destroy_node.called)
+        self.assertEqual(self.ACTOR_CLASS.WINDOW_CLOSED,
+                         self.shutdown_actor.cancel_reason.get(self.TIMEOUT))
 
     def test_shutdown_retries_when_cloud_fails(self):
         self.make_mocks()
@@ -210,6 +213,15 @@ class ComputeNodeShutdownActorTestCase(ComputeNodeShutdownActorMixin,
         self.cloud_client.destroy_node.return_value = True
         self.check_success_flag(True)
 
+    def test_shutdown_cancelled_when_cloud_fails_on_broken_node(self):
+        self.make_mocks(node_broken=True)
+        self.cloud_client.destroy_node.return_value = False
+        self.make_actor(start_time=0)
+        self.check_success_flag(False, 2)
+        self.assertEqual(1, self.cloud_client.destroy_node.call_count)
+        self.assertEqual(self.ACTOR_CLASS.NODE_BROKEN,
+                         self.shutdown_actor.cancel_reason.get(self.TIMEOUT))
+
     def test_late_subscribe(self):
         self.make_actor()
         subscriber = mock.Mock(name='subscriber_mock')
index 16f560457765e3878e8e4c2a1bae6d46f8615a43..bbfbe4b7452504ad9935729b1654821bddbd3903 100644 (file)
@@ -449,6 +449,26 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
         self.daemon.node_can_shutdown(monitor).get(self.TIMEOUT)
         self.assertTrue(shutdown_proxy.called)
 
+    def test_broken_node_blackholed_after_cancelled_shutdown(self):
+        cloud_node = testutil.cloud_node_mock(8)
+        wishlist = [testutil.MockSize(8)]
+        self.make_daemon([cloud_node], [testutil.arvados_node_mock(8)],
+                         wishlist)
+        self.assertEqual(1, self.alive_monitor_count())
+        self.assertFalse(self.node_setup.start.called)
+        monitor = self.monitor_list()[0].proxy()
+        shutdown_proxy = self.node_shutdown.start().proxy
+        shutdown_proxy().cloud_node.get.return_value = cloud_node
+        shutdown_proxy().success.get.return_value = False
+        shutdown_proxy().cancel_reason.get.return_value = self.node_shutdown.NODE_BROKEN
+        self.daemon.update_server_wishlist([]).get(self.TIMEOUT)
+        self.daemon.node_can_shutdown(monitor).get(self.TIMEOUT)
+        self.daemon.node_finished_shutdown(shutdown_proxy()).get(self.TIMEOUT)
+        self.daemon.update_cloud_nodes([cloud_node]).get(self.TIMEOUT)
+        self.daemon.update_server_wishlist(wishlist).get(self.TIMEOUT)
+        self.stop_proxy(self.daemon)
+        self.assertEqual(1, self.node_setup.start.call_count)
+
     def test_nodes_shutting_down_replaced_below_max_nodes(self):
         cloud_node = testutil.cloud_node_mock(6)
         self.make_daemon([cloud_node], [testutil.arvados_node_mock(6)])
index e72889038850631bb8205ee1602326d307d729fc..9432a0d383b534236ff506c83ac98ed0c196c324 100644 (file)
@@ -81,13 +81,13 @@ func setupRsync(c *C, enforcePermissions bool, replications int) {
        // srcConfig
        var srcConfig apiConfig
        srcConfig.APIHost = os.Getenv("ARVADOS_API_HOST")
-       srcConfig.APIToken = os.Getenv("ARVADOS_API_TOKEN")
+       srcConfig.APIToken = arvadostest.DataManagerToken
        srcConfig.APIHostInsecure = matchTrue.MatchString(os.Getenv("ARVADOS_API_HOST_INSECURE"))
 
        // dstConfig
        var dstConfig apiConfig
        dstConfig.APIHost = os.Getenv("ARVADOS_API_HOST")
-       dstConfig.APIToken = os.Getenv("ARVADOS_API_TOKEN")
+       dstConfig.APIToken = arvadostest.DataManagerToken
        dstConfig.APIHostInsecure = matchTrue.MatchString(os.Getenv("ARVADOS_API_HOST_INSECURE"))
 
        if enforcePermissions {
@@ -389,7 +389,7 @@ func (s *ServerNotRequiredSuite) TestLoadConfig(c *C) {
        c.Check(err, IsNil)
 
        c.Assert(srcConfig.APIHost, Equals, os.Getenv("ARVADOS_API_HOST"))
-       c.Assert(srcConfig.APIToken, Equals, os.Getenv("ARVADOS_API_TOKEN"))
+       c.Assert(srcConfig.APIToken, Equals, arvadostest.DataManagerToken)
        c.Assert(srcConfig.APIHostInsecure, Equals, matchTrue.MatchString(os.Getenv("ARVADOS_API_HOST_INSECURE")))
        c.Assert(srcConfig.ExternalClient, Equals, false)
 
@@ -397,7 +397,7 @@ func (s *ServerNotRequiredSuite) TestLoadConfig(c *C) {
        c.Check(err, IsNil)
 
        c.Assert(dstConfig.APIHost, Equals, os.Getenv("ARVADOS_API_HOST"))
-       c.Assert(dstConfig.APIToken, Equals, os.Getenv("ARVADOS_API_TOKEN"))
+       c.Assert(dstConfig.APIToken, Equals, arvadostest.DataManagerToken)
        c.Assert(dstConfig.APIHostInsecure, Equals, matchTrue.MatchString(os.Getenv("ARVADOS_API_HOST_INSECURE")))
        c.Assert(dstConfig.ExternalClient, Equals, false)
 
@@ -422,7 +422,7 @@ func setupConfigFile(c *C, name string) *os.File {
        c.Check(err, IsNil)
 
        fileContent := "ARVADOS_API_HOST=" + os.Getenv("ARVADOS_API_HOST") + "\n"
-       fileContent += "ARVADOS_API_TOKEN=" + os.Getenv("ARVADOS_API_TOKEN") + "\n"
+       fileContent += "ARVADOS_API_TOKEN=" + arvadostest.DataManagerToken + "\n"
        fileContent += "ARVADOS_API_HOST_INSECURE=" + os.Getenv("ARVADOS_API_HOST_INSECURE") + "\n"
        fileContent += "ARVADOS_EXTERNAL_CLIENT=false\n"
        fileContent += "ARVADOS_BLOB_SIGNING_KEY=abcdefg"