- install/install-keepstore.html.textile.liquid
- install/configure-azure-blob-storage.html.textile.liquid
- install/install-keepproxy.html.textile.liquid
+ #- install/install-keep-web.html.textile.liquid
- install/install-crunch-dispatch.html.textile.liquid
- install/install-compute-node.html.textile.liquid
- Helpful hints:
The arvados-docker-cleaner program removes least recently used docker images as needed to keep disk usage below a configured limit.
+{% include 'notebox_begin' %}
+This also removes all containers as soon as they exit, as if they were run with @docker run --rm@. If you need to debug or inspect containers after they stop, temporarily stop arvados-docker-cleaner or run it with @--remove-stopped-containers never@.
+{% include 'notebox_end' %}
+
On Debian-based systems, install runit:
<notextile>
export CRUNCH_JOB_DOCKER_BIN=<span class="userinput">docker.io</span>
fuser -TERM -k $CRUNCH_DISPATCH_LOCKFILE || true
-cd /var/www/arvados-api/services/api
+cd /var/www/arvados-api/current
exec $rvmexec bundle exec ./script/crunch-dispatch.rb 2>&1
</code></pre>
</notextile>
--- /dev/null
+---
+layout: default
+navsection: installguide
+title: Install the keep-web server
+...
+
+The keep-web server provides read-only HTTP access to files stored in Keep. It serves public data to unauthenticated clients, and serves private data to clients that supply Arvados API tokens. It can be installed anywhere with access to Keep services, typically behind a web proxy that provides SSL support. See the "godoc page":http://godoc.org/github.com/curoverse/arvados/services/keep-web for more detail.
+
+By convention, we use the following hostname for the keep-web service:
+
+<notextile>
+<pre><code>collections.<span class="userinput">uuid_prefix</span>.your.domain
+</code></pre>
+</notextile>
+
+This hostname should resolve from anywhere on the internet.
+
+h2. Install keep-web
+
+On Debian-based systems:
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo apt-get install keep-web</span>
+</code></pre>
+</notextile>
+
+On Red Hat-based systems:
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo yum install keep-web</span>
+</code></pre>
+</notextile>
+
+Verify that @keep-web@ is functional:
+
+<notextile>
+<pre><code>~$ <span class="userinput">keep-web -h</span>
+Usage of keep-web:
+ -allow-anonymous
+ Serve public data to anonymous clients. Try the token supplied in the ARVADOS_API_TOKEN environment variable when none of the tokens provided in an HTTP request succeed in reading the desired collection. (default false)
+ -attachment-only-host string
+ Accept credentials, and add "Content-Disposition: attachment" response headers, for requests at this hostname:port. Prohibiting inline display makes it possible to serve untrusted and non-public content from a single origin, i.e., without wildcard DNS or SSL.
+ -listen string
+ Address to listen on: "host:port", or ":port" to listen on all interfaces. (default ":80")
+ -trust-all-content
+ Serve non-public content from a single origin. Dangerous: read docs before using!
+</code></pre>
+</notextile>
+
+If you intend to use Keep-web to serve public data to anonymous clients, configure it with an anonymous token. You can use the same one you used when you set up your Keepproxy server, or use the following command on the <strong>API server</strong> to create another:
+
+<notextile>
+<pre><code>/var/www/arvados-api/current/script$ <span class="userinput">RAILS_ENV=production bundle exec ./get_anonymous_user_token.rb</span>
+hoShoomoo2bai3Ju1xahg6aeng1siquuaZ1yae2gi2Uhaeng2r
+</code></pre></notextile>
+
+We recommend running @keep-web@ under "runit":https://packages.debian.org/search?keywords=runit or a similar supervisor. The basic command to start @keep-web@ is:
+
+<notextile>
+<pre><code>export ARVADOS_API_HOST=<span class="userinput">uuid_prefix</span>.your.domain
+export ARVADOS_API_TOKEN="<span class="userinput">hoShoomoo2bai3Ju1xahg6aeng1siquuaZ1yae2gi2Uhaeng2r</span>"
+exec sudo -u nobody keep-web -listen=<span class="userinput">:9002</span> -allow-anonymous 2>&1
+</code></pre>
+</notextile>
+
+Omit the @-allow-anonymous@ argument if you do not want to serve public data.
+
+Set @ARVADOS_API_HOST_INSECURE=1@ if your API server's SSL certificate is not signed by a recognized CA.
+
+h3. Set up a reverse proxy with SSL support
+
+The keep-web service will be accessible from anywhere on the internet, so we recommend using SSL for transport encryption.
+
+This is best achieved by putting a reverse proxy with SSL support in front of keep-web, running on port 443 and passing requests to keep-web on port 9002 (or whatever port you chose in your run script).
+
+Note: A wildcard SSL certificate is required in order to proxy keep-web effectively.
+
+For example, using Nginx:
+
+<notextile><pre>
+upstream keep-web {
+ server 127.0.0.1:<span class="userinput">9002</span>;
+}
+
+server {
+ listen <span class="userinput">[your public IP address]</span>:443 ssl;
+ server_name collections.<span class="userinput">uuid_prefix</span>.your.domain *.collections.<span class="userinput">uuid_prefix</span>.your.domain ~.*--collections.<span class="userinput">uuid_prefix</span>.your.domain;
+
+ proxy_connect_timeout 90s;
+ proxy_read_timeout 300s;
+
+ ssl on;
+  ssl_certificate <span class="userinput">/YOUR/PATH/TO/cert.pem</span>;
+  ssl_certificate_key <span class="userinput">/YOUR/PATH/TO/cert.key</span>;
+
+ location / {
+ proxy_pass http://keep-web;
+ proxy_set_header Host $host;
+ proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+ }
+}
+</pre></notextile>
+
+h3. Configure DNS
+
+Configure your DNS servers so the following names resolve to your Nginx proxy's public IP address.
+* @*--collections.uuid_prefix.your.domain@, if your DNS server allows this without interfering with other DNS names; or
+* @*.collections.uuid_prefix.your.domain@, if you have a wildcard SSL certificate valid for these names; or
+* @collections.uuid_prefix.your.domain@, if neither of the above options is feasible. In this case, only unauthenticated requests will be served, i.e., public data and collection sharing links.
+
+h3. Tell Workbench about the keep-web service
+
+Add *one* of the following entries to your Workbench configuration file (@/etc/arvados/workbench/application.yml@), depending on your DNS setup:
+
+<notextile>
+<pre><code>keep_web_url: https://%{uuid_or_pdh}--collections.<span class="userinput">uuid_prefix</span>.your.domain
+keep_web_url: https://%{uuid_or_pdh}.collections.<span class="userinput">uuid_prefix</span>.your.domain
+keep_web_url: https://collections.<span class="userinput">uuid_prefix</span>.your.domain
+</code></pre>
+</notextile>
title: Install Keepproxy server
...
-The Keepproxy server is a gateway into your Keep storage. Unlike the Keepstore servers, which are only accessible on the local LAN, Keepproxy is designed to provide secure access into Keep from anywhere on the internet.
+The Keepproxy server is a gateway into your Keep storage. Unlike the Keepstore servers, which are only accessible on the local LAN, Keepproxy is suitable for clients located elsewhere on the internet. Specifically, in contrast to Keepstore:
+* A client writing through Keepproxy generates less network traffic: the client sends a single copy of a data block, and Keepproxy sends copies to the appropriate Keepstore servers.
+* A client can write through Keepproxy without precomputing content hashes. Notably, the browser-based upload feature in Workbench requires Keepproxy.
+* Keepproxy checks API token validity before processing requests. (Clients that can connect directly to Keepstore can use it as scratch space even without a valid API token.)
-By convention, we use the following hostname for the Keepproxy:
+By convention, we use the following hostname for the Keepproxy server:
<div class="offset1">
table(table table-bordered table-condensed).
<notextile>
<pre><code>~$ <span class="userinput">keepproxy -h</span>
-Usage of default:
+Usage of keepproxy:
-default-replicas=2: Default number of replicas to write if not specified by the client.
-listen=":25107": Interface on which to listen for requests, in the format ipaddr:port. e.g. -listen=10.0.1.24:8000. Use -listen=:port to listen on all network interfaces.
-no-get=false: If set, disable GET operations
-no-put=false: If set, disable PUT operations
-pid="": Path to write pid file
+ -timeout=15: Timeout on requests to internal Keep services (default 15 seconds)
</code></pre>
</notextile>
On the <strong>API server</strong>, use the following command to create the token:
<notextile>
-<pre><code>~/arvados/services/api/script$ <span class="userinput">RAILS_ENV=production bundle exec ./get_anonymous_user_token.rb</span>
+<pre><code>/var/www/arvados-api/current/script$ <span class="userinput">RAILS_ENV=production bundle exec ./get_anonymous_user_token.rb</span>
hoShoomoo2bai3Ju1xahg6aeng1siquuaZ1yae2gi2Uhaeng2r
</code></pre></notextile>
}
srun(["srun", "--nodelist=" . $node[0]],
["/bin/sh", "-ec",
- "a=`$docker_bin run --rm $try_user_arg $docker_hash id --user` && " .
+ "a=`$docker_bin run $try_user_arg $docker_hash id --user` && " .
" test \$a -ne 0"],
{fork => 1});
if ($? == 0) {
$command .= "&& exec arv-mount --by-pdh --allow-other $ENV{TASK_KEEPMOUNT} --exec ";
if ($docker_hash)
{
- my $cidfile = "$ENV{CRUNCH_TMP}/$Jobstep->{arvados_task}->{uuid}-$Jobstep->{failures}.cid";
+ my $containername = "$Jobstep->{arvados_task}->{uuid}-$Jobstep->{failures}";
+ my $cidfile = "$ENV{CRUNCH_TMP}/$containername.cid";
$command .= "crunchstat -cgroup-root=/sys/fs/cgroup -cgroup-parent=docker -cgroup-cid=$cidfile -poll=10000 ";
- $command .= "$docker_bin run --rm=true --attach=stdout --attach=stderr --attach=stdin -i \Q$dockeruserarg\E --cidfile=$cidfile --sig-proxy ";
+ $command .= "$docker_bin run --name=$containername --attach=stdout --attach=stderr --attach=stdin -i \Q$dockeruserarg\E --cidfile=$cidfile --sig-proxy ";
# We only set memory limits if Docker lets us limit both memory and swap.
# Memory limits alone have been supported longer, but subprocesses tend
# to get SIGKILL if they exceed that without any swap limit set.
import logging
import re
import os
+
from cwltool.process import get_feature
logger = logging.getLogger('arvados.cwl-runner')
return dockerRequirement["dockerImageId"]
-class CollectionFsAccess(cwltool.draft2tool.StdFsAccess):
+
+class CollectionFsAccess(cwltool.process.StdFsAccess):
def __init__(self, basedir):
self.collections = {}
self.basedir = basedir
def get_collection(self, path):
p = path.split("/")
- if arvados.util.keep_locator_pattern.match(p[0]):
- if p[0] not in self.collections:
- self.collections[p[0]] = arvados.collection.CollectionReader(p[0])
- return (self.collections[p[0]], "/".join(p[1:]))
+ if p[0].startswith("keep:") and arvados.util.keep_locator_pattern.match(p[0][5:]):
+ pdh = p[0][5:]
+ if pdh not in self.collections:
+ self.collections[pdh] = arvados.collection.CollectionReader(pdh)
+ return (self.collections[pdh], "/".join(p[1:]))
else:
return (None, path)
def _match(self, collection, patternsegments, parent):
+ if not patternsegments:
+ return []
+
+ if not isinstance(collection, arvados.collection.RichCollectionBase):
+ return []
+
ret = []
+ # iterate over the files and subcollections in 'collection'
for filename in collection:
- if fnmatch.fnmatch(filename, patternsegments[0]):
+ if patternsegments[0] == '.':
+ # Pattern contains something like "./foo" so just shift
+ # past the "./"
+ ret.extend(self._match(collection, patternsegments[1:], parent))
+ elif fnmatch.fnmatch(filename, patternsegments[0]):
cur = os.path.join(parent, filename)
if len(patternsegments) == 1:
ret.append(cur)
def glob(self, pattern):
collection, rest = self.get_collection(pattern)
patternsegments = rest.split("/")
- return self._match(collection, patternsegments, collection.manifest_locator())
+ return self._match(collection, patternsegments, "keep:" + collection.manifest_locator())
def open(self, fn, mode):
collection, rest = self.get_collection(fn)
if self.generatefiles:
vwd = arvados.collection.Collection()
+ script_parameters["task.vwd"] = {}
for t in self.generatefiles:
if isinstance(self.generatefiles[t], dict):
- src, rest = self.arvrunner.fs_access.get_collection(self.generatefiles[t]["path"][6:])
+ src, rest = self.arvrunner.fs_access.get_collection(self.generatefiles[t]["path"].replace("$(task.keep)/", "keep:"))
vwd.copy(rest, t, source_collection=src)
else:
with vwd.open(t, "w") as f:
f.write(self.generatefiles[t])
vwd.save_new()
- script_parameters["task.vwd"] = vwd.portable_data_hash()
+ for t in self.generatefiles:
+ script_parameters["task.vwd"][t] = "$(task.keep)/%s/%s" % (vwd.portable_data_hash(), t)
script_parameters["task.env"] = {"TMPDIR": "$(task.tmpdir)"}
if self.environment:
(docker_req, docker_is_req) = get_feature(self, "DockerRequirement")
if docker_req and kwargs.get("use_container") is not False:
runtime_constraints["docker_image"] = arv_docker_get_image(self.arvrunner.api, docker_req, pull_image)
- runtime_constraints["arvados_sdk_version"] = "master"
- response = self.arvrunner.api.jobs().create(body={
- "script": "run-command",
- "repository": "arvados",
- "script_version": "master",
- "script_parameters": script_parameters,
- "runtime_constraints": runtime_constraints
- }, find_or_create=kwargs.get("enable_reuse", True)).execute()
+ try:
+ response = self.arvrunner.api.jobs().create(body={
+ "script": "crunchrunner",
+ "repository": kwargs["repository"],
+ "script_version": "master",
+ "script_parameters": {"tasks": [script_parameters]},
+ "runtime_constraints": runtime_constraints
+ }, find_or_create=kwargs.get("enable_reuse", True)).execute()
+
+ self.arvrunner.jobs[response["uuid"]] = self
- self.arvrunner.jobs[response["uuid"]] = self
+ logger.info("Job %s is %s", response["uuid"], response["state"])
- logger.info("Job %s is %s", response["uuid"], response["state"])
+ if response["state"] in ("Complete", "Failed", "Cancelled"):
+ self.done(response)
+ except Exception as e:
+ logger.error("Got error %s" % str(e))
+ self.output_callback({}, "permanentFail")
- if response["state"] in ("Complete", "Failed", "Cancelled"):
- self.done(response)
def done(self, record):
try:
try:
outputs = {}
- outputs = self.collect_outputs(record["output"])
+ outputs = self.collect_outputs("keep:" + record["output"])
except Exception as e:
- logger.warn(str(e))
+ logger.exception("Got exception while collecting job outputs:")
processStatus = "permanentFail"
self.output_callback(outputs, processStatus)
finally:
del self.arvrunner.jobs[record["uuid"]]
+
class ArvPathMapper(cwltool.pathmapper.PathMapper):
def __init__(self, arvrunner, referenced_files, basedir, **kwargs):
- self._pathmap = {}
+ self._pathmap = arvrunner.get_uploaded()
uploadfiles = []
- pdh_path = re.compile(r'^[0-9a-f]{32}\+\d+/.+')
+ pdh_path = re.compile(r'^keep:[0-9a-f]{32}\+\d+/.+')
for src in referenced_files:
if isinstance(src, basestring) and pdh_path.match(src):
- self._pathmap[src] = (src, "/keep/%s" % src)
- else:
- ab = src if os.path.isabs(src) else os.path.join(basedir, src)
+ self._pathmap[src] = (src, "$(task.keep)/%s" % src[5:])
+ if src not in self._pathmap:
+ ab = cwltool.pathmapper.abspath(src, basedir)
st = arvados.commands.run.statfile("", ab)
if kwargs.get("conformance_test"):
self._pathmap[src] = (src, ab)
raise cwltool.workflow.WorkflowException("Input file path '%s' is invalid" % st)
if uploadfiles:
- arvados.commands.run.uploadfiles([u[2] for u in uploadfiles], arvrunner.api, dry_run=kwargs.get("dry_run"), num_retries=3)
+ arvados.commands.run.uploadfiles([u[2] for u in uploadfiles],
+ arvrunner.api,
+ dry_run=kwargs.get("dry_run"),
+ num_retries=3,
+ fnPattern="$(task.keep)/%s/%s")
for src, ab, st in uploadfiles:
+ arvrunner.add_uploaded(src, (ab, st.fn))
self._pathmap[src] = (ab, st.fn)
class ArvadosCommandTool(cwltool.draft2tool.CommandLineTool):
def __init__(self, arvrunner, toolpath_object, **kwargs):
- super(ArvadosCommandTool, self).__init__(toolpath_object, **kwargs)
+ super(ArvadosCommandTool, self).__init__(toolpath_object, outdir="$(task.outdir)", tmpdir="$(task.tmpdir)", **kwargs)
self.arvrunner = arvrunner
def makeJobRunner(self):
self.lock = threading.Lock()
self.cond = threading.Condition(self.lock)
self.final_output = None
+ self.uploaded = {}
def arvMakeTool(self, toolpath_object, **kwargs):
if "class" in toolpath_object and toolpath_object["class"] == "CommandLineTool":
finally:
self.cond.release()
+ def get_uploaded(self):
+ return self.uploaded.copy()
+
+ def add_uploaded(self, src, pair):
+ self.uploaded[src] = pair
+
def arvExecutor(self, tool, job_order, input_basedir, args, **kwargs):
events = arvados.events.subscribe(arvados.api('v1'), [["object_uuid", "is_a", "arvados#job"]], self.on_message)
kwargs["fs_access"] = self.fs_access
kwargs["enable_reuse"] = args.enable_reuse
+ kwargs["repository"] = args.repository
if kwargs.get("conformance_test"):
return cwltool.main.single_job_executor(tool, job_order, input_basedir, args, **kwargs)
def main(args, stdout, stderr, api_client=None):
runner = ArvCwlRunner(api_client=arvados.api('v1'))
- args.append("--leave-outputs")
+ args.insert(0, "--leave-outputs")
parser = cwltool.main.arg_parser()
exgroup = parser.add_mutually_exclusive_group()
exgroup.add_argument("--enable-reuse", action="store_true",
default=False, dest="enable_reuse",
help="")
+ parser.add_argument('--repository', type=str, default="peter/crunchrunner", help="Repository containing the 'crunchrunner' program.")
+
return cwltool.main.main(args, executor=runner.arvExecutor, makeTool=runner.arvMakeTool, parser=parser)
--- /dev/null
+#!/usr/bin/env python
+
+import sys
+
+from arvados_cwl import main
+
+sys.exit(main(sys.argv[1:], sys.stdout, sys.stderr))
license='Apache 2.0',
packages=find_packages(),
scripts=[
- 'bin/cwl-runner'
+ 'bin/cwl-runner',
+ 'bin/arvados-cwl-runner'
],
install_requires=[
- 'cwltool==1.0.20150722144138',
- 'arvados-python-client'
+ 'cwltool>=1.0.20151026181844',
+ 'arvados-python-client>=0.1.20151023214338'
],
zip_safe=True,
cmdclass={'egg_info': tagger},
"os"
"regexp"
"strings"
+ "time"
)
type StringMatcher func(string) bool
var MissingArvadosApiToken = errors.New("Missing required environment variable ARVADOS_API_TOKEN")
var ErrInvalidArgument = errors.New("Invalid argument")
+// A common failure mode is to reuse a keepalive connection that has been
+// terminated (in a way that we can't detect) for being idle too long.
+// POST and DELETE are not safe to retry automatically, so we minimize
+// such failures by always using a new or recently active socket.
+var MaxIdleConnectionDuration = 30 * time.Second
+
// Indicates an error that was returned by the API server.
type APIServerError struct {
// Address of server returning error, of the form "host:port".
// Discovery document
DiscoveryDoc Dict
+
+ lastClosedIdlesAt time.Time
}
// Create a new ArvadosClient, initialized with standard Arvados environment
return ac, MissingArvadosApiToken
}
+ ac.lastClosedIdlesAt = time.Now()
+
return ac, err
}
req.Header.Add("X-External-Client", "1")
}
+ // POST and DELETE are not safe to retry automatically, so we minimize
+ // such failures by always using a new or recently active socket
+ if method == "POST" || method == "DELETE" {
+ if time.Since(c.lastClosedIdlesAt) > MaxIdleConnectionDuration {
+ c.lastClosedIdlesAt = time.Now()
+ c.Client.Transport.(*http.Transport).CloseIdleConnections()
+ }
+ }
+
// Make the request
var resp *http.Response
if resp, err = c.Client.Do(req); err != nil {
"net/http"
"os"
"testing"
+ "time"
)
// Gocheck boilerplate
func (s *ServerRequiredSuite) TestCreatePipelineTemplate(c *C) {
arv, err := MakeArvadosClient()
- getback := make(Dict)
- err = arv.Create("pipeline_templates",
- Dict{"pipeline_template": Dict{
- "name": "tmp",
- "components": Dict{
- "c1": map[string]string{"script": "script1"},
- "c2": map[string]string{"script": "script2"}}}},
- &getback)
- c.Assert(err, Equals, nil)
- c.Assert(getback["name"], Equals, "tmp")
- c.Assert(getback["components"].(map[string]interface{})["c2"].(map[string]interface{})["script"], Equals, "script2")
-
- uuid := getback["uuid"].(string)
-
- getback = make(Dict)
- err = arv.Get("pipeline_templates", uuid, nil, &getback)
- c.Assert(err, Equals, nil)
- c.Assert(getback["name"], Equals, "tmp")
- c.Assert(getback["components"].(map[string]interface{})["c1"].(map[string]interface{})["script"], Equals, "script1")
-
- getback = make(Dict)
- err = arv.Update("pipeline_templates", uuid,
- Dict{
- "pipeline_template": Dict{"name": "tmp2"}},
- &getback)
- c.Assert(err, Equals, nil)
- c.Assert(getback["name"], Equals, "tmp2")
-
- c.Assert(getback["uuid"].(string), Equals, uuid)
- getback = make(Dict)
- err = arv.Delete("pipeline_templates", uuid, nil, &getback)
- c.Assert(err, Equals, nil)
- c.Assert(getback["name"], Equals, "tmp2")
+ for _, idleConnections := range []bool{
+ false,
+ true,
+ } {
+ if idleConnections {
+ arv.lastClosedIdlesAt = time.Now().Add(-time.Minute)
+ } else {
+ arv.lastClosedIdlesAt = time.Now()
+ }
+
+ getback := make(Dict)
+ err = arv.Create("pipeline_templates",
+ Dict{"pipeline_template": Dict{
+ "name": "tmp",
+ "components": Dict{
+ "c1": map[string]string{"script": "script1"},
+ "c2": map[string]string{"script": "script2"}}}},
+ &getback)
+ c.Assert(err, Equals, nil)
+ c.Assert(getback["name"], Equals, "tmp")
+ c.Assert(getback["components"].(map[string]interface{})["c2"].(map[string]interface{})["script"], Equals, "script2")
+
+ uuid := getback["uuid"].(string)
+
+ getback = make(Dict)
+ err = arv.Get("pipeline_templates", uuid, nil, &getback)
+ c.Assert(err, Equals, nil)
+ c.Assert(getback["name"], Equals, "tmp")
+ c.Assert(getback["components"].(map[string]interface{})["c1"].(map[string]interface{})["script"], Equals, "script1")
+
+ getback = make(Dict)
+ err = arv.Update("pipeline_templates", uuid,
+ Dict{
+ "pipeline_template": Dict{"name": "tmp2"}},
+ &getback)
+ c.Assert(err, Equals, nil)
+ c.Assert(getback["name"], Equals, "tmp2")
+
+ c.Assert(getback["uuid"].(string), Equals, uuid)
+ getback = make(Dict)
+ err = arv.Delete("pipeline_templates", uuid, nil, &getback)
+ c.Assert(err, Equals, nil)
+ c.Assert(getback["name"], Equals, "tmp2")
+ }
}
func (s *ServerRequiredSuite) TestErrorResponse(c *C) {
--- /dev/null
+package arvadostest
+
+// IDs of API server's test fixtures
+const (
+ SpectatorToken = "zw2f4gwx8hw8cjre7yp6v1zylhrhn3m5gvjq73rtpwhmknrybu"
+ ActiveToken = "3kg6k6lzmp9kj5cpkcoxie963cmvjahbt2fod9zru30k1jqdmi"
+ AdminToken = "4axaw8zxe0qm22wa6urpp5nskcne8z88cvbupv653y1njyi05h"
+ AnonymousToken = "4kg6k6lzmp9kj4cpkcoxie964cmvjahbt4fod9zru44k4jqdmi"
+ DataManagerToken = "320mkve8qkswstz7ff61glpk3mhgghmg67wmic7elw4z41pke1"
+ FooCollection = "zzzzz-4zz18-fy296fx3hot09f7"
+ NonexistentCollection = "zzzzz-4zz18-totallynotexist"
+ HelloWorldCollection = "zzzzz-4zz18-4en62shvi99lxd4"
+ FooBarDirCollection = "zzzzz-4zz18-foonbarfilesdir"
+ FooPdh = "1f4b0bc7583c2a7f9102c395f4ffc5e3+45"
+ HelloWorldPdh = "55713e6a34081eb03609e7ad5fcad129+62"
+)
+
+// A valid manifest designed to test various edge cases and parsing
+// requirements
+const PathologicalManifest = ". acbd18db4cc2f85cedef654fccc4a4d8+3 37b51d194a7513e45b56f6524f2d51f2+3 73feffa4b7f6bb68e44cf984c85f6e88+3+Z+K@xyzzy acbd18db4cc2f85cedef654fccc4a4d8+3 0:0:zero@0 0:1:f 1:0:zero@1 1:4:ooba 4:0:zero@4 5:1:r 5:4:rbaz 9:0:zero@9\n" +
+ "./overlapReverse acbd18db4cc2f85cedef654fccc4a4d8+3 acbd18db4cc2f85cedef654fccc4a4d8+3 5:1:o 4:2:oo 2:4:ofoo\n" +
+ "./segmented acbd18db4cc2f85cedef654fccc4a4d8+3 37b51d194a7513e45b56f6524f2d51f2+3 0:1:frob 5:1:frob 1:1:frob 1:2:oof 0:1:oof 5:0:frob 3:1:frob\n" +
+ `./foo\040b\141r acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:baz` + "\n" +
+ `./foo\040b\141r acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:b\141z\040w\141z` + "\n" +
+ "./foo acbd18db4cc2f85cedef654fccc4a4d8+3 0:0:zero 0:3:foo\n" +
+ ". acbd18db4cc2f85cedef654fccc4a4d8+3 0:0:foo/zero 0:3:foo/foo\n"
package auth
import (
+ "encoding/base64"
"net/http"
"net/url"
"strings"
return c
}
+// EncodeTokenCookie accepts a token and returns a byte slice suitable
+// for use as a cookie value, such that it will be decoded correctly
+// by LoadTokensFromHTTPRequest.
+var EncodeTokenCookie func([]byte) string = base64.URLEncoding.EncodeToString
+
+// DecodeTokenCookie accepts a cookie value and returns the encoded
+// token.
+var DecodeTokenCookie func(string) ([]byte, error) = base64.URLEncoding.DecodeString
+
// LoadTokensFromHttpRequest loads all tokens it can find in the
// headers and query string of an http query.
func (a *Credentials) LoadTokensFromHTTPRequest(r *http.Request) {
a.Tokens = append(a.Tokens, val...)
}
+ a.loadTokenFromCookie(r)
+
// TODO: Load token from Rails session cookie (if Rails site
// secret is known)
}
+func (a *Credentials) loadTokenFromCookie(r *http.Request) {
+ cookie, err := r.Cookie("arvados_api_token")
+ if err != nil || len(cookie.Value) == 0 {
+ return
+ }
+ token, err := DecodeTokenCookie(cookie.Value)
+ if err != nil {
+ return
+ }
+ a.Tokens = append(a.Tokens, string(token))
+}
+
// TODO: LoadTokensFromHttpRequestBody(). We can't assume in
// LoadTokensFromHttpRequest() that [or how] we should read and parse
// the request body. This has to be requested explicitly by the
--- /dev/null
+package keepclient
+
+import (
+ "errors"
+ "io"
+ "os"
+
+ "git.curoverse.com/arvados.git/sdk/go/manifest"
+)
+
+// ReadCloserWithLen extends io.ReadCloser with a Len() method that
+// returns the total number of bytes available to read.
+type ReadCloserWithLen interface {
+ io.ReadCloser
+ Len() uint64
+}
+
+const (
+ // After reading a data block from Keep, cfReader slices it up
+ // and sends the slices to a buffered channel to be consumed
+ // by the caller via Read().
+ //
+ // dataSliceSize is the maximum size of the slices, and
+ // therefore the maximum number of bytes that will be returned
+ // by a single call to Read().
+ dataSliceSize = 1 << 20
+)
+
+// ErrNoManifest indicates the given collection has no manifest
+// information (e.g., manifest_text was excluded by a "select"
+// parameter when retrieving the collection record).
+var ErrNoManifest = errors.New("Collection has no manifest")
+
+// CollectionFileReader returns a ReadCloserWithLen that reads file
+// content from a collection. The filename must be given relative to
+// the root of the collection, without a leading "./".
+func (kc *KeepClient) CollectionFileReader(collection map[string]interface{}, filename string) (ReadCloserWithLen, error) {
+ mText, ok := collection["manifest_text"].(string)
+ if !ok {
+ return nil, ErrNoManifest
+ }
+ m := manifest.Manifest{Text: mText}
+ rdrChan := make(chan *cfReader)
+ go kc.queueSegmentsToGet(m, filename, rdrChan)
+ r, ok := <-rdrChan
+ if !ok {
+ return nil, os.ErrNotExist
+ }
+ return r, nil
+}
+
+// Send segments for the specified file to r.toGet. Send a *cfReader
+// to rdrChan if the specified file is found (even if it's empty).
+// Then, close rdrChan.
+func (kc *KeepClient) queueSegmentsToGet(m manifest.Manifest, filename string, rdrChan chan *cfReader) {
+ defer close(rdrChan)
+
+ // q is a queue of FileSegments that we have received but
+ // haven't yet been able to send to toGet.
+ var q []*manifest.FileSegment
+ var r *cfReader
+ for seg := range m.FileSegmentIterByName(filename) {
+ if r == nil {
+ // We've just discovered that the requested
+ // filename does appear in the manifest, so we
+ // can return a real reader (not nil) from
+ // CollectionFileReader().
+ r = newCFReader(kc)
+ rdrChan <- r
+ }
+ q = append(q, seg)
+ r.totalSize += uint64(seg.Len)
+ // Send toGet as many segments as we can until it
+ // blocks.
+ Q:
+ for len(q) > 0 {
+ select {
+ case r.toGet <- q[0]:
+ q = q[1:]
+ default:
+ break Q
+ }
+ }
+ }
+ if r == nil {
+ // File not found.
+ return
+ }
+ close(r.countDone)
+ for _, seg := range q {
+ r.toGet <- seg
+ }
+ close(r.toGet)
+}
+
+type cfReader struct {
+ keepClient *KeepClient
+
+ // doGet() reads FileSegments from toGet, gets the data from
+ // Keep, and sends byte slices to toRead to be consumed by
+ // Read().
+ toGet chan *manifest.FileSegment
+
+ // toRead is a buffered channel, sized to fit one full Keep
+ // block. This lets us verify checksums without having a
+ // store-and-forward delay between blocks: by the time the
+ // caller starts receiving data from block N, cfReader is
+ // starting to fetch block N+1. A larger buffer would be
+ // useful for a caller whose read speed varies a lot.
+ toRead chan []byte
+
+ // bytes ready to send next time someone calls Read()
+ buf []byte
+
+ // Total size of the file being read. Not safe to read this
+ // until countDone is closed.
+ totalSize uint64
+ countDone chan struct{}
+
+ // First error encountered.
+ err error
+
+ // errNotNil is closed IFF err contains a non-nil error.
+ // Receiving from it will block until an error occurs.
+ errNotNil chan struct{}
+
+ // rdrClosed is closed IFF the reader's Close() method has
+ // been called. Any goroutines associated with the reader will
+ // stop and free up resources when they notice this channel is
+ // closed.
+ rdrClosed chan struct{}
+}
+
+func (r *cfReader) Read(outbuf []byte) (int, error) {
+ if r.Error() != nil {
+ // Short circuit: the caller might as well find out
+ // now that we hit an error, even if there's buffered
+ // data we could return.
+ return 0, r.Error()
+ }
+ for len(r.buf) == 0 {
+ // Private buffer was emptied out by the last Read()
+ // (or this is the first Read() and r.buf is nil).
+ // Read from r.toRead until we get a non-empty slice
+ // or hit an error.
+ var ok bool
+ r.buf, ok = <-r.toRead
+ if r.Error() != nil {
+ // Error encountered while waiting for bytes
+ return 0, r.Error()
+ } else if !ok {
+ // No more bytes to read, no error encountered
+ return 0, io.EOF
+ }
+ }
+ // Copy as much as possible from our private buffer to the
+ // caller's buffer
+ n := len(r.buf)
+ if len(r.buf) > len(outbuf) {
+ n = len(outbuf)
+ }
+ copy(outbuf[:n], r.buf[:n])
+
+ // Next call to Read() will continue where we left off
+ r.buf = r.buf[n:]
+
+ return n, nil
+}
+
+// Close releases resources. It returns a non-nil error if an error
+// was encountered by the reader.
+func (r *cfReader) Close() error {
+ close(r.rdrClosed)
+ return r.Error()
+}
+
+// Error returns an error if one has been encountered, otherwise
+// nil. It is safe to call from any goroutine.
+func (r *cfReader) Error() error {
+ select {
+ case <-r.errNotNil:
+ return r.err
+ default:
+ return nil
+ }
+}
+
+// Len returns the total number of bytes in the file being read. If
+// necessary, it waits for manifest parsing to finish.
+func (r *cfReader) Len() uint64 {
+ // Wait for all segments to be counted
+ <-r.countDone
+ return r.totalSize
+}
+
+func (r *cfReader) doGet() {
+ defer close(r.toRead)
+GET:
+ for fs := range r.toGet {
+ rdr, _, _, err := r.keepClient.Get(fs.Locator)
+ if err != nil {
+ r.err = err
+ close(r.errNotNil)
+ return
+ }
+ var buf = make([]byte, fs.Offset+fs.Len)
+ _, err = io.ReadFull(rdr, buf)
+ if err != nil {
+ r.err = err
+ close(r.errNotNil)
+ return
+ }
+ for bOff, bLen := fs.Offset, dataSliceSize; bOff < fs.Offset+fs.Len && bLen > 0; bOff += bLen {
+ if bOff+bLen > fs.Offset+fs.Len {
+ bLen = fs.Offset + fs.Len - bOff
+ }
+ select {
+ case r.toRead <- buf[bOff : bOff+bLen]:
+ case <-r.rdrClosed:
+ // Reader is closed: no point sending
+ // anything more to toRead.
+ break GET
+ }
+ }
+ // It is possible that r.rdrClosed is closed but we
+ // never noticed because r.toRead was also ready in
+ // every select{} above. Here we check before wasting
+ // a keepclient.Get() call.
+ select {
+ case <-r.rdrClosed:
+ break GET
+ default:
+ }
+ }
+ // In case we exited the above loop early: before returning,
+ // drain the toGet channel so its sender doesn't sit around
+ // blocking forever.
+ for _ = range r.toGet {
+ }
+}
+
+// newCFReader returns a cfReader with its channels initialized and
+// its doGet goroutine already running. toRead is buffered large
+// enough to hold one full block's worth of dataSliceSize chunks.
+func newCFReader(kc *KeepClient) (r *cfReader) {
+ r = new(cfReader)
+ r.keepClient = kc
+ r.rdrClosed = make(chan struct{})
+ r.errNotNil = make(chan struct{})
+ r.toGet = make(chan *manifest.FileSegment, 2)
+ r.toRead = make(chan []byte, (BLOCKSIZE+dataSliceSize-1)/dataSliceSize)
+ r.countDone = make(chan struct{})
+ go r.doGet()
+ return
+}
--- /dev/null
+package keepclient
+
+import (
+ "crypto/md5"
+ "crypto/rand"
+ "fmt"
+ "io"
+ "io/ioutil"
+ "net/http"
+ "os"
+ "strconv"
+ "strings"
+
+ "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
+ "git.curoverse.com/arvados.git/sdk/go/arvadostest"
+ check "gopkg.in/check.v1"
+)
+
+var _ = check.Suite(&CollectionReaderUnit{})
+
+type CollectionReaderUnit struct {
+ arv arvadosclient.ArvadosClient
+ kc *KeepClient
+ handler SuccessHandler
+}
+
+// SetUpTest builds a fresh Arvados client, Keep client, and four
+// fake Keep servers (all backed by one shared SuccessHandler) before
+// each test, so tests can inspect h.disk/h.ops and serialize server
+// access via h.lock.
+func (s *CollectionReaderUnit) SetUpTest(c *check.C) {
+ var err error
+ s.arv, err = arvadosclient.MakeArvadosClient()
+ c.Assert(err, check.IsNil)
+ s.arv.ApiToken = arvadostest.ActiveToken
+
+ s.kc, err = MakeKeepClient(&s.arv)
+ c.Assert(err, check.IsNil)
+
+ s.handler = SuccessHandler{
+ disk: make(map[string][]byte),
+ lock: make(chan struct{}, 1),
+ ops: new(int),
+ }
+ localRoots := make(map[string]string)
+ for i, k := range RunSomeFakeKeepServers(s.handler, 4) {
+ localRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
+ }
+ s.kc.SetServiceRoots(localRoots, localRoots, nil)
+}
+
+// SuccessHandler is an in-memory stub Keep server backend: PUT
+// stores the request body under its computed "md5hex+size" locator,
+// GET serves it back by locator.
+type SuccessHandler struct {
+ disk map[string][]byte
+ lock chan struct{} // channel with buffer==1: full when an operation is in progress.
+ ops *int // number of operations completed
+}
+
+// ServeHTTP handles PUT and GET; any other method gets 405. Each
+// disk access (and ops increment) is serialized by holding lock.
+func (h SuccessHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
+ switch req.Method {
+ case "PUT":
+ buf, err := ioutil.ReadAll(req.Body)
+ if err != nil {
+ resp.WriteHeader(500)
+ return
+ }
+ // Locator is the MD5 of the body plus "+<length>".
+ pdh := fmt.Sprintf("%x+%d", md5.Sum(buf), len(buf))
+ h.lock <- struct{}{}
+ h.disk[pdh] = buf
+ if h.ops != nil {
+ (*h.ops)++
+ }
+ <-h.lock
+ resp.Write([]byte(pdh))
+ case "GET":
+ // Locator is the request path minus the leading "/".
+ pdh := req.URL.Path[1:]
+ h.lock <- struct{}{}
+ buf, ok := h.disk[pdh]
+ if h.ops != nil {
+ (*h.ops)++
+ }
+ <-h.lock
+ if !ok {
+ resp.WriteHeader(http.StatusNotFound)
+ } else {
+ resp.Write(buf)
+ }
+ default:
+ resp.WriteHeader(http.StatusMethodNotAllowed)
+ }
+}
+
+// rdrTest is one table-driven test case for CollectionFileReader.
+type rdrTest struct {
+ mt string // manifest text
+ f string // filename
+ want interface{} // error or string to expect
+}
+
+// TestCollectionReaderContent runs CollectionFileReader against a
+// table of filenames in arvadostest.PathologicalManifest: missing
+// files must yield os.ErrNotExist (with a nil reader), present files
+// must yield their exact content, EOF must be stable across repeated
+// reads, and Close must succeed.
+func (s *CollectionReaderUnit) TestCollectionReaderContent(c *check.C) {
+ s.kc.PutB([]byte("foo"))
+ s.kc.PutB([]byte("bar"))
+ s.kc.PutB([]byte("Hello world\n"))
+ s.kc.PutB([]byte(""))
+
+ mt := arvadostest.PathologicalManifest
+
+ for _, testCase := range []rdrTest{
+ {mt: mt, f: "zzzz", want: os.ErrNotExist},
+ {mt: mt, f: "frob", want: os.ErrNotExist},
+ {mt: mt, f: "/segmented/frob", want: os.ErrNotExist},
+ {mt: mt, f: "./segmented/frob", want: os.ErrNotExist},
+ {mt: mt, f: "/f", want: os.ErrNotExist},
+ {mt: mt, f: "./f", want: os.ErrNotExist},
+ {mt: mt, f: "foo bar//baz", want: os.ErrNotExist},
+ {mt: mt, f: "foo/zero", want: ""},
+ {mt: mt, f: "zero@0", want: ""},
+ {mt: mt, f: "zero@1", want: ""},
+ {mt: mt, f: "zero@4", want: ""},
+ {mt: mt, f: "zero@9", want: ""},
+ {mt: mt, f: "f", want: "f"},
+ {mt: mt, f: "ooba", want: "ooba"},
+ {mt: mt, f: "overlapReverse/o", want: "o"},
+ {mt: mt, f: "overlapReverse/oo", want: "oo"},
+ {mt: mt, f: "overlapReverse/ofoo", want: "ofoo"},
+ {mt: mt, f: "foo bar/baz", want: "foo"},
+ {mt: mt, f: "segmented/frob", want: "frob"},
+ {mt: mt, f: "segmented/oof", want: "oof"},
+ } {
+ rdr, err := s.kc.CollectionFileReader(map[string]interface{}{"manifest_text": testCase.mt}, testCase.f)
+ switch want := testCase.want.(type) {
+ case error:
+ c.Check(rdr, check.IsNil)
+ c.Check(err, check.Equals, want)
+ case string:
+ buf := make([]byte, len(want))
+ n, err := io.ReadFull(rdr, buf)
+ c.Check(err, check.IsNil)
+ // Re-reading after EOF must keep returning
+ // (0, io.EOF) without disturbing buf.
+ for i := 0; i < 4; i++ {
+ c.Check(string(buf), check.Equals, want)
+ n, err = rdr.Read(buf)
+ c.Check(n, check.Equals, 0)
+ c.Check(err, check.Equals, io.EOF)
+ }
+ c.Check(rdr.Close(), check.Equals, nil)
+ }
+ }
+}
+
+// TestCollectionReaderManyBlocks streams one file spread across 4096
+// blocks of increasing size (0..4095 random bytes each) and checks
+// that the MD5 of everything read back matches the MD5 of what was
+// written, with io.EOF after the end.
+func (s *CollectionReaderUnit) TestCollectionReaderManyBlocks(c *check.C) {
+ h := md5.New()
+ buf := make([]byte, 4096)
+ locs := make([]string, len(buf))
+ filesize := 0
+ for i := 0; i < len(locs); i++ {
+ _, err := io.ReadFull(rand.Reader, buf[:i])
+ c.Assert(err, check.IsNil)
+ h.Write(buf[:i])
+ locs[i], _, err = s.kc.PutB(buf[:i])
+ c.Assert(err, check.IsNil)
+ filesize += i
+ }
+ manifest := "./random " + strings.Join(locs, " ") + " 0:" + strconv.Itoa(filesize) + ":bytes.bin\n"
+ dataMD5 := h.Sum(nil)
+
+ checkMD5 := md5.New()
+ rdr, err := s.kc.CollectionFileReader(map[string]interface{}{"manifest_text": manifest}, "random/bytes.bin")
+ c.Check(err, check.IsNil)
+ _, err = io.Copy(checkMD5, rdr)
+ c.Check(err, check.IsNil)
+ _, err = rdr.Read(make([]byte, 1))
+ c.Check(err, check.Equals, io.EOF)
+ c.Check(checkMD5.Sum(nil), check.DeepEquals, dataMD5)
+}
+
+// TestCollectionReaderCloseEarly verifies that closing the reader
+// while the stub server is still blocked stops doGet after at most
+// one GET: toRead gets closed without further buffers, and no more
+// server operations are issued.
+func (s *CollectionReaderUnit) TestCollectionReaderCloseEarly(c *check.C) {
+ s.kc.PutB([]byte("foo"))
+
+ // Manifest with 1000 copies of the same 3-byte block.
+ mt := ". "
+ for i := 0; i < 1000; i++ {
+ mt += "acbd18db4cc2f85cedef654fccc4a4d8+3 "
+ }
+ mt += "0:3000:foo1000.txt\n"
+
+ // Grab the stub server's lock, ensuring our cfReader doesn't
+ // get anything back from its first call to kc.Get() before we
+ // have a chance to call Close().
+ s.handler.lock <- struct{}{}
+ opsBeforeRead := *s.handler.ops
+
+ rdr, err := s.kc.CollectionFileReader(map[string]interface{}{"manifest_text": mt}, "foo1000.txt")
+ c.Assert(err, check.IsNil)
+
+ firstReadDone := make(chan struct{})
+ go func() {
+ rdr.Read(make([]byte, 6))
+ firstReadDone <- struct{}{}
+ }()
+ err = rdr.Close()
+ c.Assert(err, check.IsNil)
+ c.Assert(rdr.(*cfReader).Error(), check.IsNil)
+
+ // Release the stub server's lock. The first GET operation will proceed.
+ <-s.handler.lock
+
+ // Make sure our first read operation consumes the data
+ // received from the first GET.
+ <-firstReadDone
+
+ // doGet() should close toRead before sending any more bufs to it.
+ if what, ok := <-rdr.(*cfReader).toRead; ok {
+ c.Errorf("Got %q, expected toRead to be closed", what)
+ }
+
+ // Stub should have handled exactly one GET request.
+ c.Assert(*s.handler.ops, check.Equals, opsBeforeRead+1)
+}
+
+// TestCollectionReaderDataError verifies that reading a file whose
+// block does not exist yields a persistent non-EOF error: every
+// subsequent ReadFull keeps failing the same way.
+func (s *CollectionReaderUnit) TestCollectionReaderDataError(c *check.C) {
+ manifest := ". ffffffffffffffffffffffffffffffff+1 0:1:notfound.txt\n"
+ buf := make([]byte, 1)
+ rdr, err := s.kc.CollectionFileReader(map[string]interface{}{"manifest_text": manifest}, "notfound.txt")
+ c.Check(err, check.IsNil)
+ for i := 0; i < 2; i++ {
+ _, err = io.ReadFull(rdr, buf)
+ c.Check(err, check.NotNil)
+ c.Check(err, check.Not(check.Equals), io.EOF)
+ }
+}
retryList = append(retryList, host)
} else if resp.StatusCode != http.StatusOK {
var respbody []byte
- respbody, _ = ioutil.ReadAll(&io.LimitedReader{resp.Body, 4096})
+ respbody, _ = ioutil.ReadAll(&io.LimitedReader{R: resp.Body, N: 4096})
resp.Body.Close()
errs = append(errs, fmt.Sprintf("%s: HTTP %d %q",
url, resp.StatusCode, bytes.TrimSpace(respbody)))
make(chan string)}
UploadToStubHelper(c, st,
- func(kc *KeepClient, url string, reader io.ReadCloser,
- writer io.WriteCloser, upload_status chan uploadStatus) {
+ func(kc *KeepClient, url string, reader io.ReadCloser, writer io.WriteCloser, upload_status chan uploadStatus) {
- go kc.uploadToKeepServer(url, st.expectPath, reader, upload_status, int64(len("foo")), "TestUploadToStubKeepServer")
+ go kc.uploadToKeepServer(url, st.expectPath, reader, upload_status, int64(len("foo")), 0)
writer.Write([]byte("foo"))
writer.Close()
br1 := tr.MakeStreamReader()
- go kc.uploadToKeepServer(url, st.expectPath, br1, upload_status, 3, "TestUploadToStubKeepServerBufferReader")
+ go kc.uploadToKeepServer(url, st.expectPath, br1, upload_status, 3, 0)
writer.Write([]byte("foo"))
writer.Close()
func(kc *KeepClient, url string, reader io.ReadCloser,
writer io.WriteCloser, upload_status chan uploadStatus) {
- go kc.uploadToKeepServer(url, hash, reader, upload_status, 3, "TestFailedUploadToStubKeepServer")
+ go kc.uploadToKeepServer(url, hash, reader, upload_status, 3, 0)
writer.Write([]byte("foo"))
writer.Close()
"io"
"io/ioutil"
"log"
+ "math/rand"
"net"
"net/http"
"strings"
"time"
)
+// Function used to emit debug messages. The easiest way to enable
+// keepclient debug messages in your application is to assign
+// log.Printf to DebugPrintf.
+var DebugPrintf = func(string, ...interface{}) {}
+
type keepService struct {
Uuid string `json:"uuid"`
Hostname string `json:"service_host"`
response string
}
-func (this KeepClient) uploadToKeepServer(host string, hash string, body io.ReadCloser,
- upload_status chan<- uploadStatus, expectedLength int64, requestId string) {
+func (this *KeepClient) uploadToKeepServer(host string, hash string, body io.ReadCloser,
+ upload_status chan<- uploadStatus, expectedLength int64, requestID int32) {
var req *http.Request
var err error
var url = fmt.Sprintf("%s/%s", host, hash)
if req, err = http.NewRequest("PUT", url, nil); err != nil {
- log.Printf("[%v] Error creating request PUT %v error: %v", requestId, url, err.Error())
+ log.Printf("[%08x] Error creating request PUT %v error: %v", requestID, url, err.Error())
upload_status <- uploadStatus{err, url, 0, 0, ""}
body.Close()
return
var resp *http.Response
if resp, err = this.Client.Do(req); err != nil {
- log.Printf("[%v] Upload failed %v error: %v", requestId, url, err.Error())
+ log.Printf("[%08x] Upload failed %v error: %v", requestID, url, err.Error())
upload_status <- uploadStatus{err, url, 0, 0, ""}
return
}
defer resp.Body.Close()
defer io.Copy(ioutil.Discard, resp.Body)
- respbody, err2 := ioutil.ReadAll(&io.LimitedReader{resp.Body, 4096})
+ respbody, err2 := ioutil.ReadAll(&io.LimitedReader{R: resp.Body, N: 4096})
response := strings.TrimSpace(string(respbody))
if err2 != nil && err2 != io.EOF {
- log.Printf("[%v] Upload %v error: %v response: %v", requestId, url, err2.Error(), response)
+ log.Printf("[%08x] Upload %v error: %v response: %v", requestID, url, err2.Error(), response)
upload_status <- uploadStatus{err2, url, resp.StatusCode, rep, response}
} else if resp.StatusCode == http.StatusOK {
- log.Printf("[%v] Upload %v success", requestId, url)
+ log.Printf("[%08x] Upload %v success", requestID, url)
upload_status <- uploadStatus{nil, url, resp.StatusCode, rep, response}
} else {
- log.Printf("[%v] Upload %v error: %v response: %v", requestId, url, resp.StatusCode, response)
+ log.Printf("[%08x] Upload %v error: %v response: %v", requestID, url, resp.StatusCode, response)
upload_status <- uploadStatus{errors.New(resp.Status), url, resp.StatusCode, rep, response}
}
}
-func (this KeepClient) putReplicas(
+func (this *KeepClient) putReplicas(
hash string,
tr *streamer.AsyncStream,
expectedLength int64) (locator string, replicas int, err error) {
- // Take the hash of locator and timestamp in order to identify this
- // specific transaction in log statements.
- requestId := fmt.Sprintf("%x", md5.Sum([]byte(hash+time.Now().String())))[0:8]
+ // Generate an arbitrary ID to identify this specific
+ // transaction in debug logs.
+ requestID := rand.Int31()
// Calculate the ordering for uploading to servers
sv := NewRootSorter(this.WritableLocalRoots(), hash).GetSortedRoots()
for active*replicasPerThread < remaining_replicas {
// Start some upload requests
if next_server < len(sv) {
- log.Printf("[%v] Begin upload %s to %s", requestId, hash, sv[next_server])
- go this.uploadToKeepServer(sv[next_server], hash, tr.MakeStreamReader(), upload_status, expectedLength, requestId)
+ log.Printf("[%08x] Begin upload %s to %s", requestID, hash, sv[next_server])
+ go this.uploadToKeepServer(sv[next_server], hash, tr.MakeStreamReader(), upload_status, expectedLength, requestID)
next_server += 1
active += 1
} else {
}
}
}
- log.Printf("[%v] Replicas remaining to write: %v active uploads: %v",
- requestId, remaining_replicas, active)
+ log.Printf("[%08x] Replicas remaining to write: %v active uploads: %v",
+ requestID, remaining_replicas, active)
// Now wait for something to happen.
if active > 0 {
package manifest
import (
+ "errors"
+ "fmt"
"git.curoverse.com/arvados.git/sdk/go/blockdigest"
"log"
+ "regexp"
+ "strconv"
"strings"
)
+var ErrInvalidToken = errors.New("Invalid token")
+
+var LocatorPattern = regexp.MustCompile(
+ "^[0-9a-fA-F]{32}\\+[0-9]+(\\+[A-Z][A-Za-z0-9@_-]+)*$")
+
type Manifest struct {
Text string
}
+// BlockLocator is a parsed manifest block locator: content digest,
+// block size, and any "+hint" suffixes (see ParseBlockLocator).
+type BlockLocator struct {
+ Digest blockdigest.BlockDigest
+ Size int
+ Hints []string
+}
+
+// DataSegment pairs a parsed BlockLocator with its original string
+// form. StreamOffset is presumably the block's starting byte offset
+// within its stream — not exercised in this chunk; confirm with
+// callers.
+type DataSegment struct {
+ BlockLocator
+ Locator string
+ StreamOffset uint64
+}
+
+// FileSegment is a portion of a file that is contained within a
+// single block.
+type FileSegment struct {
+ Locator string
+ // Offset (within this block) of this data segment
+ Offset int
+ // Len is the number of bytes of the file in this block.
+ Len int
+}
+
// Represents a single line from a manifest.
type ManifestStream struct {
StreamName string
Blocks []string
- Files []string
+ FileTokens []string
+}
+
+// escapeSeq matches one manifest escape sequence: a backslash
+// followed by three digits, or an escaped backslash.
+var escapeSeq = regexp.MustCompile(`\\([0-9]{3}|\\)`)
+
+// unescapeSeq decodes a single sequence matched by escapeSeq. The
+// three digits are parsed as octal; a sequence that is not valid
+// octal (e.g. `\009`) is returned unchanged.
+func unescapeSeq(seq string) string {
+ if seq == `\\` {
+ return `\`
+ }
+ i, err := strconv.ParseUint(seq[1:], 8, 8)
+ if err != nil {
+ // Invalid escape sequence: can't unescape.
+ return seq
+ }
+ return string([]byte{byte(i)})
+}
+
+// UnescapeName decodes all manifest escape sequences in s, e.g.
+// `\040` -> " " and `\\` -> `\`.
+func UnescapeName(s string) string {
+ return escapeSeq.ReplaceAllStringFunc(s, unescapeSeq)
+}
+
+// ParseBlockLocator parses a block locator string of the form
+// "hexdigest+size[+hint...]" into a BlockLocator. It returns an
+// error if s does not match LocatorPattern.
+func ParseBlockLocator(s string) (b BlockLocator, err error) {
+ if !LocatorPattern.MatchString(s) {
+ err = fmt.Errorf("String \"%s\" does not match BlockLocator pattern "+
+ "\"%s\".",
+ s,
+ LocatorPattern.String())
+ } else {
+ tokens := strings.Split(s, "+")
+ var blockSize int64
+ var blockDigest blockdigest.BlockDigest
+ // We expect both of the following to succeed since LocatorPattern
+ // restricts the strings appropriately.
+ blockDigest, err = blockdigest.FromString(tokens[0])
+ if err != nil {
+ return
+ }
+ blockSize, err = strconv.ParseInt(tokens[1], 10, 0)
+ if err != nil {
+ return
+ }
+ b.Digest = blockDigest
+ b.Size = int(blockSize)
+ // Any remaining "+"-separated tokens are hints.
+ b.Hints = tokens[2:]
+ }
+ return
+}
+
+// parseFileToken parses a manifest file token of the form
+// "position:length:name", returning the file's starting position and
+// length within the stream and its unescaped name. It returns
+// ErrInvalidToken if the token does not have three ":"-separated
+// parts, or the strconv error if position or length is not a valid
+// unsigned decimal integer.
+func parseFileToken(tok string) (segPos, segLen uint64, name string, err error) {
+ parts := strings.SplitN(tok, ":", 3)
+ if len(parts) != 3 {
+ err = ErrInvalidToken
+ return
+ }
+ segPos, err = strconv.ParseUint(parts[0], 10, 64)
+ if err != nil {
+ return
+ }
+ segLen, err = strconv.ParseUint(parts[1], 10, 64)
+ if err != nil {
+ return
+ }
+ name = UnescapeName(parts[2])
+ return
+}
+
+// FileSegmentIterByName returns a channel producing, in order, the
+// FileSegments that make up the named file within this stream. The
+// channel is closed when iteration finishes.
+func (s *ManifestStream) FileSegmentIterByName(filepath string) <-chan *FileSegment {
+ ch := make(chan *FileSegment)
+ go func() {
+ s.sendFileSegmentIterByName(filepath, ch)
+ close(ch)
+ }()
+ return ch
+}
+
+// sendFileSegmentIterByName sends, for each file token matching
+// "./"+filepath, the FileSegments covering that token's byte range,
+// scanning the stream's blocks linearly. Zero-length tokens produce
+// a segment of the empty block. A nil is sent (and iteration stops)
+// if a block locator cannot be parsed. Invalid file tokens are
+// silently skipped. The channel is not closed here; the caller does
+// that.
+func (s *ManifestStream) sendFileSegmentIterByName(filepath string, ch chan<- *FileSegment) {
+ // Cache of parsed block sizes, filled lazily as blocks are
+ // visited; avoids re-parsing locators across file tokens.
+ blockLens := make([]int, 0, len(s.Blocks))
+ // This is what streamName+"/"+fileName will look like:
+ target := "./" + filepath
+ for _, fTok := range s.FileTokens {
+ wantPos, wantLen, name, err := parseFileToken(fTok)
+ if err != nil {
+ // Skip (!) invalid file tokens.
+ continue
+ }
+ if s.StreamName+"/"+name != target {
+ continue
+ }
+ if wantLen == 0 {
+ // Empty file range: emit the empty block.
+ ch <- &FileSegment{Locator: "d41d8cd98f00b204e9800998ecf8427e+0", Offset: 0, Len: 0}
+ continue
+ }
+ // Linear search for blocks containing data for this
+ // file
+ var blockPos uint64 = 0 // position of block in stream
+ for i, loc := range s.Blocks {
+ if blockPos >= wantPos+wantLen {
+ break
+ }
+ if len(blockLens) <= i {
+ blockLens = blockLens[:i+1]
+ b, err := ParseBlockLocator(loc)
+ if err != nil {
+ // Unparseable locator -> unusable
+ // stream.
+ ch <- nil
+ return
+ }
+ blockLens[i] = b.Size
+ }
+ blockLen := uint64(blockLens[i])
+ if blockPos+blockLen <= wantPos {
+ // Block ends before the wanted range starts.
+ blockPos += blockLen
+ continue
+ }
+ // Start with the whole block, then trim to the
+ // intersection with [wantPos, wantPos+wantLen).
+ fseg := FileSegment{
+ Locator: loc,
+ Offset: 0,
+ Len: blockLens[i],
+ }
+ if blockPos < wantPos {
+ fseg.Offset = int(wantPos - blockPos)
+ fseg.Len -= fseg.Offset
+ }
+ if blockPos+blockLen > wantPos+wantLen {
+ fseg.Len = int(wantPos+wantLen-blockPos) - fseg.Offset
+ }
+ ch <- &fseg
+ blockPos += blockLen
+ }
+ }
}
func parseManifestStream(s string) (m ManifestStream) {
tokens := strings.Split(s, " ")
- m.StreamName = tokens[0]
+ m.StreamName = UnescapeName(tokens[0])
tokens = tokens[1:]
var i int
for i = range tokens {
}
}
m.Blocks = tokens[:i]
- m.Files = tokens[i:]
+ m.FileTokens = tokens[i:]
return
}
return ch
}
+// FileSegmentIterByName returns a channel producing the FileSegments
+// that make up the named file, collected across every stream whose
+// name is a prefix of "./"+filepath. The channel is closed when all
+// streams have been scanned.
+func (m *Manifest) FileSegmentIterByName(filepath string) <-chan *FileSegment {
+ ch := make(chan *FileSegment)
+ go func() {
+ for stream := range m.StreamIter() {
+ if !strings.HasPrefix("./"+filepath, stream.StreamName+"/") {
+ continue
+ }
+ stream.sendFileSegmentIterByName(filepath, ch)
+ }
+ close(ch)
+ }()
+ return ch
+}
+
// Blocks may appear mulitple times within the same manifest if they
// are used by multiple files. In that case this Iterator will output
// the same block multiple times.
package manifest
import (
- "git.curoverse.com/arvados.git/sdk/go/blockdigest"
"io/ioutil"
+ "reflect"
"runtime"
"testing"
+
+ "git.curoverse.com/arvados.git/sdk/go/arvadostest"
+ "git.curoverse.com/arvados.git/sdk/go/blockdigest"
)
func getStackTrace() string {
func expectManifestStream(t *testing.T, actual ManifestStream, expected ManifestStream) {
expectEqual(t, actual.StreamName, expected.StreamName)
expectStringSlicesEqual(t, actual.Blocks, expected.Blocks)
- expectStringSlicesEqual(t, actual.Files, expected.Files)
+ expectStringSlicesEqual(t, actual.FileTokens, expected.FileTokens)
}
func expectBlockLocator(t *testing.T, actual blockdigest.BlockLocator, expected blockdigest.BlockLocator) {
func TestParseManifestStreamSimple(t *testing.T) {
m := parseManifestStream(". 365f83f5f808896ec834c8b595288735+2310+K@qr1hi+Af0c9a66381f3b028677411926f0be1c6282fe67c@542b5ddf 0:2310:qr1hi-8i9sb-ienvmpve1a0vpoi.log.txt")
expectManifestStream(t, m, ManifestStream{StreamName: ".",
- Blocks: []string{"365f83f5f808896ec834c8b595288735+2310+K@qr1hi+Af0c9a66381f3b028677411926f0be1c6282fe67c@542b5ddf"},
- Files: []string{"0:2310:qr1hi-8i9sb-ienvmpve1a0vpoi.log.txt"}})
+ Blocks: []string{"365f83f5f808896ec834c8b595288735+2310+K@qr1hi+Af0c9a66381f3b028677411926f0be1c6282fe67c@542b5ddf"},
+ FileTokens: []string{"0:2310:qr1hi-8i9sb-ienvmpve1a0vpoi.log.txt"}})
+}
+
+// TestParseBlockLocatorSimple parses a locator carrying a size and
+// two hints and checks every parsed field.
+func TestParseBlockLocatorSimple(t *testing.T) {
+ b, err := ParseBlockLocator("365f83f5f808896ec834c8b595288735+2310+K@qr1hi+Af0c9a66381f3b028677411926f0be1c6282fe67c@542b5ddf")
+ if err != nil {
+ t.Fatalf("Unexpected error parsing block locator: %v", err)
+ }
+ expectBlockLocator(t, b, BlockLocator{Digest: blockdigest.AssertFromString("365f83f5f808896ec834c8b595288735"),
+ Size: 2310,
+ Hints: []string{"K@qr1hi",
+ "Af0c9a66381f3b028677411926f0be1c6282fe67c@542b5ddf"}})
+}
func TestStreamIterShortManifestWithBlankStreams(t *testing.T) {
expectManifestStream(t,
firstStream,
ManifestStream{StreamName: ".",
- Blocks: []string{"b746e3d2104645f2f64cd3cc69dd895d+15693477+E2866e643690156651c03d876e638e674dcd79475@5441920c"},
- Files: []string{"0:15893477:chr10_band0_s0_e3000000.fj"}})
+ Blocks: []string{"b746e3d2104645f2f64cd3cc69dd895d+15693477+E2866e643690156651c03d876e638e674dcd79475@5441920c"},
+ FileTokens: []string{"0:15893477:chr10_band0_s0_e3000000.fj"}})
received, ok := <-streamIter
if ok {
Size: 31367794,
Hints: []string{"E53f903684239bcc114f7bf8ff9bd6089f33058db@5441920c"}})
}
+
+// TestUnescape checks UnescapeName corner cases: valid octal escapes
+// decode, while invalid sequences (non-octal digits, stray trailing
+// backslash) pass through unchanged.
+func TestUnescape(t *testing.T) {
+ for _, testCase := range [][]string{
+ {`\040`, ` `},
+ {`\009`, `\009`},
+ {`\\\040\\`, `\ \`},
+ {`\\040\`, `\040\`},
+ } {
+ in := testCase[0]
+ expect := testCase[1]
+ got := UnescapeName(in)
+ if expect != got {
+ t.Errorf("For '%s' got '%s' instead of '%s'", in, got, expect)
+ }
+ }
+}
+
+// fsegtest is one table-driven test case for FileSegmentIterByName.
+type fsegtest struct {
+ mt string // manifest text
+ f string // filename
+ want []FileSegment // segments should be received on channel
+}
+
+// TestFileSegmentIterByName drives Manifest.FileSegmentIterByName
+// over arvadostest.PathologicalManifest and compares the exact
+// sequence of segments produced for each filename.
+func TestFileSegmentIterByName(t *testing.T) {
+ mt := arvadostest.PathologicalManifest
+ for _, testCase := range []fsegtest{
+ {mt: mt, f: "zzzz", want: nil},
+ // This case is too sensitive: it would be acceptable
+ // (even preferable) to return only one empty segment.
+ {mt: mt, f: "foo/zero", want: []FileSegment{{"d41d8cd98f00b204e9800998ecf8427e+0", 0, 0}, {"d41d8cd98f00b204e9800998ecf8427e+0", 0, 0}}},
+ {mt: mt, f: "zero@0", want: []FileSegment{{"d41d8cd98f00b204e9800998ecf8427e+0", 0, 0}}},
+ {mt: mt, f: "zero@1", want: []FileSegment{{"d41d8cd98f00b204e9800998ecf8427e+0", 0, 0}}},
+ {mt: mt, f: "zero@4", want: []FileSegment{{"d41d8cd98f00b204e9800998ecf8427e+0", 0, 0}}},
+ {mt: mt, f: "zero@9", want: []FileSegment{{"d41d8cd98f00b204e9800998ecf8427e+0", 0, 0}}},
+ {mt: mt, f: "f", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 0, 1}}},
+ {mt: mt, f: "ooba", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 1, 2}, {"37b51d194a7513e45b56f6524f2d51f2+3", 0, 2}}},
+ {mt: mt, f: "overlapReverse/o", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 2, 1}}},
+ {mt: mt, f: "overlapReverse/oo", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 1, 2}}},
+ {mt: mt, f: "overlapReverse/ofoo", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 2, 1}, {"acbd18db4cc2f85cedef654fccc4a4d8+3", 0, 3}}},
+ {mt: mt, f: "foo bar/baz", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 0, 3}}},
+ // This case is too sensitive: it would be better to
+ // omit the empty segment.
+ {mt: mt, f: "segmented/frob", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 0, 1}, {"37b51d194a7513e45b56f6524f2d51f2+3", 2, 1}, {"acbd18db4cc2f85cedef654fccc4a4d8+3", 1, 1}, {"d41d8cd98f00b204e9800998ecf8427e+0", 0, 0}, {"37b51d194a7513e45b56f6524f2d51f2+3", 0, 1}}},
+ {mt: mt, f: "segmented/oof", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 1, 2}, {"acbd18db4cc2f85cedef654fccc4a4d8+3", 0, 1}}},
+ } {
+ m := Manifest{Text: testCase.mt}
+ var got []FileSegment
+ for fs := range m.FileSegmentIterByName(testCase.f) {
+ got = append(got, *fs)
+ }
+ if !reflect.DeepEqual(got, testCase.want) {
+ t.Errorf("For %#v:\n got %#v\n want %#v", testCase.f, got, testCase.want)
+ }
+ }
+}
return prefix+fn
-def uploadfiles(files, api, dry_run=False, num_retries=0, project=None):
+def uploadfiles(files, api, dry_run=False, num_retries=0, project=None, fnPattern="$(file %s/%s)"):
# Find the smallest path prefix that includes all the files that need to be uploaded.
# This starts at the root and iteratively removes common parent directory prefixes
# until all file pathes no longer have a common parent.
logger.info("Uploaded to %s", item["uuid"])
for c in files:
- c.fn = "$(file %s/%s)" % (pdh, c.fn)
+ c.fn = fnPattern % (pdh, c.fn)
os.chdir(orgdir)
keep_args['-enforce-permissions'] = 'true'
with open(os.path.join(TEST_TMPDIR, "keep.data-manager-token-file"), "w") as f:
keep_args['-data-manager-token-file'] = f.name
- f.write(os.environ['ARVADOS_API_TOKEN'])
+ f.write(auth_token('data_manager'))
keep_args['-never-delete'] = 'false'
api = arvados.api(
--- /dev/null
+# Install the supplied string (or a randomly generated token, if none
+# is given) as an API token that authenticates to the system user account.
+
+module CreateSuperUserToken
+ require File.dirname(__FILE__) + '/../config/boot'
+ require File.dirname(__FILE__) + '/../config/environment'
+
+ include ApplicationHelper
+
+ # Returns the api_token of a superuser ApiClientAuthorization,
+ # reusing an existing unexpired one when possible. Raises if
+ # supplied_token already exists but belongs to a non-system user.
+ def create_superuser_token supplied_token=nil
+ act_as_system_user do
+ # If token is supplied, verify that it indeed is a superuser token
+ if supplied_token
+ api_client_auth = ApiClientAuthorization.
+ where(api_token: supplied_token).
+ first
+ # System user uuids end in 15 zeros; anything else is not a
+ # superuser token.
+ if api_client_auth && !api_client_auth.user.uuid.match(/-000000000000000$/)
+ raise "Token already exists but is not a superuser token."
+ end
+ end
+
+ # need to create a token
+ if !api_client_auth
+ # Get (or create) trusted api client
+ apiClient = ApiClient.find_or_create_by_url_prefix_and_is_trusted("ssh://root@localhost/", true)
+
+ # Check if there is an unexpired superuser token corresponding to this api client
+ api_client_auth = ApiClientAuthorization.where(
+ 'user_id = (?) AND
+ api_client_id = (?) AND
+ (expires_at IS NULL OR expires_at > CURRENT_TIMESTAMP)',
+ system_user.id, apiClient.id).first
+
+ # none exist; create one with the supplied token
+ # (when supplied_token is nil, presumably the model fills in a
+ # random api_token on save — confirm in ApiClientAuthorization)
+ if !api_client_auth
+ api_client_auth = ApiClientAuthorization.
+ new(user: system_user,
+ api_client_id: apiClient.id,
+ created_by_ip_address: '::1',
+ api_token: supplied_token)
+ api_client_auth.save!
+ end
+ end
+
+ # Return the (found or created) token string.
+ api_client_auth.api_token
+ end
+ end
+end
#
# Print the token on stdout.
-supplied_token = ARGV[0]
-
-require File.dirname(__FILE__) + '/../config/boot'
-require File.dirname(__FILE__) + '/../config/environment'
-
-include ApplicationHelper
-act_as_system_user
+require './lib/create_superuser_token'
+include CreateSuperUserToken
-if supplied_token
- api_client_auth = ApiClientAuthorization.
- where(api_token: supplied_token).
- first
- if api_client_auth && !api_client_auth.user.uuid.match(/-000000000000000$/)
- raise ActiveRecord::RecordNotUnique("Token already exists but is not a superuser token.")
- end
-end
-
-if !api_client_auth
- api_client_auth = ApiClientAuthorization.
- new(user: system_user,
- api_client_id: 0,
- created_by_ip_address: '::1',
- api_token: supplied_token)
- api_client_auth.save!
-end
+supplied_token = ARGV[0]
-puts api_client_auth.api_token
+token = CreateSuperUserToken.create_superuser_token supplied_token
+puts token
api_token: 1a9ffdcga2o7cw8q12dndskomgs1ygli3ns9k2o9hgzgmktc78
expires_at: 2038-01-01 00:00:00
+data_manager:
+ api_client: untrusted
+ user: system_user
+ api_token: 320mkve8qkswstz7ff61glpk3mhgghmg67wmic7elw4z41pke1
+ expires_at: 2038-01-01 00:00:00
+ scopes:
+ - GET /arvados/v1/collections
+ - GET /arvados/v1/keep_services
+ - GET /arvados/v1/keep_services/accessible
+ - GET /arvados/v1/users/current
+ - POST /arvados/v1/logs
+
miniadmin:
api_client: untrusted
user: miniadmin
modified_by_user_uuid: zzzzz-tpzed-user1withloadab
modified_at: 2014-02-03T17:22:54Z
updated_at: 2014-02-03T17:22:54Z
- manifest_text: ". 85877ca2d7e05498dd3d109baf2df106+95+A3a4e26a366ee7e4ed3e476ccf05354761be2e4ae@545a9920 0:95:file_in_subdir1\n./subdir2/subdir3 2bbc341c702df4d8f42ec31f16c10120+64+A315d7e7bad2ce937e711fc454fae2d1194d14d64@545a9920 0:32:file1_in_subdir3.txt 32:32:file2_in_subdir3.txt\n./subdir2/subdir3/subdir4 2bbc341c702df4d8f42ec31f16c10120+64+A315d7e7bad2ce937e711fc454fae2d1194d14d64@545a9920 0:32:file1_in_subdir4.txt 32:32:file2_in_subdir4.txt"
+ manifest_text: ". 85877ca2d7e05498dd3d109baf2df106+95 0:95:file_in_subdir1\n./subdir2/subdir3 2bbc341c702df4d8f42ec31f16c10120+64 0:32:file1_in_subdir3.txt 32:32:file2_in_subdir3.txt\n./subdir2/subdir3/subdir4 2bbc341c702df4d8f42ec31f16c10120+64 0:32:file1_in_subdir4.txt 32:32:file2_in_subdir4.txt"
graph_test_collection1:
uuid: zzzzz-4zz18-bv31uwvy3neko22
modified_at: 2014-02-03T17:22:54Z
updated_at: 2014-02-03T17:22:54Z
name: collection_with_repeated_filenames_and_contents_in_two_dirs_1
- manifest_text: "./dir1 92b53930db60fe94be2a73fc771ba921+34+Af966b611a1e6a7df18e0f20ac742a255c27744b7@550a3f11 0:12:alice 12:12:alice.txt 24:10:bob.txt\n./dir2 56ac2557b1ded11ccab7293dc47d1e88+44+A1780092551dadcb9c74190a793a779cea84d632d@550a3f11 0:27:alice.txt\n"
+ manifest_text: "./dir1 92b53930db60fe94be2a73fc771ba921+34 0:12:alice 12:12:alice.txt 24:10:bob.txt\n./dir2 56ac2557b1ded11ccab7293dc47d1e88+44 0:27:alice.txt\n"
collection_with_repeated_filenames_and_contents_in_two_dirs_2:
uuid: zzzzz-4zz18-duplicatenames2
modified_at: 2014-02-03T17:22:54Z
updated_at: 2014-02-03T17:22:54Z
name: collection_with_repeated_filenames_and_contents_in_two_dirs_2
- manifest_text: "./dir1 92b53930db60fe94be2a73fc771ba921+34+Af966b611a1e6a7df18e0f20ac742a255c27744b7@550a3f11 0:12:alice 12:12:alice.txt 24:10:carol.txt\n./dir2 56ac2557b1ded11ccab7293dc47d1e88+44+A1780092551dadcb9c74190a793a779cea84d632d@550a3f11 0:27:alice.txt\n"
+ manifest_text: "./dir1 92b53930db60fe94be2a73fc771ba921+34 0:12:alice 12:12:alice.txt 24:10:carol.txt\n./dir2 56ac2557b1ded11ccab7293dc47d1e88+44 0:27:alice.txt\n"
foo_and_bar_files_in_dir:
uuid: zzzzz-4zz18-foonbarfilesdir
modified_at: 2014-02-03T17:22:54Z
updated_at: 2014-02-03T17:22:54Z
name: foo_file_in_dir
- manifest_text: "./dir1 a84b928ebdbae3f658518c711beaec02+28+A0cff02249e70e8cd6e55dba49fef4afa3f5bfdfb@550acd28 0:3:bar 12:16:foo\n"
+ manifest_text: "./dir1 3858f62230ac3c915f300c664312c63f+6 3:3:bar 0:3:foo\n"
multi_level_to_combine:
uuid: zzzzz-4zz18-pyw8yp9g3ujh45f
--- /dev/null
+require 'test_helper'
+require 'create_superuser_token'
+
+# Unit tests for CreateSuperUserToken#create_superuser_token:
+# idempotence, reuse of an existing superuser token, expiry handling,
+# and rejection of non-superuser tokens.
+# NOTE(review): the first and third test names differ only by the
+# "resutls"/"results" typo; the typo cannot be fixed here without
+# making the names collide — rename one test when fixing it.
+class CreateSuperUserTokenTest < ActiveSupport::TestCase
+ include CreateSuperUserToken
+
+ test "create superuser token twice and expect same resutls" do
+ # Create a token with some string
+ token1 = create_superuser_token 'atesttoken'
+ assert_not_nil token1
+ assert_equal token1, 'atesttoken'
+
+ # Create token again; this time, we should get the one created earlier
+ token2 = create_superuser_token
+ assert_not_nil token2
+ assert_equal token1, token2
+ end
+
+ test "create superuser token with two different inputs and expect the first both times" do
+ # Create a token with some string
+ token1 = create_superuser_token 'atesttoken'
+ assert_not_nil token1
+ assert_equal token1, 'atesttoken'
+
+ # Create token again with some other string and expect the existing superuser token back
+ token2 = create_superuser_token 'someothertokenstring'
+ assert_not_nil token2
+ assert_equal token1, token2
+ end
+
+ test "create superuser token twice and expect same results" do
+ # Create a token with some string
+ token1 = create_superuser_token 'atesttoken'
+ assert_not_nil token1
+ assert_equal token1, 'atesttoken'
+
+ # Create token again with that same superuser token and expect it back
+ token2 = create_superuser_token 'atesttoken'
+ assert_not_nil token2
+ assert_equal token1, token2
+ end
+
+ test "create superuser token and invoke again with some other valid token" do
+ # Create a token with some string
+ token1 = create_superuser_token 'atesttoken'
+ assert_not_nil token1
+ assert_equal token1, 'atesttoken'
+
+ # Reusing the fixture system_user token is allowed: it already
+ # belongs to the system user.
+ su_token = api_client_authorizations("system_user").api_token
+ token2 = create_superuser_token su_token
+ assert_equal token2, su_token
+ end
+
+ test "create superuser token, expire it, and create again" do
+ # Create a token with some string
+ token1 = create_superuser_token 'atesttoken'
+ assert_not_nil token1
+ assert_equal token1, 'atesttoken'
+
+ # Expire this token and call create again; expect a new token created
+ apiClientAuth = ApiClientAuthorization.where(api_token: token1).first
+ Thread.current[:user] = users(:admin)
+ apiClientAuth.update_attributes expires_at: '2000-10-10'
+
+ token2 = create_superuser_token
+ assert_not_nil token2
+ assert_not_equal token1, token2
+ end
+
+ test "invoke create superuser token with an invalid non-superuser token and expect error" do
+ active_user_token = api_client_authorizations("active").api_token
+ e = assert_raises RuntimeError do
+ create_superuser_token active_user_token
+ end
+ assert_not_nil e
+ assert_equal "Token already exists but is not a superuser token.", e.message
+ end
+end
}
if after, err := rcv.ReadBytes('\n'); err != nil || string(after) != "after\n" {
- t.Fatal("\"after\n\" not received (got \"%s\", %s)", after, err)
+ t.Fatalf("\"after\n\" not received (got \"%s\", %s)", after, err)
}
select {
"time"
)
-const (
- ActiveUserToken = "3kg6k6lzmp9kj5cpkcoxie963cmvjahbt2fod9zru30k1jqdmi"
- AdminToken = "4axaw8zxe0qm22wa6urpp5nskcne8z88cvbupv653y1njyi05h"
-)
-
var arv arvadosclient.ArvadosClient
var keepClient *keepclient.KeepClient
var keepServers []string
arvadostest.StartKeep(2, false)
arv = makeArvadosClient()
+ arv.ApiToken = arvadostest.DataManagerToken
// keep client
keepClient = &keepclient.KeepClient{
return match[1] + "+" + match[2]
}
+func switchToken(t string) func() {
+ orig := arv.ApiToken
+ restore := func() {
+ arv.ApiToken = orig
+ }
+ arv.ApiToken = t
+ return restore
+}
+
func getCollection(t *testing.T, uuid string) Dict {
+ defer switchToken(arvadostest.AdminToken)()
+
getback := make(Dict)
err := arv.Get("collections", uuid, nil, &getback)
if err != nil {
}
func updateCollection(t *testing.T, uuid string, paramName string, paramValue string) {
+ defer switchToken(arvadostest.AdminToken)()
+
err := arv.Update("collections", uuid, arvadosclient.Dict{
"collection": arvadosclient.Dict{
paramName: paramValue,
type Dict map[string]interface{}
func deleteCollection(t *testing.T, uuid string) {
+ defer switchToken(arvadostest.AdminToken)()
+
getback := make(Dict)
err := arv.Delete("collections", uuid, nil, &getback)
if err != nil {
path := keepServers[i] + "/index"
client := http.Client{}
req, err := http.NewRequest("GET", path, nil)
- req.Header.Add("Authorization", "OAuth2 "+AdminToken)
+ req.Header.Add("Authorization", "OAuth2 "+arvadostest.DataManagerToken)
req.Header.Add("Content-Type", "application/octet-stream")
resp, err := client.Do(req)
defer resp.Body.Close()
func getStatus(t *testing.T, path string) interface{} {
client := http.Client{}
req, err := http.NewRequest("GET", path, nil)
- req.Header.Add("Authorization", "OAuth2 "+AdminToken)
+ req.Header.Add("Authorization", "OAuth2 "+arvadostest.DataManagerToken)
req.Header.Add("Content-Type", "application/octet-stream")
resp, err := client.Do(req)
if err != nil {
defer TearDownDataManagerTest(t)
SetupDataManagerTest(t)
- arv.ApiToken = ActiveUserToken
+ arv.ApiToken = arvadostest.ActiveToken
err := singlerun(arv)
if err == nil {
// ServerAddress struct
type ServerAddress struct {
- SSL bool `json:service_ssl_flag`
+ SSL bool `json:"service_ssl_flag"`
Host string `json:"service_host"`
Port int `json:"service_port"`
UUID string `json:"uuid"`
class DockerImageCleaner(DockerImageUseRecorder):
event_handlers = DockerImageUseRecorder.event_handlers.copy()
- def __init__(self, images, docker_client, events):
+ def __init__(self, images, docker_client, events, remove_containers_onexit=False):
super().__init__(images, docker_client, events)
self.logged_unknown = set()
+ self.remove_containers_onexit = remove_containers_onexit
def new_container(self, event, container_hash):
container_image_id = container_hash['Image']
self.images.add_image(image_hash)
return super().new_container(event, container_hash)
+ def _remove_container(self, cid):
+ """Remove the container with ID cid, logging the outcome.
+
+ A Docker API error is logged as a warning and swallowed (cleanup
+ is best-effort); success is logged at info level.
+ """
+ try:
+ self.docker_client.remove_container(cid)
+ except docker.errors.APIError as error:
+ logger.warning("Failed to remove container %s: %s", cid, error)
+ else:
+ logger.info("Removed container %s", cid)
+
+ @event_handlers.on('die')
+ def clean_container(self, event=None):
+ """On a container 'die' event, remove the container immediately
+ if remove_containers_onexit was enabled at construction time."""
+ if self.remove_containers_onexit:
+ self._remove_container(event['id'])
+
+ def check_stopped_containers(self, remove=False):
+ """List already-exited containers; remove each one if remove=True.
+
+ Containers reported by the 'exited' filter whose Status string
+ does not actually start with 'Exited' are logged as errors and
+ left alone, as a safety check against removing live containers.
+ """
+ logger.info("Checking for stopped containers")
+ for c in self.docker_client.containers(filters={'status': 'exited'}):
+ logger.info("Container %s %s", c['Id'], c['Status'])
+ if c['Status'][:6] != 'Exited':
+ logger.error("Unexpected status %s for container %s",
+ c['Status'], c['Id'])
+ elif remove:
+ self._remove_container(c['Id'])
+
@event_handlers.on('destroy')
def clean_images(self, event=None):
for image_id in self.images.should_delete():
parser.add_argument(
'--quota', action='store', type=human_size, required=True,
help="space allowance for Docker images, suffixed with K/M/G/T")
+ parser.add_argument(
+ '--remove-stopped-containers', type=str, default='always',
+ choices=['never', 'onexit', 'always'],
+ help="""when to remove stopped containers (default: always, i.e., remove
+ stopped containers found at startup, and remove containers as
+ soon as they exit)""")
parser.add_argument(
'--verbose', '-v', action='count', default=0,
help="log more information")
images, docker_client, docker_client.events(since=1, until=start_time))
use_recorder.run()
cleaner = DockerImageCleaner(
- images, docker_client, docker_client.events(since=start_time))
- logger.info("Starting cleanup loop")
+ images, docker_client, docker_client.events(since=start_time),
+ remove_containers_onexit=args.remove_stopped_containers != 'never')
+ cleaner.check_stopped_containers(
+ remove=args.remove_stopped_containers == 'always')
+ logger.info("Checking image quota at startup")
cleaner.clean_images()
+ logger.info("Listening for docker events")
cleaner.run()
def main(arguments):
class DockerImageUseRecorderTestCase(unittest.TestCase):
TEST_CLASS = cleaner.DockerImageUseRecorder
+ TEST_CLASS_INIT_KWARGS = {}
def setUp(self):
self.images = mock.MagicMock(name='images')
self.docker_client = mock.MagicMock(name='docker_client')
self.events = []
self.recorder = self.TEST_CLASS(self.images, self.docker_client,
- self.encoded_events)
+ self.encoded_events, **self.TEST_CLASS_INIT_KWARGS)
@property
def encoded_events(self):
self.assertFalse(self.images.del_image.called)
+class DockerContainerCleanerTestCase(DockerImageUseRecorderTestCase):
+ # Reuses the recorder test harness but runs DockerImageCleaner with
+ # container removal on exit enabled.
+ TEST_CLASS = cleaner.DockerImageCleaner
+ TEST_CLASS_INIT_KWARGS = {'remove_containers_onexit': True}
+
+ @mock.patch('arvados_docker.cleaner.logger')
+ def test_failed_container_deletion_handling(self, mockLogger):
+ # A failing remove_container must be logged as a warning, not raised.
+ cid = MockDockerId()
+ self.docker_client.remove_container.side_effect = MockException(500)
+ self.events.append(MockEvent('die', docker_id=cid))
+ self.recorder.run()
+ self.docker_client.remove_container.assert_called_with(cid)
+ self.assertEqual("Failed to remove container %s: %s",
+ mockLogger.warning.call_args[0][0])
+ self.assertEqual(cid,
+ mockLogger.warning.call_args[0][1])
+
+
class HumanSizeTestCase(unittest.TestCase):
def check(self, human_str, count, exp):
self.assertEqual(count * (1024 ** exp),
self.assertLessEqual(test_start_time, event_kwargs[0]['until'])
self.assertIn('since', event_kwargs[1])
self.assertEqual(event_kwargs[0]['until'], event_kwargs[1]['since'])
+
+
+class ContainerRemovalTestCase(unittest.TestCase):
+ """End-to-end tests of cleaner.run() honoring --remove-stopped-containers.
+
+ setUp fakes one pre-existing exited container, one running container,
+ and an event stream for a new container's full lifecycle.
+ """
+ LIFECYCLE = ['create', 'attach', 'start', 'resize', 'die', 'destroy']
+
+ def setUp(self):
+ self.args = mock.MagicMock(name='args')
+ self.docker_client = mock.MagicMock(name='docker_client')
+ self.existingCID = MockDockerId()
+ self.docker_client.containers.return_value = [{
+ 'Id': self.existingCID,
+ 'Status': 'Exited (0) 6 weeks ago',
+ }, {
+ # If docker_client.containers() returns non-exited
+ # containers for some reason, do not remove them.
+ 'Id': MockDockerId(),
+ 'Status': 'Running',
+ }]
+ self.newCID = MockDockerId()
+ self.docker_client.events.return_value = [
+ MockEvent(e, docker_id=self.newCID).encoded()
+ for e in self.LIFECYCLE]
+
+ def test_remove_onexit(self):
+ # 'onexit': only containers that die after startup are removed.
+ self.args.remove_stopped_containers = 'onexit'
+ cleaner.run(self.args, self.docker_client)
+ self.docker_client.remove_container.assert_called_once_with(self.newCID)
+
+ def test_remove_always(self):
+ # 'always': both the pre-existing exited container and the newly
+ # exited one are removed; the running container is untouched.
+ self.args.remove_stopped_containers = 'always'
+ cleaner.run(self.args, self.docker_client)
+ self.docker_client.remove_container.assert_any_call(self.existingCID)
+ self.docker_client.remove_container.assert_any_call(self.newCID)
+ self.assertEqual(2, self.docker_client.remove_container.call_count)
+
+ def test_remove_never(self):
+ # 'never': nothing is ever removed.
+ self.args.remove_stopped_containers = 'never'
+ cleaner.run(self.args, self.docker_client)
+ self.assertEqual(0, self.docker_client.remove_container.call_count)
+
+ def test_container_exited_between_subscribe_events_and_check_existing(self):
+ # A container that exits in the window between subscribing to the
+ # event stream and listing existing containers appears in both, so
+ # removal is attempted twice (the second attempt is harmless).
+ self.args.remove_stopped_containers = 'always'
+ self.docker_client.events.return_value = [
+ MockEvent(e, docker_id=self.existingCID).encoded()
+ for e in ['die', 'destroy']]
+ cleaner.run(self.args, self.docker_client)
+ # Subscribed to events before getting the list of existing
+ # exited containers?
+ self.docker_client.assert_has_calls([
+ mock.call.events(since=mock.ANY),
+ mock.call.containers(filters={'status':'exited'})])
+ # Asked to delete the container twice?
+ self.docker_client.remove_container.assert_has_calls([mock.call(self.existingCID)] * 2)
+ self.assertEqual(2, self.docker_client.remove_container.call_count)
--- /dev/null
+package main
+
+import (
+ "flag"
+ "fmt"
+ "os"
+ "strconv"
+)
+
+// anonymousTokens holds the token(s) tried when a request's own
+// credentials cannot read the target collection; it is populated from
+// ARVADOS_API_TOKEN via the -allow-anonymous flag registered below.
+var anonymousTokens tokenSet
+
+// tokenSet is a list of API tokens that doubles as a boolean
+// command-line flag value (non-empty == enabled).
+type tokenSet []string
+
+// Set implements flag.Value. A true value installs the token from the
+// ARVADOS_API_TOKEN environment variable (unless one is already
+// present); a false value clears the set. A malformed boolean leaves
+// the set unchanged and returns the parse error.
+func (ts *tokenSet) Set(s string) error {
+ v, err := strconv.ParseBool(s)
+ if err != nil {
+ // Previously the zero-valued v fell through to the "false"
+ // branch and cleared the set even on a parse error; report
+ // the error without mutating state instead.
+ return err
+ }
+ if v && len(*ts) == 0 {
+ *ts = append(*ts, os.Getenv("ARVADOS_API_TOKEN"))
+ } else if !v {
+ *ts = (*ts)[:0]
+ }
+ return nil
+}
+
+// String implements flag.Value, reporting "true"/"false" according to
+// whether any anonymous token is currently installed.
+func (ts *tokenSet) String() string {
+ return fmt.Sprintf("%v", len(*ts) > 0)
+}
+
+// IsBoolFlag tells the flag package this flag may be given without a
+// value (i.e., "-allow-anonymous" alone means true).
+func (ts *tokenSet) IsBoolFlag() bool {
+ return true
+}
+
+// init registers the -allow-anonymous flag, backed by anonymousTokens.
+func init() {
+ flag.Var(&anonymousTokens, "allow-anonymous",
+ "Serve public data to anonymous clients. Try the token supplied in the ARVADOS_API_TOKEN environment variable when none of the tokens provided in an HTTP request succeed in reading the desired collection.")
+}
--- /dev/null
+// Keep-web provides read-only HTTP access to files stored in Keep. It
+// serves public data to anonymous and unauthenticated clients, and
+// serves private data to clients that supply Arvados API tokens. It
+// can be installed anywhere with access to Keep services, typically
+// behind a web proxy that supports TLS.
+//
+// See http://doc.arvados.org/install/install-keep-web.html.
+//
+// Run "keep-web -help" to show all supported options.
+//
+// Starting the server
+//
+// Serve HTTP requests at port 1234 on all interfaces:
+//
+// keep-web -listen=:1234
+//
+// Serve HTTP requests at port 1234 on the interface with IP address 1.2.3.4:
+//
+// keep-web -listen=1.2.3.4:1234
+//
+// Proxy configuration
+//
+// Keep-web does not support SSL natively. Typically, it is installed
+// behind a proxy like nginx.
+//
+// Here is an example nginx configuration.
+//
+// http {
+// upstream keep-web {
+// server localhost:1234;
+// }
+// server {
+// listen *:443 ssl;
+// server_name collections.example.com *.collections.example.com ~.*--collections.example.com;
+// ssl_certificate /root/wildcard.example.com.crt;
+// ssl_certificate_key /root/wildcard.example.com.key;
+// location / {
+// proxy_pass http://keep-web;
+// proxy_set_header Host $host;
+// proxy_set_header X-Forwarded-For $remote_addr;
+// }
+// }
+// }
+//
+// It is not necessary to run keep-web on the same host as the nginx
+// proxy. However, TLS is not used between nginx and keep-web, so
+// intervening networks must be secured by other means.
+//
+// Anonymous downloads
+//
+// Use the -allow-anonymous flag with an ARVADOS_API_TOKEN environment
+// variable to specify a token to use when clients try to retrieve
+// files without providing their own Arvados API token.
+//
+// export ARVADOS_API_TOKEN=zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
+// keep-web [...] -allow-anonymous
+//
+// See http://doc.arvados.org/install/install-keep-web.html for examples.
+//
+// Download URLs
+//
+// The following "same origin" URL patterns are supported for public
+// collections and collections shared anonymously via secret links
+// (i.e., collections which can be served by keep-web without making
+// use of any implicit credentials like cookies). See "Same-origin
+// URLs" below.
+//
+// http://collections.example.com/c=uuid_or_pdh/path/file.txt
+// http://collections.example.com/c=uuid_or_pdh/t=TOKEN/path/file.txt
+//
+// The following "multiple origin" URL patterns are supported for all
+// collections:
+//
+// http://uuid_or_pdh--collections.example.com/path/file.txt
+// http://uuid_or_pdh--collections.example.com/t=TOKEN/path/file.txt
+//
+// In the "multiple origin" form, the string "--" can be replaced with
+// "." with identical results (assuming the downstream proxy is
+// configured accordingly). These two are equivalent:
+//
+// http://uuid_or_pdh--collections.example.com/path/file.txt
+// http://uuid_or_pdh.collections.example.com/path/file.txt
+//
+// The first form (with "--" instead of ".") avoids the cost and
+// effort of deploying a wildcard TLS certificate for
+// *.collections.example.com at sites that already have a wildcard
+// certificate for *.example.com. The second form is likely to be
+// easier to configure, and more efficient to run, on a downstream
+// proxy.
+//
+// In all of the above forms, the "collections.example.com" part can
+// be anything at all: keep-web itself ignores everything after the
+// first "." or "--". (Of course, in order for clients to connect at
+// all, DNS and any relevant proxies must be configured accordingly.)
+//
+// In all of the above forms, the "uuid_or_pdh" part can be either a
+// collection UUID or a portable data hash with the "+" character
+// optionally replaced by "-". (When "uuid_or_pdh" appears in the
+// domain name, replacing "+" with "-" is mandatory, because "+" is
+// not a valid character in a domain name.)
+//
+// In all of the above forms, a top level directory called "_" is
+// skipped. In cases where the "path/file.txt" part might start with
+// "t=" or "c=" or "_/", links should be constructed with a leading
+// "_/" to ensure the top level directory is not interpreted as a
+// token or collection ID.
+//
+// Assuming there is a collection with UUID
+// zzzzz-4zz18-znfnqtbbv4spc3w and portable data hash
+// 1f4b0bc7583c2a7f9102c395f4ffc5e3+45, the following URLs are
+// interchangeable:
+//
+// http://zzzzz-4zz18-znfnqtbbv4spc3w.collections.example.com/foo/bar.txt
+// http://zzzzz-4zz18-znfnqtbbv4spc3w.collections.example.com/_/foo/bar.txt
+// http://zzzzz-4zz18-znfnqtbbv4spc3w--collections.example.com/_/foo/bar.txt
+// http://1f4b0bc7583c2a7f9102c395f4ffc5e3-45--foo.example.com/foo/bar.txt
+// http://1f4b0bc7583c2a7f9102c395f4ffc5e3-45--.invalid/foo/bar.txt
+//
+// An additional form is supported specifically to make it more
+// convenient to maintain support for existing Workbench download
+// links:
+//
+// http://collections.example.com/collections/download/uuid_or_pdh/TOKEN/foo/bar.txt
+//
+// A regular Workbench "download" link is also accepted, but
+// credentials passed via cookie, header, etc. are ignored. Only
+// public data can be served this way:
+//
+// http://collections.example.com/collections/uuid_or_pdh/foo/bar.txt
+//
+// Authorization mechanisms
+//
+// A token can be provided in an Authorization header:
+//
+// Authorization: OAuth2 o07j4px7RlJK4CuMYp7C0LDT4CzR1J1qBE5Avo7eCcUjOTikxK
+//
+// A base64-encoded token can be provided in a cookie named "api_token":
+//
+// Cookie: api_token=bzA3ajRweDdSbEpLNEN1TVlwN0MwTERUNEN6UjFKMXFCRTVBdm83ZUNjVWpPVGlreEs=
+//
+// A token can be provided in an URL-encoded query string:
+//
+// GET /foo/bar.txt?api_token=o07j4px7RlJK4CuMYp7C0LDT4CzR1J1qBE5Avo7eCcUjOTikxK
+//
+// A suitably encoded token can be provided in a POST body if the
+// request has a content type of application/x-www-form-urlencoded or
+// multipart/form-data:
+//
+// POST /foo/bar.txt
+// Content-Type: application/x-www-form-urlencoded
+// [...]
+// api_token=o07j4px7RlJK4CuMYp7C0LDT4CzR1J1qBE5Avo7eCcUjOTikxK
+//
+// If a token is provided in a query string or in a POST request, the
+// response is an HTTP 303 redirect to an equivalent GET request, with
+// the token stripped from the query string and added to a cookie
+// instead.
+//
+// Indexes
+//
+// Currently, keep-web does not generate HTML index listings, nor does
+// it serve a default file like "index.html" when a directory is
+// requested. These features are likely to be added in future
+// versions. Until then, keep-web responds with 404 if a directory
+// name (or any path ending with "/") is requested.
+//
+// Compatibility
+//
+// Client-provided authorization tokens are ignored if the client does
+// not provide a Host header.
+//
+// In order to use the query string or a POST form authorization
+// mechanisms, the client must follow 303 redirects; the client must
+// accept cookies with a 303 response and send those cookies when
+// performing the redirect; and either the client or an intervening
+// proxy must resolve a relative URL ("//host/path") if given in a
+// response Location header.
+//
+// Intranet mode
+//
+// Normally, Keep-web accepts requests for multiple collections using
+// the same host name, provided the client's credentials are not being
+// used. This provides insufficient XSS protection in an installation
+// where the "anonymously accessible" data is not truly public, but
+// merely protected by network topology.
+//
+// In such cases -- for example, a site which is not reachable from
+// the internet, where some data is world-readable from Arvados's
+// perspective but is intended to be available only to users within
+// the local network -- the downstream proxy should be configured to
+// return 401 for all paths beginning with "/c=".
+//
+// Same-origin URLs
+//
+// Without the same-origin protection outlined above, a web page
+// stored in collection X could execute JavaScript code that uses the
+// current viewer's credentials to download additional data from
+// collection Y -- data which is accessible to the current viewer, but
+// not to the author of collection X -- from the same origin
+// (``https://collections.example.com/'') and upload it to some other
+// site chosen by the author of collection X.
+//
+// Attachment-Only host
+//
+// It is possible to serve untrusted content and accept user
+// credentials at the same origin as long as the content is only
+// downloaded, never executed by browsers. A single origin (hostname
+// and port) can be designated as an "attachment-only" origin: cookies
+// will be accepted and all responses will have a
+// "Content-Disposition: attachment" header. This behavior is invoked
+// only when the designated origin matches exactly the Host header
+// provided by the client or downstream proxy.
+//
+// keep-web -listen :9999 -attachment-only-host domain.example:9999
+//
+// Trust All Content mode
+//
+// In "trust all content" mode, Keep-web will accept credentials (API
+// tokens) and serve any collection X at
+// "https://collections.example.com/collections/X/path/file.ext".
+// This is UNSAFE except in the special case where everyone who is
+// able to write ANY data to Keep, and every JavaScript and HTML file
+// written to Keep, is also trusted to read ALL of the data in Keep.
+//
+// In such cases you can enable trust-all-content mode.
+//
+// keep-web -listen :9999 -trust-all-content
+//
+// When using trust-all-content mode, the only effect of the
+// -attachment-only-host option is to add a "Content-Disposition:
+// attachment" header.
+//
+// keep-web -listen :9999 -attachment-only-host domain.example:9999 -trust-all-content
+//
+package main
--- /dev/null
+package main
+
+import (
+ "flag"
+ "fmt"
+ "html"
+ "io"
+ "mime"
+ "net/http"
+ "net/url"
+ "os"
+ "strings"
+
+ "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
+ "git.curoverse.com/arvados.git/sdk/go/auth"
+ "git.curoverse.com/arvados.git/sdk/go/httpserver"
+ "git.curoverse.com/arvados.git/sdk/go/keepclient"
+)
+
+type handler struct{}
+
+var (
+ // clientPool supplies arvadosclient connections; ServeHTTP checks
+ // one out per request and returns it when done.
+ clientPool = arvadosclient.MakeClientPool()
+ // trustAllContent and attachmentOnlyHost are set by the command
+ // line flags registered in init.
+ trustAllContent = false
+ attachmentOnlyHost = ""
+)
+
+// init registers the -attachment-only-host and -trust-all-content flags.
+func init() {
+ flag.StringVar(&attachmentOnlyHost, "attachment-only-host", "",
+ "Accept credentials, and add \"Content-Disposition: attachment\" response headers, for requests at this hostname:port. Prohibiting inline display makes it possible to serve untrusted and non-public content from a single origin, i.e., without wildcard DNS or SSL.")
+ flag.BoolVar(&trustAllContent, "trust-all-content", false,
+ "Serve non-public content from a single origin. Dangerous: read docs before using!")
+}
+
+// return a UUID or PDH if s begins with a UUID or URL-encoded PDH;
+// otherwise return "".
+func parseCollectionIDFromDNSName(s string) string {
+ // Strip domain.
+ if i := strings.IndexRune(s, '.'); i >= 0 {
+ s = s[:i]
+ }
+ // Names like {uuid}--collections.example.com serve the same
+ // purpose as {uuid}.collections.example.com but can reduce
+ // cost/effort of using [additional] wildcard certificates.
+ if i := strings.Index(s, "--"); i >= 0 {
+ s = s[:i]
+ }
+ if arvadosclient.UUIDMatch(s) {
+ return s
+ }
+ // Only the first "-" is restored to "+" -- presumably a PDH
+ // contains exactly one "+" (hash+size); PDHMatch rejects anything
+ // else left over.
+ if pdh := strings.Replace(s, "-", "+", 1); arvadosclient.PDHMatch(pdh) {
+ return pdh
+ }
+ return ""
+}
+
+var urlPDHDecoder = strings.NewReplacer(" ", "+", "-", "+")
+
+// return a UUID or PDH if s is a UUID or a PDH (even if it is a PDH
+// with "+" replaced by " " or "-"); otherwise return "".
+// parseCollectionIDFromURL returns s unchanged when it is a collection
+// UUID; otherwise it decodes " "/"-" back to "+" and returns the
+// result when that forms a valid PDH; otherwise it returns "".
+func parseCollectionIDFromURL(s string) string {
+ if arvadosclient.UUIDMatch(s) {
+ return s
+ }
+ decoded := urlPDHDecoder.Replace(s)
+ if !arvadosclient.PDHMatch(decoded) {
+ return ""
+ }
+ return decoded
+}
+
+// ServeHTTP maps the request's host name and path onto a collection ID,
+// a file path within that collection, and a list of candidate API
+// tokens; tries each token until the collection can be fetched; then
+// streams the requested file from Keep. Authorization-related flows
+// (query-string token to cookie redirect, path tokens, anonymous
+// fallback) are documented inline below and in doc.go.
+func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
+ var statusCode = 0
+ var statusText string
+
+ remoteAddr := r.RemoteAddr
+ if xff := r.Header.Get("X-Forwarded-For"); xff != "" {
+ remoteAddr = xff + "," + remoteAddr
+ }
+
+ // Wrap the ResponseWriter so the deferred logger can see what
+ // status/bytes were actually sent, and reconcile with statusCode.
+ w := httpserver.WrapResponseWriter(wOrig)
+ defer func() {
+ if statusCode == 0 {
+ statusCode = w.WroteStatus()
+ } else if w.WroteStatus() == 0 {
+ w.WriteHeader(statusCode)
+ } else if w.WroteStatus() != statusCode {
+ httpserver.Log(r.RemoteAddr, "WARNING",
+ fmt.Sprintf("Our status changed from %d to %d after we sent headers", w.WroteStatus(), statusCode))
+ }
+ if statusText == "" {
+ statusText = http.StatusText(statusCode)
+ }
+ httpserver.Log(remoteAddr, statusCode, statusText, w.WroteBodyBytes(), r.Method, r.Host, r.URL.Path, r.URL.RawQuery)
+ }()
+
+ if r.Method != "GET" && r.Method != "POST" {
+ statusCode, statusText = http.StatusMethodNotAllowed, r.Method
+ return
+ }
+
+ arv := clientPool.Get()
+ if arv == nil {
+ statusCode, statusText = http.StatusInternalServerError, "Pool failed: "+clientPool.Err().Error()
+ return
+ }
+ defer clientPool.Put(arv)
+
+ // NOTE(review): assumes r.URL.Path is non-empty ("/"-rooted);
+ // r.URL.Path[1:] would panic on a directly-constructed request
+ // with an empty Path -- confirm whether that can reach here.
+ pathParts := strings.Split(r.URL.Path[1:], "/")
+
+ var targetID string
+ var targetPath []string
+ var tokens []string
+ var reqTokens []string
+ var pathToken bool
+ var attachment bool
+ credentialsOK := trustAllContent
+
+ if r.Host != "" && r.Host == attachmentOnlyHost {
+ credentialsOK = true
+ attachment = true
+ } else if r.FormValue("disposition") == "attachment" {
+ attachment = true
+ }
+
+ if targetID = parseCollectionIDFromDNSName(r.Host); targetID != "" {
+ // http://ID.collections.example/PATH...
+ credentialsOK = true
+ targetPath = pathParts
+ } else if len(pathParts) >= 2 && strings.HasPrefix(pathParts[0], "c=") {
+ // /c=ID/PATH...
+ targetID = parseCollectionIDFromURL(pathParts[0][2:])
+ targetPath = pathParts[1:]
+ } else if len(pathParts) >= 3 && pathParts[0] == "collections" {
+ if len(pathParts) >= 5 && pathParts[1] == "download" {
+ // /collections/download/ID/TOKEN/PATH...
+ targetID = pathParts[2]
+ tokens = []string{pathParts[3]}
+ targetPath = pathParts[4:]
+ pathToken = true
+ } else {
+ // /collections/ID/PATH...
+ targetID = pathParts[1]
+ tokens = anonymousTokens
+ targetPath = pathParts[2:]
+ }
+ } else {
+ statusCode = http.StatusNotFound
+ return
+ }
+ if t := r.FormValue("api_token"); t != "" {
+ // The client provided an explicit token in the query
+ // string, or a form in POST body. We must put the
+ // token in an HttpOnly cookie, and redirect to the
+ // same URL with the query param redacted and method =
+ // GET.
+
+ if !credentialsOK {
+ // It is not safe to copy the provided token
+ // into a cookie unless the current vhost
+ // (origin) serves only a single collection or
+ // we are in trustAllContent mode.
+ statusCode = http.StatusBadRequest
+ return
+ }
+
+ // The HttpOnly flag is necessary to prevent
+ // JavaScript code (included in, or loaded by, a page
+ // in the collection being served) from employing the
+ // user's token beyond reading other files in the same
+ // domain, i.e., same collection.
+ //
+ // The 303 redirect is necessary in the case of a GET
+ // request to avoid exposing the token in the Location
+ // bar, and in the case of a POST request to avoid
+ // raising warnings when the user refreshes the
+ // resulting page.
+
+ http.SetCookie(w, &http.Cookie{
+ Name: "arvados_api_token",
+ Value: auth.EncodeTokenCookie([]byte(t)),
+ Path: "/",
+ HttpOnly: true,
+ })
+ // No Scheme: this yields a protocol-relative "//host/path"
+ // Location, as documented under Compatibility in doc.go.
+ redir := (&url.URL{Host: r.Host, Path: r.URL.Path}).String()
+
+ w.Header().Add("Location", redir)
+ statusCode, statusText = http.StatusSeeOther, redir
+ w.WriteHeader(statusCode)
+ io.WriteString(w, `<A href="`)
+ io.WriteString(w, html.EscapeString(redir))
+ io.WriteString(w, `">Continue</A>`)
+ return
+ }
+
+ if tokens == nil && strings.HasPrefix(targetPath[0], "t=") {
+ // http://ID.example/t=TOKEN/PATH...
+ // /c=ID/t=TOKEN/PATH...
+ //
+ // This form must only be used to pass scoped tokens
+ // that give permission for a single collection. See
+ // FormValue case above.
+ tokens = []string{targetPath[0][2:]}
+ pathToken = true
+ targetPath = targetPath[1:]
+ }
+
+ if tokens == nil {
+ if credentialsOK {
+ reqTokens = auth.NewCredentialsFromHTTPRequest(r).Tokens
+ }
+ tokens = append(reqTokens, anonymousTokens...)
+ }
+
+ if len(targetPath) > 0 && targetPath[0] == "_" {
+ // If a collection has a directory called "t=foo" or
+ // "_", it can be served at
+ // //collections.example/_/t=foo/ or
+ // //collections.example/_/_/ respectively:
+ // //collections.example/t=foo/ won't work because
+ // t=foo will be interpreted as a token "foo".
+ targetPath = targetPath[1:]
+ }
+
+ // Try each candidate token in order until one can fetch the
+ // collection; remember per-token 401/404 results to pick the
+ // right error response below.
+ tokenResult := make(map[string]int)
+ collection := make(map[string]interface{})
+ found := false
+ for _, arv.ApiToken = range tokens {
+ err := arv.Get("collections", targetID, nil, &collection)
+ if err == nil {
+ // Success
+ found = true
+ break
+ }
+ if srvErr, ok := err.(arvadosclient.APIServerError); ok {
+ switch srvErr.HttpStatusCode {
+ case 404, 401:
+ // Token broken or insufficient to
+ // retrieve collection
+ tokenResult[arv.ApiToken] = srvErr.HttpStatusCode
+ continue
+ }
+ }
+ // Something more serious is wrong
+ statusCode, statusText = http.StatusInternalServerError, err.Error()
+ return
+ }
+ if !found {
+ if pathToken || !credentialsOK {
+ // Either the URL is a "secret sharing link"
+ // that didn't work out (and asking the client
+ // for additional credentials would just be
+ // confusing), or we don't even accept
+ // credentials at this path.
+ statusCode = http.StatusNotFound
+ return
+ }
+ for _, t := range reqTokens {
+ if tokenResult[t] == 404 {
+ // The client provided valid token(s), but the
+ // collection was not found.
+ statusCode = http.StatusNotFound
+ return
+ }
+ }
+ // The client's token was invalid (e.g., expired), or
+ // the client didn't even provide one. Propagate the
+ // 401 to encourage the client to use a [different]
+ // token.
+ //
+ // TODO(TC): This response would be confusing to
+ // someone trying (anonymously) to download public
+ // data that has been deleted. Allow a referrer to
+ // provide this context somehow?
+ w.Header().Add("WWW-Authenticate", "Basic realm=\"collections\"")
+ statusCode = http.StatusUnauthorized
+ return
+ }
+
+ filename := strings.Join(targetPath, "/")
+ kc, err := keepclient.MakeKeepClient(arv)
+ if err != nil {
+ statusCode, statusText = http.StatusInternalServerError, err.Error()
+ return
+ }
+ rdr, err := kc.CollectionFileReader(collection, filename)
+ if os.IsNotExist(err) {
+ statusCode = http.StatusNotFound
+ return
+ } else if err != nil {
+ statusCode, statusText = http.StatusBadGateway, err.Error()
+ return
+ }
+ defer rdr.Close()
+
+ // One or both of these can be -1 if not found:
+ basenamePos := strings.LastIndex(filename, "/")
+ extPos := strings.LastIndex(filename, ".")
+ if extPos > basenamePos {
+ // Now extPos is safely >= 0.
+ if t := mime.TypeByExtension(filename[extPos:]); t != "" {
+ w.Header().Set("Content-Type", t)
+ }
+ }
+ if rdr, ok := rdr.(keepclient.ReadCloserWithLen); ok {
+ w.Header().Set("Content-Length", fmt.Sprintf("%d", rdr.Len()))
+ }
+ if attachment {
+ w.Header().Set("Content-Disposition", "attachment")
+ }
+
+ w.WriteHeader(http.StatusOK)
+ _, err = io.Copy(w, rdr)
+ if err != nil {
+ statusCode, statusText = http.StatusBadGateway, err.Error()
+ }
+}
--- /dev/null
+package main
+
+import (
+ "html"
+ "io/ioutil"
+ "net/http"
+ "net/http/httptest"
+ "net/url"
+ "regexp"
+ "strings"
+
+ "git.curoverse.com/arvados.git/sdk/go/arvadostest"
+ "git.curoverse.com/arvados.git/sdk/go/auth"
+ check "gopkg.in/check.v1"
+)
+
+// UnitSuite holds gocheck unit tests for this package.
+var _ = check.Suite(&UnitSuite{})
+
+type UnitSuite struct{}
+
+// mustParseURL parses s or panics; for building test fixtures only.
+func mustParseURL(s string) *url.URL {
+ r, err := url.Parse(s)
+ if err != nil {
+ panic("parse URL: " + s)
+ }
+ return r
+}
+
+// TestVhost404 verifies that requests for a nonexistent collection --
+// with or without a path token -- get a bare 404 with an empty body.
+func (s *IntegrationSuite) TestVhost404(c *check.C) {
+ for _, testURL := range []string{
+ arvadostest.NonexistentCollection + ".example.com/theperthcountyconspiracy",
+ arvadostest.NonexistentCollection + ".example.com/t=" + arvadostest.ActiveToken + "/theperthcountyconspiracy",
+ } {
+ resp := httptest.NewRecorder()
+ req := &http.Request{
+ Method: "GET",
+ URL: mustParseURL(testURL),
+ }
+ (&handler{}).ServeHTTP(resp, req)
+ c.Check(resp.Code, check.Equals, http.StatusNotFound)
+ c.Check(resp.Body.String(), check.Equals, "")
+ }
+}
+
+// An authorizer modifies an HTTP request to make use of the given
+// token -- by adding it to a header, cookie, query param, or whatever
+// -- and returns the HTTP status code we should expect from keep-web if
+// the token is invalid (mechanisms differ: some are retryable with
+// other credentials, some are not).
+type authorizer func(*http.Request, string) int
+
+// Token supplied via Authorization header.
+func (s *IntegrationSuite) TestVhostViaAuthzHeader(c *check.C) {
+ doVhostRequests(c, authzViaAuthzHeader)
+}
+// authzViaAuthzHeader adds tok as an OAuth2 Authorization header; an
+// invalid token this way is retryable, hence 401.
+func authzViaAuthzHeader(r *http.Request, tok string) int {
+ r.Header.Add("Authorization", "OAuth2 "+tok)
+ return http.StatusUnauthorized
+}
+
+// Token supplied via arvados_api_token cookie.
+func (s *IntegrationSuite) TestVhostViaCookieValue(c *check.C) {
+ doVhostRequests(c, authzViaCookieValue)
+}
+// authzViaCookieValue stores tok (base64-encoded, as the server
+// expects) in the arvados_api_token cookie; invalid tokens yield 401.
+func authzViaCookieValue(r *http.Request, tok string) int {
+ r.AddCookie(&http.Cookie{
+ Name: "arvados_api_token",
+ Value: auth.EncodeTokenCookie([]byte(tok)),
+ })
+ return http.StatusUnauthorized
+}
+
+// Token supplied via /t=TOKEN/ path component.
+func (s *IntegrationSuite) TestVhostViaPath(c *check.C) {
+ doVhostRequests(c, authzViaPath)
+}
+// authzViaPath embeds tok in the URL path ("secret link" form); a bad
+// path token is not retryable, so the expected failure is 404.
+func authzViaPath(r *http.Request, tok string) int {
+ r.URL.Path = "/t=" + tok + r.URL.Path
+ return http.StatusNotFound
+}
+
+// Token supplied via api_token query parameter.
+func (s *IntegrationSuite) TestVhostViaQueryString(c *check.C) {
+ doVhostRequests(c, authzViaQueryString)
+}
+// authzViaQueryString passes tok as ?api_token=...; invalid tokens
+// end up as a retryable 401 after the cookie redirect.
+func authzViaQueryString(r *http.Request, tok string) int {
+ r.URL.RawQuery = "api_token=" + tok
+ return http.StatusUnauthorized
+}
+
+// Token supplied via urlencoded POST body.
+func (s *IntegrationSuite) TestVhostViaPOST(c *check.C) {
+ doVhostRequests(c, authzViaPOST)
+}
+// authzViaPOST sends tok as an api_token form field in a POST body;
+// invalid tokens yield a retryable 401 after the cookie redirect.
+func authzViaPOST(r *http.Request, tok string) int {
+ r.Method = "POST"
+ r.Header.Add("Content-Type", "application/x-www-form-urlencoded")
+ r.Body = ioutil.NopCloser(strings.NewReader(
+ url.Values{"api_token": {tok}}.Encode()))
+ return http.StatusUnauthorized
+}
+
+// Try some combinations of {url, token} using the given authorization
+// mechanism, and verify the result is correct. The host/path variants
+// cover UUID and PDH addressing, "--" vhost names, and the "_/" path
+// escape.
+func doVhostRequests(c *check.C, authz authorizer) {
+ for _, hostPath := range []string{
+ arvadostest.FooCollection + ".example.com/foo",
+ arvadostest.FooCollection + "--collections.example.com/foo",
+ arvadostest.FooCollection + "--collections.example.com/_/foo",
+ arvadostest.FooPdh + ".example.com/foo",
+ strings.Replace(arvadostest.FooPdh, "+", "-", -1) + "--collections.example.com/foo",
+ arvadostest.FooBarDirCollection + ".example.com/dir1/foo",
+ } {
+ c.Log("doRequests: ", hostPath)
+ doVhostRequestsWithHostPath(c, authz, hostPath)
+ }
+}
+
+// doVhostRequestsWithHostPath exercises one hostPath with a range of
+// tokens: a valid token must serve the file; a valid-but-unauthorized
+// token must 404 (no retry hint); other bad tokens must return the
+// mechanism-specific failure code reported by authz.
+func doVhostRequestsWithHostPath(c *check.C, authz authorizer, hostPath string) {
+ for _, tok := range []string{
+ arvadostest.ActiveToken,
+ arvadostest.ActiveToken[:15],
+ arvadostest.SpectatorToken,
+ "bogus",
+ "",
+ } {
+ u := mustParseURL("http://" + hostPath)
+ req := &http.Request{
+ Method: "GET",
+ Host: u.Host,
+ URL: u,
+ Header: http.Header{},
+ }
+ failCode := authz(req, tok)
+ resp := doReq(req)
+ code, body := resp.Code, resp.Body.String()
+ if tok == arvadostest.ActiveToken {
+ c.Check(code, check.Equals, http.StatusOK)
+ c.Check(body, check.Equals, "foo")
+ } else {
+ c.Check(code >= 400, check.Equals, true)
+ c.Check(code < 500, check.Equals, true)
+ if tok == arvadostest.SpectatorToken {
+ // Valid token never offers to retry
+ // with different credentials.
+ c.Check(code, check.Equals, http.StatusNotFound)
+ } else {
+ // Invalid token can ask to retry
+ // depending on the authz method.
+ c.Check(code, check.Equals, failCode)
+ }
+ c.Check(body, check.Equals, "")
+ }
+ }
+}
+
+func doReq(req *http.Request) *httptest.ResponseRecorder {
+ resp := httptest.NewRecorder()
+ (&handler{}).ServeHTTP(resp, req)
+ if resp.Code != http.StatusSeeOther {
+ return resp
+ }
+ cookies := (&http.Response{Header: resp.Header()}).Cookies()
+ u, _ := req.URL.Parse(resp.Header().Get("Location"))
+ req = &http.Request{
+ Method: "GET",
+ Host: u.Host,
+ URL: u,
+ Header: http.Header{},
+ }
+ for _, c := range cookies {
+ req.AddCookie(c)
+ }
+ return doReq(req)
+}
+
+func (s *IntegrationSuite) TestVhostRedirectQueryTokenToCookie(c *check.C) {
+ s.testVhostRedirectTokenToCookie(c, "GET",
+ arvadostest.FooCollection+".example.com/foo",
+ "?api_token="+arvadostest.ActiveToken,
+ "",
+ "",
+ http.StatusOK,
+ "foo",
+ )
+}
+
+func (s *IntegrationSuite) TestSingleOriginSecretLink(c *check.C) {
+ s.testVhostRedirectTokenToCookie(c, "GET",
+ "example.com/c="+arvadostest.FooCollection+"/t="+arvadostest.ActiveToken+"/foo",
+ "",
+ "",
+ "",
+ http.StatusOK,
+ "foo",
+ )
+}
+
+// Bad token in URL is 404 Not Found because it doesn't make sense to
+// retry the same URL with different authorization.
+func (s *IntegrationSuite) TestSingleOriginSecretLinkBadToken(c *check.C) {
+ s.testVhostRedirectTokenToCookie(c, "GET",
+ "example.com/c="+arvadostest.FooCollection+"/t=bogus/foo",
+ "",
+ "",
+ "",
+ http.StatusNotFound,
+ "",
+ )
+}
+
+// Bad token in a cookie (even if it got there via our own
+// query-string-to-cookie redirect) is, in principle, retryable at the
+// same URL so it's 401 Unauthorized.
+func (s *IntegrationSuite) TestVhostRedirectQueryTokenToBogusCookie(c *check.C) {
+ s.testVhostRedirectTokenToCookie(c, "GET",
+ arvadostest.FooCollection+".example.com/foo",
+ "?api_token=thisisabogustoken",
+ "",
+ "",
+ http.StatusUnauthorized,
+ "",
+ )
+}
+
+func (s *IntegrationSuite) TestVhostRedirectQueryTokenSingleOriginError(c *check.C) {
+ s.testVhostRedirectTokenToCookie(c, "GET",
+ "example.com/c="+arvadostest.FooCollection+"/foo",
+ "?api_token="+arvadostest.ActiveToken,
+ "",
+ "",
+ http.StatusBadRequest,
+ "",
+ )
+}
+
+func (s *IntegrationSuite) TestVhostRedirectQueryTokenTrustAllContent(c *check.C) {
+ defer func(orig bool) {
+ trustAllContent = orig
+ }(trustAllContent)
+ trustAllContent = true
+ s.testVhostRedirectTokenToCookie(c, "GET",
+ "example.com/c="+arvadostest.FooCollection+"/foo",
+ "?api_token="+arvadostest.ActiveToken,
+ "",
+ "",
+ http.StatusOK,
+ "foo",
+ )
+}
+
+func (s *IntegrationSuite) TestVhostRedirectQueryTokenAttachmentOnlyHost(c *check.C) {
+ defer func(orig string) {
+ attachmentOnlyHost = orig
+ }(attachmentOnlyHost)
+ attachmentOnlyHost = "example.com:1234"
+
+ s.testVhostRedirectTokenToCookie(c, "GET",
+ "example.com/c="+arvadostest.FooCollection+"/foo",
+ "?api_token="+arvadostest.ActiveToken,
+ "",
+ "",
+ http.StatusBadRequest,
+ "",
+ )
+
+ resp := s.testVhostRedirectTokenToCookie(c, "GET",
+ "example.com:1234/c="+arvadostest.FooCollection+"/foo",
+ "?api_token="+arvadostest.ActiveToken,
+ "",
+ "",
+ http.StatusOK,
+ "foo",
+ )
+ c.Check(resp.Header().Get("Content-Disposition"), check.Equals, "attachment")
+}
+
+func (s *IntegrationSuite) TestVhostRedirectPOSTFormTokenToCookie(c *check.C) {
+ s.testVhostRedirectTokenToCookie(c, "POST",
+ arvadostest.FooCollection+".example.com/foo",
+ "",
+ "application/x-www-form-urlencoded",
+ url.Values{"api_token": {arvadostest.ActiveToken}}.Encode(),
+ http.StatusOK,
+ "foo",
+ )
+}
+
+func (s *IntegrationSuite) TestVhostRedirectPOSTFormTokenToCookie404(c *check.C) {
+ s.testVhostRedirectTokenToCookie(c, "POST",
+ arvadostest.FooCollection+".example.com/foo",
+ "",
+ "application/x-www-form-urlencoded",
+ url.Values{"api_token": {arvadostest.SpectatorToken}}.Encode(),
+ http.StatusNotFound,
+ "",
+ )
+}
+
+func (s *IntegrationSuite) TestAnonymousTokenOK(c *check.C) {
+ anonymousTokens = []string{arvadostest.AnonymousToken}
+ s.testVhostRedirectTokenToCookie(c, "GET",
+ "example.com/c="+arvadostest.HelloWorldCollection+"/Hello%20world.txt",
+ "",
+ "",
+ "",
+ http.StatusOK,
+ "Hello world\n",
+ )
+}
+
+func (s *IntegrationSuite) TestAnonymousTokenError(c *check.C) {
+ anonymousTokens = []string{"anonymousTokenConfiguredButInvalid"}
+ s.testVhostRedirectTokenToCookie(c, "GET",
+ "example.com/c="+arvadostest.HelloWorldCollection+"/Hello%20world.txt",
+ "",
+ "",
+ "",
+ http.StatusNotFound,
+ "",
+ )
+}
+
+func (s *IntegrationSuite) testVhostRedirectTokenToCookie(c *check.C, method, hostPath, queryString, contentType, reqBody string, expectStatus int, expectRespBody string) *httptest.ResponseRecorder {
+ u, _ := url.Parse(`http://` + hostPath + queryString)
+ req := &http.Request{
+ Method: method,
+ Host: u.Host,
+ URL: u,
+ Header: http.Header{"Content-Type": {contentType}},
+ Body: ioutil.NopCloser(strings.NewReader(reqBody)),
+ }
+
+ resp := httptest.NewRecorder()
+ defer func() {
+ c.Check(resp.Code, check.Equals, expectStatus)
+ c.Check(resp.Body.String(), check.Equals, expectRespBody)
+ }()
+
+ (&handler{}).ServeHTTP(resp, req)
+ if resp.Code != http.StatusSeeOther {
+ return resp
+ }
+ c.Check(resp.Body.String(), check.Matches, `.*href="//`+regexp.QuoteMeta(html.EscapeString(hostPath))+`".*`)
+ cookies := (&http.Response{Header: resp.Header()}).Cookies()
+
+ u, _ = u.Parse(resp.Header().Get("Location"))
+ req = &http.Request{
+ Method: "GET",
+ Host: u.Host,
+ URL: u,
+ Header: http.Header{},
+ }
+ for _, c := range cookies {
+ req.AddCookie(c)
+ }
+
+ resp = httptest.NewRecorder()
+ (&handler{}).ServeHTTP(resp, req)
+ c.Check(resp.Header().Get("Location"), check.Equals, "")
+ return resp
+}
--- /dev/null
+package main
+
+import (
+ "flag"
+ "log"
+ "os"
+)
+
+func init() {
+ // MakeArvadosClient returns an error if this env var isn't
+ // available as a default token (even if we explicitly set a
+ // different token before doing anything with the client). We
+ // set this dummy value during init so it doesn't clobber the
+ // one used by "run test servers".
+ if os.Getenv("ARVADOS_API_TOKEN") == "" {
+ os.Setenv("ARVADOS_API_TOKEN", "xxx")
+ }
+}
+
+func main() {
+ flag.Parse()
+ if os.Getenv("ARVADOS_API_HOST") == "" {
+ log.Fatal("ARVADOS_API_HOST environment variable must be set.")
+ }
+ srv := &server{}
+ if err := srv.Start(); err != nil {
+ log.Fatal(err)
+ }
+ log.Println("Listening at", srv.Addr)
+ if err := srv.Wait(); err != nil {
+ log.Fatal(err)
+ }
+}
--- /dev/null
+package main
+
+import (
+ "flag"
+ "net/http"
+
+ "git.curoverse.com/arvados.git/sdk/go/httpserver"
+)
+
+var address string
+
+func init() {
+ flag.StringVar(&address, "listen", ":80",
+ "Address to listen on: \"host:port\", or \":port\" to listen on all interfaces.")
+}
+
+type server struct {
+ httpserver.Server
+}
+
+func (srv *server) Start() error {
+ mux := http.NewServeMux()
+ mux.Handle("/", &handler{})
+ srv.Handler = mux
+ srv.Addr = address
+ return srv.Server.Start()
+}
--- /dev/null
+package main
+
+import (
+ "crypto/md5"
+ "fmt"
+ "io"
+ "io/ioutil"
+ "net"
+ "os/exec"
+ "strings"
+ "testing"
+
+ "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
+ "git.curoverse.com/arvados.git/sdk/go/arvadostest"
+ "git.curoverse.com/arvados.git/sdk/go/keepclient"
+ check "gopkg.in/check.v1"
+)
+
+var _ = check.Suite(&IntegrationSuite{})
+
+// IntegrationSuite tests need an API server and a keep-web server
+type IntegrationSuite struct {
+ testServer *server
+}
+
+func (s *IntegrationSuite) TestNoToken(c *check.C) {
+ for _, token := range []string{
+ "",
+ "bogustoken",
+ } {
+ hdr, body, _ := s.runCurl(c, token, "collections.example.com", "/collections/"+arvadostest.FooCollection+"/foo")
+ c.Check(hdr, check.Matches, `(?s)HTTP/1.1 404 Not Found\r\n.*`)
+ c.Check(body, check.Equals, "")
+
+ if token != "" {
+ hdr, body, _ = s.runCurl(c, token, "collections.example.com", "/collections/download/"+arvadostest.FooCollection+"/"+token+"/foo")
+ c.Check(hdr, check.Matches, `(?s)HTTP/1.1 404 Not Found\r\n.*`)
+ c.Check(body, check.Equals, "")
+ }
+
+ hdr, body, _ = s.runCurl(c, token, "collections.example.com", "/bad-route")
+ c.Check(hdr, check.Matches, `(?s)HTTP/1.1 404 Not Found\r\n.*`)
+ c.Check(body, check.Equals, "")
+ }
+}
+
+// TODO: Move most cases to functional tests -- at least use Go's own
+// http client instead of forking curl. Just leave enough of an
+// integration test to assure that the documented way of invoking curl
+// really works against the server.
+func (s *IntegrationSuite) Test404(c *check.C) {
+ for _, uri := range []string{
+ // Routing errors (always 404 regardless of what's stored in Keep)
+ "/",
+ "/foo",
+ "/download",
+ "/collections",
+ "/collections/",
+ // Implicit/generated index is not implemented yet;
+ // until then, return 404.
+ "/collections/" + arvadostest.FooCollection,
+ "/collections/" + arvadostest.FooCollection + "/",
+ "/collections/" + arvadostest.FooBarDirCollection + "/dir1",
+ "/collections/" + arvadostest.FooBarDirCollection + "/dir1/",
+ // Non-existent file in collection
+ "/collections/" + arvadostest.FooCollection + "/theperthcountyconspiracy",
+ "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/theperthcountyconspiracy",
+ // Non-existent collection
+ "/collections/" + arvadostest.NonexistentCollection,
+ "/collections/" + arvadostest.NonexistentCollection + "/",
+ "/collections/" + arvadostest.NonexistentCollection + "/theperthcountyconspiracy",
+ "/collections/download/" + arvadostest.NonexistentCollection + "/" + arvadostest.ActiveToken + "/theperthcountyconspiracy",
+ } {
+ hdr, body, _ := s.runCurl(c, arvadostest.ActiveToken, "collections.example.com", uri)
+ c.Check(hdr, check.Matches, "(?s)HTTP/1.1 404 Not Found\r\n.*")
+ c.Check(body, check.Equals, "")
+ }
+}
+
+func (s *IntegrationSuite) Test1GBFile(c *check.C) {
+ if testing.Short() {
+ c.Skip("skipping 1GB integration test in short mode")
+ }
+ s.test100BlockFile(c, 10000000)
+}
+
+func (s *IntegrationSuite) Test300MBFile(c *check.C) {
+ s.test100BlockFile(c, 3000000)
+}
+
+func (s *IntegrationSuite) test100BlockFile(c *check.C, blocksize int) {
+ testdata := make([]byte, blocksize)
+ for i := 0; i < blocksize; i++ {
+ testdata[i] = byte(' ')
+ }
+ arv, err := arvadosclient.MakeArvadosClient()
+ c.Assert(err, check.Equals, nil)
+ arv.ApiToken = arvadostest.ActiveToken
+ kc, err := keepclient.MakeKeepClient(&arv)
+ c.Assert(err, check.Equals, nil)
+ loc, _, err := kc.PutB(testdata[:])
+ c.Assert(err, check.Equals, nil)
+ mtext := "."
+ for i := 0; i < 100; i++ {
+ mtext = mtext + " " + loc
+ }
+ mtext = mtext + fmt.Sprintf(" 0:%d00:testdata.bin\n", blocksize)
+ coll := map[string]interface{}{}
+ err = arv.Create("collections",
+ map[string]interface{}{
+ "collection": map[string]interface{}{
+ "name": fmt.Sprintf("testdata blocksize=%d", blocksize),
+ "manifest_text": mtext,
+ },
+ }, &coll)
+ c.Assert(err, check.Equals, nil)
+ uuid := coll["uuid"].(string)
+
+ hdr, body, size := s.runCurl(c, arv.ApiToken, uuid+".collections.example.com", "/testdata.bin")
+ c.Check(hdr, check.Matches, `(?s)HTTP/1.1 200 OK\r\n.*`)
+ c.Check(hdr, check.Matches, `(?si).*Content-length: `+fmt.Sprintf("%d00", blocksize)+`\r\n.*`)
+ c.Check([]byte(body)[:1234], check.DeepEquals, testdata[:1234])
+ c.Check(size, check.Equals, int64(blocksize)*100)
+}
+
+type curlCase struct {
+ auth string
+ host string
+ path string
+ dataMD5 string
+}
+
+func (s *IntegrationSuite) Test200(c *check.C) {
+ anonymousTokens = []string{arvadostest.AnonymousToken}
+ for _, spec := range []curlCase{
+ // My collection
+ {
+ auth: arvadostest.ActiveToken,
+ host: arvadostest.FooCollection + "--collections.example.com",
+ path: "/foo",
+ dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+ },
+ {
+ auth: arvadostest.ActiveToken,
+ host: arvadostest.FooCollection + ".collections.example.com",
+ path: "/foo",
+ dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+ },
+ {
+ host: strings.Replace(arvadostest.FooPdh, "+", "-", 1) + ".collections.example.com",
+ path: "/t=" + arvadostest.ActiveToken + "/foo",
+ dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+ },
+ {
+ path: "/c=" + arvadostest.FooPdh + "/t=" + arvadostest.ActiveToken + "/foo",
+ dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+ },
+ {
+ path: "/c=" + strings.Replace(arvadostest.FooPdh, "+", "-", 1) + "/t=" + arvadostest.ActiveToken + "/_/foo",
+ dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+ },
+ {
+ path: "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
+ dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+ },
+ {
+ auth: "tokensobogus",
+ path: "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
+ dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+ },
+ {
+ auth: arvadostest.ActiveToken,
+ path: "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
+ dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+ },
+ {
+ auth: arvadostest.AnonymousToken,
+ path: "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
+ dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+ },
+
+ // Anonymously accessible data
+ {
+ path: "/c=" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
+ dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+ },
+ {
+ host: arvadostest.HelloWorldCollection + ".collections.example.com",
+ path: "/Hello%20world.txt",
+ dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+ },
+ {
+ host: arvadostest.HelloWorldCollection + ".collections.example.com",
+ path: "/_/Hello%20world.txt",
+ dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+ },
+ {
+ path: "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
+ dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+ },
+ {
+ auth: arvadostest.ActiveToken,
+ path: "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
+ dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+ },
+ {
+ auth: arvadostest.SpectatorToken,
+ path: "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
+ dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+ },
+ {
+ auth: arvadostest.SpectatorToken,
+ host: arvadostest.HelloWorldCollection + "--collections.example.com",
+ path: "/Hello%20world.txt",
+ dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+ },
+ {
+ auth: arvadostest.SpectatorToken,
+ path: "/collections/download/" + arvadostest.HelloWorldCollection + "/" + arvadostest.SpectatorToken + "/Hello%20world.txt",
+ dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+ },
+ } {
+ host := spec.host
+ if host == "" {
+ host = "collections.example.com"
+ }
+ hdr, body, _ := s.runCurl(c, spec.auth, host, spec.path)
+ c.Check(hdr, check.Matches, `(?s)HTTP/1.1 200 OK\r\n.*`)
+ if strings.HasSuffix(spec.path, ".txt") {
+ c.Check(hdr, check.Matches, `(?s).*\r\nContent-Type: text/plain.*`)
+ // TODO: Check some types that aren't
+ // automatically detected by Go's http server
+ // by sniffing the content.
+ }
+ c.Check(fmt.Sprintf("%x", md5.Sum([]byte(body))), check.Equals, spec.dataMD5)
+ }
+}
+
+// Return the response header block, the body (possibly truncated to the
+// buffer size), and the total body size including any discarded bytes.
+func (s *IntegrationSuite) runCurl(c *check.C, token, host, uri string, args ...string) (hdr, bodyPart string, bodySize int64) {
+ curlArgs := []string{"--silent", "--show-error", "--include"}
+ testHost, testPort, _ := net.SplitHostPort(s.testServer.Addr)
+ curlArgs = append(curlArgs, "--resolve", host+":"+testPort+":"+testHost)
+ if token != "" {
+ curlArgs = append(curlArgs, "-H", "Authorization: OAuth2 "+token)
+ }
+ curlArgs = append(curlArgs, args...)
+ curlArgs = append(curlArgs, "http://"+host+":"+testPort+uri)
+ c.Log(fmt.Sprintf("curlArgs == %#v", curlArgs))
+ cmd := exec.Command("curl", curlArgs...)
+ stdout, err := cmd.StdoutPipe()
+ c.Assert(err, check.Equals, nil)
+ cmd.Stderr = cmd.Stdout
+ go cmd.Start()
+ buf := make([]byte, 2<<27)
+ n, err := io.ReadFull(stdout, buf)
+	// Discard (but measure size of) anything past 256 MiB (the size of buf, 2<<27).
+ var discarded int64
+ if err == io.ErrUnexpectedEOF {
+ err = nil
+ buf = buf[:n]
+ } else {
+ c.Assert(err, check.Equals, nil)
+ discarded, err = io.Copy(ioutil.Discard, stdout)
+ c.Assert(err, check.Equals, nil)
+ }
+ err = cmd.Wait()
+ // Without "-f", curl exits 0 as long as it gets a valid HTTP
+ // response from the server, even if the response status
+ // indicates that the request failed. In our test suite, we
+ // always expect a valid HTTP response, and we parse the
+ // headers ourselves. If curl exits non-zero, our testing
+ // environment is broken.
+ c.Assert(err, check.Equals, nil)
+ hdrsAndBody := strings.SplitN(string(buf), "\r\n\r\n", 2)
+ c.Assert(len(hdrsAndBody), check.Equals, 2)
+ hdr = hdrsAndBody[0]
+ bodyPart = hdrsAndBody[1]
+ bodySize = int64(len(bodyPart)) + discarded
+ return
+}
+
+func (s *IntegrationSuite) SetUpSuite(c *check.C) {
+ arvadostest.StartAPI()
+ arvadostest.StartKeep(2, true)
+
+ arv, err := arvadosclient.MakeArvadosClient()
+ c.Assert(err, check.Equals, nil)
+ arv.ApiToken = arvadostest.ActiveToken
+ kc, err := keepclient.MakeKeepClient(&arv)
+ c.Assert(err, check.Equals, nil)
+ kc.PutB([]byte("Hello world\n"))
+ kc.PutB([]byte("foo"))
+ kc.PutB([]byte("foobar"))
+}
+
+func (s *IntegrationSuite) TearDownSuite(c *check.C) {
+ arvadostest.StopKeep(2)
+ arvadostest.StopAPI()
+}
+
+func (s *IntegrationSuite) SetUpTest(c *check.C) {
+ arvadostest.ResetEnv()
+ s.testServer = &server{}
+ var err error
+ address = "127.0.0.1:0"
+ err = s.testServer.Start()
+ c.Assert(err, check.Equals, nil)
+}
+
+func (s *IntegrationSuite) TearDownTest(c *check.C) {
+ var err error
+ if s.testServer != nil {
+ err = s.testServer.Close()
+ }
+ c.Check(err, check.Equals, nil)
+}
+
+// Gocheck boilerplate
+func Test(t *testing.T) {
+ check.TestingT(t)
+}
pidfile string
)
- flagset := flag.NewFlagSet("default", flag.ExitOnError)
+ flagset := flag.NewFlagSet("keepproxy", flag.ExitOnError)
flagset.StringVar(
&listen,
log.Fatalf("Error setting up arvados client %s", err.Error())
}
+ if os.Getenv("ARVADOS_DEBUG") != "" {
+ keepclient.DebugPrintf = log.Printf
+ }
kc, err := keepclient.MakeKeepClient(&arv)
if err != nil {
log.Fatalf("Error setting up keep client %s", err.Error())
return req.RemoteAddr
}
-func CheckAuthorizationHeader(kc keepclient.KeepClient, cache *ApiTokenCache, req *http.Request) (pass bool, tok string) {
+func CheckAuthorizationHeader(kc *keepclient.KeepClient, cache *ApiTokenCache, req *http.Request) (pass bool, tok string) {
var auth string
if auth = req.Header.Get("Authorization"); auth == "" {
return false, ""
var pass bool
var tok string
- if pass, tok = CheckAuthorizationHeader(kc, this.ApiTokenCache, req); !pass {
+ if pass, tok = CheckAuthorizationHeader(&kc, this.ApiTokenCache, req); !pass {
status, err = http.StatusForbidden, BadAuthorizationHeader
return
}
var pass bool
var tok string
- if pass, tok = CheckAuthorizationHeader(kc, this.ApiTokenCache, req); !pass {
+ if pass, tok = CheckAuthorizationHeader(&kc, this.ApiTokenCache, req); !pass {
err = BadAuthorizationHeader
status = http.StatusForbidden
return
kc := *handler.KeepClient
- ok, token := CheckAuthorizationHeader(kc, handler.ApiTokenCache, req)
+ ok, token := CheckAuthorizationHeader(&kc, handler.ApiTokenCache, req)
if !ok {
status, err = http.StatusForbidden, BadAuthorizationHeader
return
}
}
-func runProxy(c *C, args []string, port int, bogusClientToken bool) keepclient.KeepClient {
+func runProxy(c *C, args []string, port int, bogusClientToken bool) *keepclient.KeepClient {
if bogusClientToken {
os.Setenv("ARVADOS_API_TOKEN", "bogus-token")
}
go main()
}
- return kc
+ return &kc
}
func (s *ServerRequiredSuite) TestPutAskGet(c *C) {
_, rep, err = kc.PutB([]byte("some-more-index-data"))
c.Check(err, Equals, nil)
+ kc.Arvados.ApiToken = arvadostest.DataManagerToken
+
// Invoke GetIndex
for _, spec := range []struct {
prefix string
// in order to permit writes.
const MinFreeKilobytes = BlockSize / 1024
-// Until #6221 is resolved, never_delete must be true.
-// However, allow it to be false in testing with TestDataManagerToken
-const TestDataManagerToken = "4axaw8zxe0qm22wa6urpp5nskcne8z88cvbupv653y1njyi05h"
-
// ProcMounts /proc/mounts
var ProcMounts = "/proc/mounts"
&neverDelete,
"never-delete",
true,
- "If set, nothing will be deleted. HTTP 405 will be returned "+
- "for valid DELETE requests.")
+ "If true, nothing will be deleted. "+
+ "Warning: the relevant features in keepstore and data manager have not been extensively tested. "+
+ "You should leave this option alone unless you can afford to lose data.")
flag.StringVar(
&blobSigningKeyFile,
"permission-key-file",
}
}
- if neverDelete != true && dataManagerToken != TestDataManagerToken {
- log.Fatal("never_delete must be true, see #6221")
+ if neverDelete != true {
+ log.Print("never-delete is not set. Warning: the relevant features in keepstore and data manager have not " +
+ "been extensively tested. You should leave this option alone unless you can afford to lose data.")
}
if blobSigningKeyFile != "" {
This actor simply destroys a cloud node, retrying as needed.
"""
+ # Reasons for a shutdown to be cancelled.
+ WINDOW_CLOSED = "shutdown window closed"
+ NODE_BROKEN = "cloud failed to shut down broken node"
+
def __init__(self, timer_actor, cloud_client, arvados_client, node_monitor,
cancellable=True, retry_wait=1, max_retry_wait=180):
# If a ShutdownActor is cancellable, it will ask the
self._monitor = node_monitor.proxy()
self.cloud_node = self._monitor.cloud_node.get()
self.cancellable = cancellable
+ self.cancel_reason = None
self.success = None
def on_start(self):
self.success = success_flag
return super(ComputeNodeShutdownActor, self)._finished()
- def cancel_shutdown(self):
+ def cancel_shutdown(self, reason):
+ self.cancel_reason = reason
+ self._logger.info("Cloud node %s shutdown cancelled: %s.",
+ self.cloud_node.id, reason)
self._finished(success_flag=False)
def _stop_if_window_closed(orig_func):
def stop_wrapper(self, *args, **kwargs):
if (self.cancellable and
(not self._monitor.shutdown_eligible().get())):
- self._logger.info(
- "Cloud node %s shutdown cancelled - no longer eligible.",
- self.cloud_node.id)
- self._later.cancel_shutdown()
+ self._later.cancel_shutdown(self.WINDOW_CLOSED)
return None
else:
return orig_func(self, *args, **kwargs)
@ComputeNodeStateChangeBase._retry()
def shutdown_node(self):
if not self._cloud.destroy_node(self.cloud_node):
- # Force a retry.
- raise cloud_types.LibcloudError("destroy_node failed")
+ if self._cloud.broken(self.cloud_node):
+ self._later.cancel_shutdown(self.NODE_BROKEN)
+ else:
+ # Force a retry.
+ raise cloud_types.LibcloudError("destroy_node failed")
self._logger.info("Cloud node %s shut down.", self.cloud_node.id)
arv_node = self._arvados_node()
if arv_node is None:
# error are still being investigated.
@ShutdownActorBase._retry((subprocess.CalledProcessError, OSError))
- def cancel_shutdown(self):
+ def cancel_shutdown(self, reason):
if self._nodename:
if self._get_slurm_state() in self.SLURM_DRAIN_STATES:
# Resume from "drng" or "drain"
# Node is in a state such as 'idle' or 'alloc' so don't
# try to resume it because that will just raise an error.
pass
- return super(ComputeNodeShutdownActor, self).cancel_shutdown()
+ return super(ComputeNodeShutdownActor, self).cancel_shutdown(reason)
@ShutdownActorBase._retry((subprocess.CalledProcessError, OSError))
@ShutdownActorBase._stop_if_window_closed
def __init__(self):
self.nodes = {}
self.orphans = {}
+ self._blacklist = set()
# Proxy the methods listed below to self.nodes.
def _proxy_method(name):
def add(self, record):
self.nodes[self.record_key(record)] = record
+ def blacklist(self, key):
+ self._blacklist.add(key)
+
def update_record(self, key, item):
setattr(self.nodes[key], self.RECORD_ATTR, item)
unseen = set(self.nodes.iterkeys())
for item in response:
key = self.item_key(item)
- if key in unseen:
+ if key in self._blacklist:
+ continue
+ elif key in unseen:
unseen.remove(key)
self.update_record(key, item)
else:
self._begin_node_shutdown(record.actor, cancellable=False)
def node_finished_shutdown(self, shutdown_actor):
- success, cloud_node = self._get_actor_attrs(shutdown_actor, 'success',
- 'cloud_node')
+ cloud_node, success, cancel_reason = self._get_actor_attrs(
+ shutdown_actor, 'cloud_node', 'success', 'cancel_reason')
shutdown_actor.stop()
cloud_node_id = cloud_node.id
if not success:
+ if cancel_reason == self._node_shutdown.NODE_BROKEN:
+ self.cloud_nodes.blacklist(cloud_node_id)
del self.shutdowns[cloud_node_id]
elif cloud_node_id in self.booted:
self.booted.pop(cloud_node_id).actor.stop()
class ComputeNodeShutdownActorMixin(testutil.ActorTestMixin):
def make_mocks(self, cloud_node=None, arvados_node=None,
- shutdown_open=True):
+ shutdown_open=True, node_broken=False):
self.timer = testutil.MockTimer()
self.shutdowns = testutil.MockShutdownTimer()
self.shutdowns._set_state(shutdown_open, 300)
self.cloud_client = mock.MagicMock(name='cloud_client')
+ self.cloud_client.broken.return_value = node_broken
self.arvados_client = mock.MagicMock(name='arvados_client')
self.updates = mock.MagicMock(name='update_mock')
if cloud_node is None:
self.make_actor()
self.check_success_flag(False, 2)
self.assertFalse(self.cloud_client.destroy_node.called)
+ self.assertEqual(self.ACTOR_CLASS.WINDOW_CLOSED,
+ self.shutdown_actor.cancel_reason.get(self.TIMEOUT))
def test_shutdown_retries_when_cloud_fails(self):
self.make_mocks()
self.cloud_client.destroy_node.return_value = True
self.check_success_flag(True)
+ def test_shutdown_cancelled_when_cloud_fails_on_broken_node(self):
+ self.make_mocks(node_broken=True)
+ self.cloud_client.destroy_node.return_value = False
+ self.make_actor(start_time=0)
+ self.check_success_flag(False, 2)
+ self.assertEqual(1, self.cloud_client.destroy_node.call_count)
+ self.assertEqual(self.ACTOR_CLASS.NODE_BROKEN,
+ self.shutdown_actor.cancel_reason.get(self.TIMEOUT))
+
def test_late_subscribe(self):
self.make_actor()
subscriber = mock.Mock(name='subscriber_mock')
self.daemon.node_can_shutdown(monitor).get(self.TIMEOUT)
self.assertTrue(shutdown_proxy.called)
+ def test_broken_node_blackholed_after_cancelled_shutdown(self):
+ cloud_node = testutil.cloud_node_mock(8)
+ wishlist = [testutil.MockSize(8)]
+ self.make_daemon([cloud_node], [testutil.arvados_node_mock(8)],
+ wishlist)
+ self.assertEqual(1, self.alive_monitor_count())
+ self.assertFalse(self.node_setup.start.called)
+ monitor = self.monitor_list()[0].proxy()
+ shutdown_proxy = self.node_shutdown.start().proxy
+ shutdown_proxy().cloud_node.get.return_value = cloud_node
+ shutdown_proxy().success.get.return_value = False
+ shutdown_proxy().cancel_reason.get.return_value = self.node_shutdown.NODE_BROKEN
+ self.daemon.update_server_wishlist([]).get(self.TIMEOUT)
+ self.daemon.node_can_shutdown(monitor).get(self.TIMEOUT)
+ self.daemon.node_finished_shutdown(shutdown_proxy()).get(self.TIMEOUT)
+ self.daemon.update_cloud_nodes([cloud_node]).get(self.TIMEOUT)
+ self.daemon.update_server_wishlist(wishlist).get(self.TIMEOUT)
+ self.stop_proxy(self.daemon)
+ self.assertEqual(1, self.node_setup.start.call_count)
+
def test_nodes_shutting_down_replaced_below_max_nodes(self):
cloud_node = testutil.cloud_node_mock(6)
self.make_daemon([cloud_node], [testutil.arvados_node_mock(6)])
// srcConfig
var srcConfig apiConfig
srcConfig.APIHost = os.Getenv("ARVADOS_API_HOST")
- srcConfig.APIToken = os.Getenv("ARVADOS_API_TOKEN")
+ srcConfig.APIToken = arvadostest.DataManagerToken
srcConfig.APIHostInsecure = matchTrue.MatchString(os.Getenv("ARVADOS_API_HOST_INSECURE"))
// dstConfig
var dstConfig apiConfig
dstConfig.APIHost = os.Getenv("ARVADOS_API_HOST")
- dstConfig.APIToken = os.Getenv("ARVADOS_API_TOKEN")
+ dstConfig.APIToken = arvadostest.DataManagerToken
dstConfig.APIHostInsecure = matchTrue.MatchString(os.Getenv("ARVADOS_API_HOST_INSECURE"))
if enforcePermissions {
c.Check(err, IsNil)
c.Assert(srcConfig.APIHost, Equals, os.Getenv("ARVADOS_API_HOST"))
- c.Assert(srcConfig.APIToken, Equals, os.Getenv("ARVADOS_API_TOKEN"))
+ c.Assert(srcConfig.APIToken, Equals, arvadostest.DataManagerToken)
c.Assert(srcConfig.APIHostInsecure, Equals, matchTrue.MatchString(os.Getenv("ARVADOS_API_HOST_INSECURE")))
c.Assert(srcConfig.ExternalClient, Equals, false)
c.Check(err, IsNil)
c.Assert(dstConfig.APIHost, Equals, os.Getenv("ARVADOS_API_HOST"))
- c.Assert(dstConfig.APIToken, Equals, os.Getenv("ARVADOS_API_TOKEN"))
+ c.Assert(dstConfig.APIToken, Equals, arvadostest.DataManagerToken)
c.Assert(dstConfig.APIHostInsecure, Equals, matchTrue.MatchString(os.Getenv("ARVADOS_API_HOST_INSECURE")))
c.Assert(dstConfig.ExternalClient, Equals, false)
c.Check(err, IsNil)
fileContent := "ARVADOS_API_HOST=" + os.Getenv("ARVADOS_API_HOST") + "\n"
- fileContent += "ARVADOS_API_TOKEN=" + os.Getenv("ARVADOS_API_TOKEN") + "\n"
+ fileContent += "ARVADOS_API_TOKEN=" + arvadostest.DataManagerToken + "\n"
fileContent += "ARVADOS_API_HOST_INSECURE=" + os.Getenv("ARVADOS_API_HOST_INSECURE") + "\n"
fileContent += "ARVADOS_EXTERNAL_CLIENT=false\n"
fileContent += "ARVADOS_BLOB_SIGNING_KEY=abcdefg"