closes #9514
authorradhika <radhika@curoverse.com>
Mon, 26 Sep 2016 20:03:17 +0000 (16:03 -0400)
committerradhika <radhika@curoverse.com>
Mon, 26 Sep 2016 20:03:17 +0000 (16:03 -0400)
Merge branch '9514-only-delete-old-container-logs'

31 files changed:
build/run-build-packages.sh
sdk/cwl/arvados_cwl/__init__.py
sdk/cwl/arvados_cwl/arv-cwl-schema.yml [new file with mode: 0644]
sdk/cwl/arvados_cwl/arvcontainer.py
sdk/cwl/arvados_cwl/arvjob.py
sdk/cwl/arvados_cwl/arvworkflow.py
sdk/cwl/arvados_cwl/pathmapper.py
sdk/cwl/arvados_cwl/runner.py
sdk/cwl/setup.py
sdk/cwl/tests/test_container.py
sdk/cwl/tests/test_job.py
sdk/cwl/tests/wf/scatter2.cwl [new file with mode: 0644]
sdk/cwl/tests/wf/scatter2_subwf.cwl [new file with mode: 0644]
sdk/go/arvados/duration.go
sdk/go/arvadosclient/arvadosclient.go
sdk/go/arvadosclient/pool.go
sdk/go/arvadostest/run_servers.go
services/api/app/models/arvados_model.rb
services/arv-git-httpd/arv-git-httpd.service [new file with mode: 0644]
services/arv-git-httpd/auth_handler.go
services/arv-git-httpd/git_handler.go
services/arv-git-httpd/git_handler_test.go
services/arv-git-httpd/gitolite_test.go
services/arv-git-httpd/integration_test.go
services/arv-git-httpd/main.go
services/arv-git-httpd/server.go
services/arv-git-httpd/usage.go [new file with mode: 0644]
services/fuse/tests/test_mount.py
services/keepproxy/keepproxy.go
services/keepproxy/keepproxy.service [new file with mode: 0644]
services/keepproxy/usage.go [new file with mode: 0644]

index 08b60f654e3f67251f10d59ebdaa0fda3f5c7856..3acabab758aae96adf7f1b61d9ae7fae4eaf1ec9 100755 (executable)
@@ -476,7 +476,7 @@ fpm_build ruamel.yaml "" "" python 0.12.4 --python-setup-py-arguments "--single-
 fpm_build cwltest "" "" python 1.0.20160907111242
 
 # And for cwltool we have the same problem as for schema_salad. Ward, 2016-03-17
-fpm_build cwltool "" "" python 1.0.20160922135240
+fpm_build cwltool "" "" python 1.0.20160923180109
 
 # FPM eats the trailing .0 in the python-rdflib-jsonld package when built with 'rdflib-jsonld>=0.3.0'. Force the version. Ward, 2016-03-25
 fpm_build rdflib-jsonld "" "" python 0.3.0
index 7bfdba8b80e48d9cb24d7054933b428aa3729764..cd38003daef65d1c64007818c193ce2bd3398513 100644 (file)
@@ -15,6 +15,7 @@ import pkg_resources  # part of setuptools
 from cwltool.errors import WorkflowException
 import cwltool.main
 import cwltool.workflow
+import schema_salad
 
 import arvados
 import arvados.config
@@ -22,9 +23,10 @@ import arvados.config
 from .arvcontainer import ArvadosContainer, RunnerContainer
 from .arvjob import ArvadosJob, RunnerJob, RunnerTemplate
 from .arvtool import ArvadosCommandTool
+from .arvworkflow import ArvadosWorkflow, upload_workflow
 from .fsaccess import CollectionFsAccess
-from .arvworkflow import make_workflow
 from .perf import Perf
+from cwltool.pack import pack
 
 from cwltool.process import shortname, UnsupportedRequirement
 from cwltool.pathmapper import adjustFileObjs
@@ -53,6 +55,7 @@ class ArvCwlRunner(object):
         self.work_api = work_api
         self.stop_polling = threading.Event()
         self.poll_api = None
+        self.pipeline = None
 
         if self.work_api is None:
             # todo: autodetect API to use.
@@ -61,10 +64,12 @@ class ArvCwlRunner(object):
         if self.work_api not in ("containers", "jobs"):
             raise Exception("Unsupported API '%s'" % self.work_api)
 
-    def arvMakeTool(self, toolpath_object, **kwargs):
+    def arv_make_tool(self, toolpath_object, **kwargs):
+        kwargs["work_api"] = self.work_api
         if "class" in toolpath_object and toolpath_object["class"] == "CommandLineTool":
-            kwargs["work_api"] = self.work_api
             return ArvadosCommandTool(self, toolpath_object, **kwargs)
+        elif "class" in toolpath_object and toolpath_object["class"] == "Workflow":
+            return ArvadosWorkflow(self, toolpath_object, **kwargs)
         else:
             return cwltool.workflow.defaultMakeTool(toolpath_object, **kwargs)
 
@@ -155,18 +160,11 @@ class ArvCwlRunner(object):
             for v in obj:
                 self.check_writable(v)
 
-    def arvExecutor(self, tool, job_order, **kwargs):
+    def arv_executor(self, tool, job_order, **kwargs):
         self.debug = kwargs.get("debug")
 
         tool.visit(self.check_writable)
 
-        if kwargs.get("quiet"):
-            logger.setLevel(logging.WARN)
-            logging.getLogger('arvados.arv-run').setLevel(logging.WARN)
-
-        if self.debug:
-            logger.setLevel(logging.DEBUG)
-
         useruuid = self.api.users().current().execute()["uuid"]
         self.project_uuid = kwargs.get("project_uuid") if kwargs.get("project_uuid") else useruuid
         self.pipeline = None
@@ -180,7 +178,7 @@ class ArvCwlRunner(object):
             return tmpl.uuid
 
         if kwargs.get("create_workflow") or kwargs.get("update_workflow"):
-            return make_workflow(self, tool, job_order, self.project_uuid, kwargs.get("update_workflow"))
+            return upload_workflow(self, tool, job_order, self.project_uuid, kwargs.get("update_workflow"))
 
         self.ignore_docker_for_reuse = kwargs.get("ignore_docker_for_reuse")
 
@@ -367,6 +365,16 @@ def arg_parser():  # type: () -> argparse.ArgumentParser
 
     return parser
 
+def add_arv_hints():
+    cache = {}
+    res = pkg_resources.resource_stream(__name__, 'arv-cwl-schema.yml')
+    cache["http://arvados.org/cwl"] = res.read()
+    res.close()
+    _, cwlnames, _, _ = cwltool.process.get_schema("v1.0")
+    _, extnames, _, _ = schema_salad.schema.load_schema("http://arvados.org/cwl", cache=cache)
+    for n in extnames.names:
+        if not cwlnames.has_name("http://arvados.org/cwl#"+n, ""):
+            cwlnames.add_name("http://arvados.org/cwl#"+n, "", extnames.get_name(n, ""))
 
 def main(args, stdout, stderr, api_client=None):
     parser = arg_parser()
@@ -376,6 +384,8 @@ def main(args, stdout, stderr, api_client=None):
     if (arvargs.create_template or arvargs.create_workflow or arvargs.update_workflow) and not arvargs.job_order:
         job_order_object = ({}, "")
 
+    add_arv_hints()
+
     try:
         if api_client is None:
             api_client=arvados.api('v1', model=OrderedJsonModel())
@@ -384,14 +394,21 @@ def main(args, stdout, stderr, api_client=None):
         logger.error(e)
         return 1
 
+    if arvargs.debug:
+        logger.setLevel(logging.DEBUG)
+
+    if arvargs.quiet:
+        logger.setLevel(logging.WARN)
+        logging.getLogger('arvados.arv-run').setLevel(logging.WARN)
+
     arvargs.conformance_test = None
     arvargs.use_container = True
 
     return cwltool.main.main(args=arvargs,
                              stdout=stdout,
                              stderr=stderr,
-                             executor=runner.arvExecutor,
-                             makeTool=runner.arvMakeTool,
+                             executor=runner.arv_executor,
+                             makeTool=runner.arv_make_tool,
                              versionfunc=versionstring,
                              job_order_object=job_order_object,
                              make_fs_access=partial(CollectionFsAccess, api_client=api_client))
diff --git a/sdk/cwl/arvados_cwl/arv-cwl-schema.yml b/sdk/cwl/arvados_cwl/arv-cwl-schema.yml
new file mode 100644 (file)
index 0000000..44b1b06
--- /dev/null
@@ -0,0 +1,47 @@
+$base: "http://arvados.org/cwl#"
+$graph:
+- name: RunInSingleContainer
+  type: record
+  doc: |
+    Indicates that a subworkflow should run in a single container
+    and not be scheduled as separate steps.
+  fields:
+    - name: class
+      type: string
+      doc: "Always 'arv:RunInSingleContainer'"
+      jsonldPredicate:
+        _id: "@type"
+        _type: "@vocab"
+
+- name: RuntimeConstraints
+  type: record
+  doc: |
+    Set Arvados-specific runtime hints.
+  fields:
+    - name: class
+      type: string
+      doc: "Always 'arv:RuntimeConstraints'"
+      jsonldPredicate:
+        _id: "@type"
+        _type: "@vocab"
+    - name: keep_cache
+      type: int?
+      doc: |
+        Size of file data buffer for Keep mount in MiB. Default is 256
+        MiB. Increase this to reduce cache thrashing in situations such as
+        accessing multiple large (64+ MiB) files at the same time, or
+        performing random access on a large file.
+
+- name: APIRequirement
+  type: record
+  doc: |
+    Indicates that process wants to access to the Arvados API.  Will be granted
+    limited network access and have ARVADOS_API_HOST and ARVADOS_API_TOKEN set
+    in the environment.
+  fields:
+    - name: class
+      type: string
+      doc: "Always 'arv:APIRequirement'"
+      jsonldPredicate:
+        _id: "@type"
+        _type: "@vocab"
index 413435db2392e38aa22ba0d5222fef333080dbde..aaae7d9f66ce9d72a2dcade588f58f0e5f6894e9 100644 (file)
@@ -90,6 +90,14 @@ class ArvadosContainer(object):
             runtime_constraints["vcpus"] = resources.get("cores", 1)
             runtime_constraints["ram"] = resources.get("ram") * 2**20
 
+        api_req, _ = get_feature(self, "http://arvados.org/cwl#APIRequirement")
+        if api_req:
+            runtime_constraints["API"] = True
+
+        runtime_req, _ = get_feature(self, "http://arvados.org/cwl#RuntimeConstraints")
+        if runtime_req:
+            logger.warn("RuntimeConstraints not yet supported by container API")
+
         container_request["mounts"] = mounts
         container_request["runtime_constraints"] = runtime_constraints
 
index e6d0cba65fa00b6c0a185861d3dde89db2c3e4e9..10980024812c2fce285addec2a7f4f237cc3f6f1 100644 (file)
@@ -1,6 +1,7 @@
 import logging
 import re
 import copy
+import json
 
 from cwltool.process import get_feature, shortname
 from cwltool.errors import WorkflowException
@@ -84,6 +85,10 @@ class ArvadosJob(object):
             runtime_constraints["min_ram_mb_per_node"] = resources.get("ram")
             runtime_constraints["min_scratch_mb_per_node"] = resources.get("tmpdirSize", 0) + resources.get("outdirSize", 0)
 
+        runtime_req, _ = get_feature(self, "http://arvados.org/cwl#RuntimeConstraints")
+        if runtime_req:
+            runtime_constraints["keep_cache_mb_per_task"] = runtime_req["keep_cache"]
+
         filters = [["repository", "=", "arvados"],
                    ["script", "=", "crunchrunner"],
                    ["script_version", "in git", "9e5b98e8f5f4727856b53447191f9c06e3da2ba6"]]
@@ -185,9 +190,11 @@ class ArvadosJob(object):
             except WorkflowException as e:
                 logger.error("Error while collecting job outputs:\n%s", e, exc_info=(e if self.arvrunner.debug else False))
                 processStatus = "permanentFail"
+                outputs = None
             except Exception as e:
                 logger.exception("Got unknown exception while collecting job outputs:")
                 processStatus = "permanentFail"
+                outputs = None
 
             self.output_callback(outputs, processStatus)
         finally:
index 8e45890a8d6226d9387524e1dc330effe900e163..ab8ad035fd6c8d13d14b3d226135993e6c0792ce 100644 (file)
@@ -1,16 +1,22 @@
 import os
 import json
 import copy
+import logging
 
 from cwltool.pack import pack
 from cwltool.load_tool import fetch_document
 from cwltool.process import shortname
+from cwltool.workflow import Workflow, WorkflowException
+from cwltool.pathmapper import adjustFileObjs, adjustDirObjs
 
 import ruamel.yaml as yaml
 
 from .runner import upload_docker, upload_dependencies
+from .arvtool import ArvadosCommandTool
 
-def make_workflow(arvRunner, tool, job_order, project_uuid, update_uuid):
+logger = logging.getLogger('arvados.cwl-runner')
+
+def upload_workflow(arvRunner, tool, job_order, project_uuid, update_uuid):
     upload_docker(arvRunner, tool)
 
     document_loader, workflowobj, uri = (tool.doc_loader, tool.doc_loader.fetch(tool.tool["id"]), tool.tool["id"])
@@ -39,3 +45,78 @@ def make_workflow(arvRunner, tool, job_order, project_uuid, update_uuid):
         return arvRunner.api.workflows().update(uuid=update_uuid, body=body).execute(num_retries=arvRunner.num_retries)["uuid"]
     else:
         return arvRunner.api.workflows().create(body=body).execute(num_retries=arvRunner.num_retries)["uuid"]
+
+class ArvadosWorkflow(Workflow):
+    """Wrap cwltool Workflow to override selected methods."""
+
+    def __init__(self, arvrunner, toolpath_object, **kwargs):
+        super(ArvadosWorkflow, self).__init__(toolpath_object, **kwargs)
+        self.arvrunner = arvrunner
+        self.work_api = kwargs["work_api"]
+
+    def job(self, joborder, output_callback, **kwargs):
+        kwargs["work_api"] = self.work_api
+        req, _ = self.get_requirement("http://arvados.org/cwl#RunInSingleContainer")
+        if req:
+            document_loader, workflowobj, uri = (self.doc_loader, self.doc_loader.fetch(self.tool["id"]), self.tool["id"])
+
+            workflowobj["requirements"] = self.requirements + workflowobj.get("requirements", [])
+            workflowobj["hints"] = self.hints + workflowobj.get("hints", [])
+            packed = pack(document_loader, workflowobj, uri, self.metadata)
+
+            upload_dependencies(self.arvrunner,
+                                kwargs.get("name", ""),
+                                document_loader,
+                                packed,
+                                uri,
+                                False)
+
+            upload_dependencies(self.arvrunner,
+                                os.path.basename(joborder.get("id", "#")),
+                                document_loader,
+                                joborder,
+                                joborder.get("id", "#"),
+                                False)
+
+            joborder_keepmount = copy.deepcopy(joborder)
+
+            def keepmount(obj):
+                if obj["location"].startswith("keep:"):
+                    obj["location"] = "/keep/" + obj["location"][5:]
+                elif obj["location"].startswith("_:"):
+                    pass
+                else:
+                    raise WorkflowException("Location is not a keep reference or a literal: '%s'" % obj["location"])
+                if "listing" in obj:
+                    del obj["listing"]
+            adjustFileObjs(joborder_keepmount, keepmount)
+            adjustDirObjs(joborder_keepmount, keepmount)
+            adjustFileObjs(packed, keepmount)
+            adjustDirObjs(packed, keepmount)
+
+            wf_runner = {
+                "class": "CommandLineTool",
+                "baseCommand": "cwltool",
+                "inputs": self.tool["inputs"],
+                "outputs": self.tool["outputs"],
+                "stdout": "cwl.output.json",
+                "requirements": workflowobj["requirements"]+[
+                    {"class": "InlineJavascriptRequirement"},
+                    {
+                    "class": "InitialWorkDirRequirement",
+                    "listing": [{
+                            "entryname": "workflow.cwl",
+                            "entry": yaml.safe_dump(packed).replace("\\", "\\\\").replace('$(', '\$(').replace('${', '\${')
+                        }, {
+                            "entryname": "cwl.input.yml",
+                            "entry": yaml.safe_dump(joborder_keepmount).replace("\\", "\\\\").replace('$(', '\$(').replace('${', '\${')
+                        }]
+                }],
+                "hints": workflowobj["hints"],
+                "arguments": ["--no-container", "--move-outputs", "workflow.cwl#main", "cwl.input.yml"]
+            }
+            kwargs["loader"] = self.doc_loader
+            kwargs["avsc_names"] = self.doc_schema
+            return ArvadosCommandTool(self.arvrunner, wf_runner, **kwargs).job(joborder, output_callback, **kwargs)
+        else:
+            return super(ArvadosWorkflow, self).job(joborder, output_callback, **kwargs)
index 40a64ffd20b5a769421f297c8a29cf35cec1d8d3..0fd9a0ed332ececaf4474ca3939fe9b4785db82d 100644 (file)
@@ -6,7 +6,7 @@ import os
 import arvados.commands.run
 import arvados.collection
 
-from cwltool.pathmapper import PathMapper, MapperEnt, abspath
+from cwltool.pathmapper import PathMapper, MapperEnt, abspath, adjustFileObjs, adjustDirObjs
 from cwltool.workflow import WorkflowException
 
 logger = logging.getLogger('arvados.cwl-runner')
@@ -42,8 +42,11 @@ class ArvPathMapper(PathMapper):
                     uploadfiles.add((src, ab, st))
                 elif isinstance(st, arvados.commands.run.ArvFile):
                     self._pathmap[src] = MapperEnt(ab, st.fn, "File")
-                elif src.startswith("_:") and "contents" in srcobj:
-                    pass
+                elif src.startswith("_:"):
+                    if "contents" in srcobj:
+                        pass
+                    else:
+                        raise WorkflowException("File literal '%s' is missing contents" % src)
                 else:
                     raise WorkflowException("Input file path '%s' is invalid" % st)
             if "secondaryFiles" in srcobj:
@@ -52,7 +55,7 @@ class ArvPathMapper(PathMapper):
         elif srcobj["class"] == "Directory":
             if isinstance(src, basestring) and ArvPathMapper.pdh_dirpath.match(src):
                 self._pathmap[src] = MapperEnt(src, self.collection_pattern % src[5:], "Directory")
-            for l in srcobj["listing"]:
+            for l in srcobj.get("listing", []):
                 self.visit(l, uploadfiles)
 
     def addentry(self, obj, c, path, subdirs):
index 155c6a05039ec65649975d038073185fbc5c4338..cf2f1db997b9db9688b52258994bc6179863783d 100644 (file)
@@ -69,25 +69,19 @@ def upload_dependencies(arvrunner, name, document_loader,
                   set(("$include", "$schemas", "location")),
                   loadref)
 
-    files = []
-    def visitFiles(path):
-        files.append(path)
-
-    adjustFileObjs(sc, visitFiles)
-    adjustDirObjs(sc, visitFiles)
-
-    normalizeFilesDirs(files)
+    normalizeFilesDirs(sc)
 
     if "id" in workflowobj:
-        files.append({"class": "File", "location": workflowobj["id"]})
+        sc.append({"class": "File", "location": workflowobj["id"]})
 
-    mapper = ArvPathMapper(arvrunner, files, "",
+    mapper = ArvPathMapper(arvrunner, sc, "",
                            "keep:%s",
                            "keep:%s/%s",
                            name=name)
 
     def setloc(p):
-        p["location"] = mapper.mapper(p["location"]).target
+        if not p["location"].startswith("_:") and not p["location"].startswith("keep:"):
+            p["location"] = mapper.mapper(p["location"]).target
     adjustFileObjs(workflowobj, setloc)
     adjustDirObjs(workflowobj, setloc)
 
index 87351922b06fe2f7d8852d53575fef03a68d38ee..fc5a52c588cba05395f0fe4064027c6a6be3aebd 100644 (file)
@@ -25,6 +25,7 @@ setup(name='arvados-cwl-runner',
       download_url="https://github.com/curoverse/arvados.git",
       license='Apache 2.0',
       packages=find_packages(),
+      package_data={'arvados_cwl': ['arv-cwl-schema.yml']},
       scripts=[
           'bin/cwl-runner',
           'bin/arvados-cwl-runner'
@@ -32,7 +33,7 @@ setup(name='arvados-cwl-runner',
       # Make sure to update arvados/build/run-build-packages.sh as well
       # when updating the cwltool version pin.
       install_requires=[
-          'cwltool==1.0.20160922135240',
+          'cwltool==1.0.20160923180109',
           'arvados-python-client>=0.1.20160826210445'
       ],
       data_files=[
index 7d941cf8fc7c5d5f1caf04d5b60a8aa2b67c1019..822a213fe7a387383eaddec8e75386c785353d6c 100644 (file)
@@ -86,6 +86,11 @@ class TestContainer(unittest.TestCase):
                 "coresMin": 3,
                 "ramMin": 3000,
                 "tmpdirMin": 4000
+            }, {
+                "class": "http://arvados.org/cwl#RuntimeConstraints",
+                "keep_cache": 512
+            }, {
+                "class": "http://arvados.org/cwl#APIRequirement",
             }],
             "baseCommand": "ls"
         }
@@ -107,7 +112,8 @@ class TestContainer(unittest.TestCase):
                 'name': 'test_resource_requirements',
                 'runtime_constraints': {
                     'vcpus': 3,
-                    'ram': 3145728000
+                    'ram': 3145728000,
+                    'API': True
                 }, 'priority': 1,
                 'mounts': {
                     '/var/spool/cwl': {'kind': 'tmp'}
index 6189a2de351ff44b1d08d8937ca632eef390656a..e36a6076ab72de97ca446a34af2891906417250c 100644 (file)
@@ -1,12 +1,12 @@
-import arvados_cwl
 import logging
 import mock
 import unittest
 import os
 import functools
-import cwltool.process
-from schema_salad.ref_resolver import Loader
+import json
 
+import arvados_cwl
+import cwltool.process
 from schema_salad.ref_resolver import Loader
 
 if not os.getenv('ARVADOS_DEBUG'):
@@ -80,6 +80,11 @@ class TestJob(unittest.TestCase):
                 "coresMin": 3,
                 "ramMin": 3000,
                 "tmpdirMin": 4000
+            }, {
+                "class": "http://arvados.org/cwl#RuntimeConstraints",
+                "keep_cache": 512
+            }, {
+                "class": "http://arvados.org/cwl#APIRequirement",
             }],
             "baseCommand": "ls"
         }
@@ -107,7 +112,8 @@ class TestJob(unittest.TestCase):
                     'docker_image': 'arvados/jobs',
                     'min_cores_per_node': 3,
                     'min_ram_mb_per_node': 3000,
-                    'min_scratch_mb_per_node': 5024 # tmpdirSize + outdirSize
+                    'min_scratch_mb_per_node': 5024, # tmpdirSize + outdirSize
+                    'keep_cache_mb_per_task': 512
                 }
             },
             find_or_create=True,
@@ -193,3 +199,65 @@ class TestJob(unittest.TestCase):
             mock.call().execute(num_retries=0)])
 
         self.assertFalse(api.collections().create.called)
+
+
+class TestWorkflow(unittest.TestCase):
+    # The test passes no builder.resources
+    # Hence the default resources will apply: {'cores': 1, 'ram': 1024, 'outdirSize': 1024, 'tmpdirSize': 1024}
+    @mock.patch("arvados.collection.Collection")
+    def test_run(self, mockcollection):
+        arvados_cwl.add_arv_hints()
+
+        runner = arvados_cwl.ArvCwlRunner(mock.MagicMock())
+        runner.project_uuid = "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
+        runner.ignore_docker_for_reuse = False
+        document_loader, avsc_names, schema_metadata, metaschema_loader = cwltool.process.get_schema("v1.0")
+
+        tool, metadata = document_loader.resolve_ref("tests/wf/scatter2.cwl")
+        metadata["cwlVersion"] = tool["cwlVersion"]
+
+        mockcollection().portable_data_hash.return_value = "99999999999999999999999999999999+118"
+
+        make_fs_access=functools.partial(arvados_cwl.CollectionFsAccess, api_client=runner.api)
+        arvtool = arvados_cwl.ArvadosWorkflow(runner, tool, work_api="jobs", avsc_names=avsc_names,
+                                              basedir="", make_fs_access=make_fs_access, loader=document_loader,
+                                              makeTool=runner.arv_make_tool, metadata=metadata)
+        arvtool.formatgraph = None
+        it = arvtool.job({}, mock.MagicMock(), basedir="", make_fs_access=make_fs_access)
+        it.next().run()
+        it.next().run()
+
+        with open("tests/wf/scatter2_subwf.cwl") as f:
+            subwf = f.read()
+
+        mockcollection().open().__enter__().write.assert_has_calls([mock.call(subwf)])
+        mockcollection().open().__enter__().write.assert_has_calls([mock.call('{sleeptime: 5}')])
+
+        runner.api.jobs().create.assert_called_with(
+            body={
+                'minimum_script_version': '9e5b98e8f5f4727856b53447191f9c06e3da2ba6',
+                'repository': 'arvados',
+                'script_version': 'master',
+                'script': 'crunchrunner',
+                'script_parameters': {
+                    'tasks': [{'task.env': {
+                        'HOME': '$(task.outdir)',
+                        'TMPDIR': '$(task.tmpdir)'},
+                               'task.vwd': {
+                                   'workflow.cwl': '$(task.keep)/99999999999999999999999999999999+118/workflow.cwl',
+                                   'cwl.input.yml': '$(task.keep)/99999999999999999999999999999999+118/cwl.input.yml'
+                               },
+                    'command': [u'cwltool', u'--no-container', u'--move-outputs', u'workflow.cwl#main', u'cwl.input.yml'],
+                    'task.stdout': 'cwl.output.json'}]},
+                'runtime_constraints': {
+                    'min_scratch_mb_per_node': 2048,
+                    'min_cores_per_node': 1,
+                    'docker_image': 'arvados/jobs',
+                    'min_ram_mb_per_node': 1024
+                },
+                'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz'},
+            filters=[['repository', '=', 'arvados'],
+                     ['script', '=', 'crunchrunner'],
+                     ['script_version', 'in git', '9e5b98e8f5f4727856b53447191f9c06e3da2ba6'],
+                     ['docker_image_locator', 'in docker', 'arvados/jobs']],
+            find_or_create=True)
diff --git a/sdk/cwl/tests/wf/scatter2.cwl b/sdk/cwl/tests/wf/scatter2.cwl
new file mode 100644 (file)
index 0000000..f73ec2b
--- /dev/null
@@ -0,0 +1,56 @@
+class: Workflow
+cwlVersion: v1.0
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+inputs:
+  sleeptime:
+    type: int[]
+    default: [5]
+outputs:
+  out:
+    type: string[]
+    outputSource: scatterstep/out
+requirements:
+  SubworkflowFeatureRequirement: {}
+  ScatterFeatureRequirement: {}
+  InlineJavascriptRequirement: {}
+  StepInputExpressionRequirement: {}
+steps:
+  scatterstep:
+    in:
+      sleeptime: sleeptime
+    out: [out]
+    scatter: sleeptime
+    hints:
+      - class: arv:RunInSingleContainer
+    run:
+      class: Workflow
+      id: mysub
+      inputs:
+        sleeptime: int
+      outputs:
+        out:
+          type: string
+          outputSource: sleep1/out
+      steps:
+        sleep1:
+          in:
+            sleeptime: sleeptime
+            blurb:
+              valueFrom: |
+                ${
+                  return String(inputs.sleeptime) + "b";
+                }
+          out: [out]
+          run:
+            class: CommandLineTool
+            inputs:
+              sleeptime:
+                type: int
+                inputBinding: {position: 1}
+            outputs:
+              out:
+                type: string
+                outputBinding:
+                  outputEval: "out"
+            baseCommand: sleep
diff --git a/sdk/cwl/tests/wf/scatter2_subwf.cwl b/sdk/cwl/tests/wf/scatter2_subwf.cwl
new file mode 100644 (file)
index 0000000..0ae1cf0
--- /dev/null
@@ -0,0 +1,33 @@
+$graph:
+- class: Workflow
+  hints:
+  - {class: 'http://arvados.org/cwl#RunInSingleContainer'}
+  id: '#main'
+  inputs:
+  - {id: '#main/sleeptime', type: int}
+  outputs:
+  - {id: '#main/out', outputSource: '#main/sleep1/out', type: string}
+  requirements:
+  - {class: InlineJavascriptRequirement}
+  - {class: ScatterFeatureRequirement}
+  - {class: StepInputExpressionRequirement}
+  - {class: SubworkflowFeatureRequirement}
+  steps:
+  - id: '#main/sleep1'
+    in:
+    - {id: '#main/sleep1/blurb', valueFrom: "${\n  return String(inputs.sleeptime)\
+        \ + \"b\";\n}\n"}
+    - {id: '#main/sleep1/sleeptime', source: '#main/sleeptime'}
+    out: ['#main/sleep1/out']
+    run:
+      baseCommand: sleep
+      class: CommandLineTool
+      inputs:
+      - id: '#main/sleep1/sleeptime'
+        inputBinding: {position: 1}
+        type: int
+      outputs:
+      - id: '#main/sleep1/out'
+        outputBinding: {outputEval: out}
+        type: string
+cwlVersion: v1.0
\ No newline at end of file
index 1639c5852a6573ca2f51deb0080558cec587e464..7b87aee6ab7b14c875aa044e3b88e11ed22bd567 100644 (file)
@@ -13,9 +13,7 @@ type Duration time.Duration
 // UnmarshalJSON implements json.Unmarshaler
 func (d *Duration) UnmarshalJSON(data []byte) error {
        if data[0] == '"' {
-               dur, err := time.ParseDuration(string(data[1 : len(data)-1]))
-               *d = Duration(dur)
-               return err
+               return d.Set(string(data[1 : len(data)-1]))
        }
        return fmt.Errorf("duration must be given as a string like \"600s\" or \"1h30m\"")
 }
@@ -29,3 +27,10 @@ func (d *Duration) MarshalJSON() ([]byte, error) {
 func (d Duration) String() string {
        return time.Duration(d).String()
 }
+
+// Value implements flag.Value
+func (d *Duration) Set(s string) error {
+       dur, err := time.ParseDuration(s)
+       *d = Duration(dur)
+       return err
+}
index aeb81f9317e871c81156ba759b163f9aeeb5f08e..e3cbfcf13effab33a5c53e12e6b1a9e4e03b18ac 100644 (file)
@@ -15,6 +15,8 @@ import (
        "regexp"
        "strings"
        "time"
+
+       "git.curoverse.com/arvados.git/sdk/go/arvados"
 )
 
 type StringMatcher func(string) bool
@@ -101,6 +103,24 @@ type ArvadosClient struct {
        Retries int
 }
 
+// New returns an ArvadosClient using the given arvados.Client
+// configuration. This is useful for callers who load arvados.Client
+// fields from configuration files but still need to use the
+// arvadosclient.ArvadosClient package.
+func New(c *arvados.Client) (*ArvadosClient, error) {
+       return &ArvadosClient{
+               Scheme:      "https",
+               ApiServer:   c.APIHost,
+               ApiToken:    c.AuthToken,
+               ApiInsecure: c.Insecure,
+               Client: &http.Client{Transport: &http.Transport{
+                       TLSClientConfig: &tls.Config{InsecureSkipVerify: c.Insecure}}},
+               External:          false,
+               Retries:           2,
+               lastClosedIdlesAt: time.Now(),
+       }, nil
+}
+
 // MakeArvadosClient creates a new ArvadosClient using the standard
 // environment variables ARVADOS_API_HOST, ARVADOS_API_TOKEN,
 // ARVADOS_API_HOST_INSECURE, ARVADOS_EXTERNAL_CLIENT, and
index 87b67c39b67c6f3ee95a165f9cc50f83ba5be3a7..26b3518798e7c645ed3cf6ce640d3a9eb6d068cd 100644 (file)
@@ -10,22 +10,32 @@ import (
 // credentials. See arvados-git-httpd for an example, and sync.Pool
 // for more information about garbage collection.
 type ClientPool struct {
-       sync.Pool
-       lastErr error
+       // Initialize new clients by coping this one.
+       Prototype *ArvadosClient
+
+       pool      *sync.Pool
+       lastErr   error
+       setupOnce sync.Once
 }
 
-// MakeClientPool returns a new empty ClientPool.
+// MakeClientPool returns a new empty ClientPool, using environment
+// variables to initialize the prototype.
 func MakeClientPool() *ClientPool {
-       p := &ClientPool{}
-       p.Pool = sync.Pool{New: func() interface{} {
-               arv, err := MakeArvadosClient()
-               if err != nil {
-                       p.lastErr = err
+       proto, err := MakeArvadosClient()
+       return &ClientPool{
+               Prototype: &proto,
+               lastErr:   err,
+       }
+}
+
+func (p *ClientPool) setup() {
+       p.pool = &sync.Pool{New: func() interface{} {
+               if p.lastErr != nil {
                        return nil
                }
-               return &arv
+               c := *p.Prototype
+               return &c
        }}
-       return p
 }
 
 // Err returns the error that was encountered last time Get returned
@@ -39,7 +49,8 @@ func (p *ClientPool) Err() error {
 // (including its ApiToken) will be just as it was when it was Put
 // back in the pool.
 func (p *ClientPool) Get() *ArvadosClient {
-       c, ok := p.Pool.Get().(*ArvadosClient)
+       p.setupOnce.Do(p.setup)
+       c, ok := p.pool.Get().(*ArvadosClient)
        if !ok {
                return nil
        }
@@ -48,5 +59,6 @@ func (p *ClientPool) Get() *ArvadosClient {
 
 // Put puts an ArvadosClient back in the pool.
 func (p *ClientPool) Put(c *ArvadosClient) {
-       p.Pool.Put(c)
+       p.setupOnce.Do(p.setup)
+       p.pool.Put(c)
 }
index 7edc482639e585effa23629552fd3a1bea216be8..d3b48ea9fb738fb897ce2ed0926dc619c4522bbf 100644 (file)
@@ -22,6 +22,15 @@ func ResetEnv() {
        }
 }
 
+// APIHost returns the address:port of the current test server.
+func APIHost() string {
+       h := authSettings["ARVADOS_API_HOST"]
+       if h == "" {
+               log.Fatal("arvadostest.APIHost() was called but authSettings is not populated")
+       }
+       return h
+}
+
 // ParseAuthSettings parses auth settings from given input
 func ParseAuthSettings(authScript []byte) {
        scanner := bufio.NewScanner(bytes.NewReader(authScript))
index 7d1c30cdaf63b418086a265c5129a103d168cb67..672374bc6c768f4f7bd8be0ba81daf2fbefa1629 100644 (file)
@@ -530,7 +530,7 @@ class ArvadosModel < ActiveRecord::Base
   end
 
   def self.uuid_like_pattern
-    "_____-#{uuid_prefix}-_______________"
+    "#{Rails.configuration.uuid_prefix}-#{uuid_prefix}-_______________"
   end
 
   def self.uuid_regex
diff --git a/services/arv-git-httpd/arv-git-httpd.service b/services/arv-git-httpd/arv-git-httpd.service
new file mode 100644 (file)
index 0000000..1182a0e
--- /dev/null
@@ -0,0 +1,12 @@
+[Unit]
+Description=Arvados git server
+Documentation=https://doc.arvados.org/
+After=network.target
+
+[Service]
+Type=notify
+ExecStart=/usr/bin/arv-git-httpd
+Restart=always
+
+[Install]
+WantedBy=multi-user.target
index fccb0c9576864634481a2e69b7237def54b6f0ec..9f92cd1b7213f5a720a771ae349fac7dd6558b39 100644 (file)
@@ -5,6 +5,7 @@ import (
        "net/http"
        "os"
        "strings"
+       "sync"
        "time"
 
        "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
@@ -12,13 +13,24 @@ import (
        "git.curoverse.com/arvados.git/sdk/go/httpserver"
 )
 
-var clientPool = arvadosclient.MakeClientPool()
-
 type authHandler struct {
-       handler http.Handler
+       handler    http.Handler
+       clientPool *arvadosclient.ClientPool
+       setupOnce  sync.Once
+}
+
+func (h *authHandler) setup() {
+       ac, err := arvadosclient.New(&theConfig.Client)
+       if err != nil {
+               log.Fatal(err)
+       }
+       h.clientPool = &arvadosclient.ClientPool{Prototype: ac}
+       log.Printf("%+v", h.clientPool.Prototype)
 }
 
 func (h *authHandler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
+       h.setupOnce.Do(h.setup)
+
        var statusCode int
        var statusText string
        var apiToken string
@@ -68,12 +80,12 @@ func (h *authHandler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
        repoName = pathParts[0]
        repoName = strings.TrimRight(repoName, "/")
 
-       arv := clientPool.Get()
+       arv := h.clientPool.Get()
        if arv == nil {
-               statusCode, statusText = http.StatusInternalServerError, "connection pool failed: "+clientPool.Err().Error()
+               statusCode, statusText = http.StatusInternalServerError, "connection pool failed: "+h.clientPool.Err().Error()
                return
        }
-       defer clientPool.Put(arv)
+       defer h.clientPool.Put(arv)
 
        // Ask API server whether the repository is readable using
        // this token (by trying to read it!)
@@ -129,7 +141,7 @@ func (h *authHandler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
                "/" + repoName + "/.git",
        }
        for _, dir := range tryDirs {
-               if fileInfo, err := os.Stat(theConfig.Root + dir); err != nil {
+               if fileInfo, err := os.Stat(theConfig.RepoRoot + dir); err != nil {
                        if !os.IsNotExist(err) {
                                statusCode, statusText = http.StatusInternalServerError, err.Error()
                                return
@@ -141,7 +153,7 @@ func (h *authHandler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
        }
        if rewrittenPath == "" {
                log.Println("WARNING:", repoUUID,
-                       "git directory not found in", theConfig.Root, tryDirs)
+                       "git directory not found in", theConfig.RepoRoot, tryDirs)
                // We say "content not found" to disambiguate from the
                // earlier "API says that repo does not exist" error.
                statusCode, statusText = http.StatusNotFound, "content not found"
index 0312b296fc938da2f4950de7ca9240a5c0550825..f0b98fab72382dfa02c2b12a144e2a6b9f5190c4 100644 (file)
@@ -19,11 +19,11 @@ func newGitHandler() http.Handler {
        return &gitHandler{
                Handler: cgi.Handler{
                        Path: theConfig.GitCommand,
-                       Dir:  theConfig.Root,
+                       Dir:  theConfig.RepoRoot,
                        Env: []string{
-                               "GIT_PROJECT_ROOT=" + theConfig.Root,
+                               "GIT_PROJECT_ROOT=" + theConfig.RepoRoot,
                                "GIT_HTTP_EXPORT_ALL=",
-                               "SERVER_ADDR=" + theConfig.Addr,
+                               "SERVER_ADDR=" + theConfig.Listen,
                        },
                        InheritEnv: []string{
                                "PATH",
index 35c2f4884f4f99e2894c5125776edfb0db32895c..d87162dca3aa6f80ac16411c4a138e6286fc40e2 100644 (file)
@@ -37,7 +37,7 @@ func (s *GitHandlerSuite) TestEnvVars(c *check.C) {
        c.Check(body, check.Matches, `(?ms).*^GL_BYPASS_ACCESS_CHECKS=yesplease$.*`)
        c.Check(body, check.Matches, `(?ms).*^REMOTE_HOST=::1$.*`)
        c.Check(body, check.Matches, `(?ms).*^REMOTE_PORT=12345$.*`)
-       c.Check(body, check.Matches, `(?ms).*^SERVER_ADDR=`+regexp.QuoteMeta(theConfig.Addr)+`$.*`)
+       c.Check(body, check.Matches, `(?ms).*^SERVER_ADDR=`+regexp.QuoteMeta(theConfig.Listen)+`$.*`)
 }
 
 func (s *GitHandlerSuite) TestCGIErrorOnSplitHostPortError(c *check.C) {
index 20bdae7ec13a5534ebd4f69248869d4980688fa7..74c2b8cf4d91a8ac3da2835b12e90a35e6dd0380 100644 (file)
@@ -6,6 +6,8 @@ import (
        "os/exec"
        "strings"
 
+       "git.curoverse.com/arvados.git/sdk/go/arvados"
+       "git.curoverse.com/arvados.git/sdk/go/arvadostest"
        check "gopkg.in/check.v1"
 )
 
@@ -41,10 +43,14 @@ func (s *GitoliteSuite) SetUpTest(c *check.C) {
        runGitolite("gitolite", "setup", "--admin", "root")
 
        s.tmpRepoRoot = s.gitoliteHome + "/repositories"
-       s.Config = &config{
-               Addr:       ":0",
+       s.Config = &Config{
+               Client: arvados.Client{
+                       APIHost:  arvadostest.APIHost(),
+                       Insecure: true,
+               },
+               Listen:     ":0",
                GitCommand: "/usr/share/gitolite3/gitolite-shell",
-               Root:       s.tmpRepoRoot,
+               RepoRoot:   s.tmpRepoRoot,
        }
        s.IntegrationSuite.SetUpTest(c)
 
@@ -62,6 +68,10 @@ func (s *GitoliteSuite) TearDownTest(c *check.C) {
        // upgrade to Go 1.4.
        os.Setenv("GITOLITE_HTTP_HOME", "")
        os.Setenv("GL_BYPASS_ACCESS_CHECKS", "")
+       if s.gitoliteHome != "" {
+               err := os.RemoveAll(s.gitoliteHome)
+               c.Check(err, check.Equals, nil)
+       }
        s.IntegrationSuite.TearDownTest(c)
 }
 
index 61d83ff8e85a0da8b6579b8a86ffd8c50d5b2551..5e55eca754838d97d2aaa8888482c686306a42cf 100644 (file)
@@ -8,6 +8,7 @@ import (
        "strings"
        "testing"
 
+       "git.curoverse.com/arvados.git/sdk/go/arvados"
        "git.curoverse.com/arvados.git/sdk/go/arvadostest"
        check "gopkg.in/check.v1"
 )
@@ -23,7 +24,7 @@ type IntegrationSuite struct {
        tmpRepoRoot string
        tmpWorkdir  string
        testServer  *server
-       Config      *config
+       Config      *Config
 }
 
 func (s *IntegrationSuite) SetUpSuite(c *check.C) {
@@ -67,19 +68,27 @@ func (s *IntegrationSuite) SetUpTest(c *check.C) {
        c.Assert(err, check.Equals, nil)
 
        if s.Config == nil {
-               s.Config = &config{
-                       Addr:       ":0",
+               s.Config = &Config{
+                       Client: arvados.Client{
+                               APIHost:  arvadostest.APIHost(),
+                               Insecure: true,
+                       },
+                       Listen:     ":0",
                        GitCommand: "/usr/bin/git",
-                       Root:       s.tmpRepoRoot,
+                       RepoRoot:   s.tmpRepoRoot,
                }
        }
+
+       // Clear ARVADOS_API_* env vars before starting up the server,
+       // to make sure arv-git-httpd doesn't use them or complain
+       // about them being missing.
+       os.Unsetenv("ARVADOS_API_HOST")
+       os.Unsetenv("ARVADOS_API_HOST_INSECURE")
+       os.Unsetenv("ARVADOS_API_TOKEN")
+
        theConfig = s.Config
        err = s.testServer.Start()
        c.Assert(err, check.Equals, nil)
-
-       // Clear ARVADOS_API_TOKEN after starting up the server, to
-       // make sure arv-git-httpd doesn't use it.
-       os.Setenv("ARVADOS_API_TOKEN", "unused-token-placates-client-library")
 }
 
 func (s *IntegrationSuite) TearDownTest(c *check.C) {
index 98695c9a9df806164afc59c128e204cb52547cc9..40dcf3265ff120074ac7d89a286b09cf6040002e 100644 (file)
@@ -1,49 +1,80 @@
 package main
 
 import (
+       "encoding/json"
        "flag"
        "log"
        "os"
+       "regexp"
+
+       "git.curoverse.com/arvados.git/sdk/go/arvados"
+       "git.curoverse.com/arvados.git/sdk/go/config"
+       "github.com/coreos/go-systemd/daemon"
 )
 
-type config struct {
-       Addr       string
+// Server configuration
+type Config struct {
+       Client     arvados.Client
+       Listen     string
        GitCommand string
-       Root       string
+       RepoRoot   string
 }
 
-var theConfig *config
+var theConfig = defaultConfig()
 
-func init() {
-       theConfig = &config{}
-       flag.StringVar(&theConfig.Addr, "address", "0.0.0.0:80",
-               "Address to listen on, \"host:port\".")
-       flag.StringVar(&theConfig.GitCommand, "git-command", "/usr/bin/git",
-               "Path to git or gitolite-shell executable. Each authenticated request will execute this program with a single argument, \"http-backend\".")
+func defaultConfig() *Config {
        cwd, err := os.Getwd()
        if err != nil {
                log.Fatalln("Getwd():", err)
        }
-       flag.StringVar(&theConfig.Root, "repo-root", cwd,
-               "Path to git repositories.")
-
-       // MakeArvadosClient returns an error if token is unset (even
-       // though we don't need to do anything requiring
-       // authentication yet). We can't do this in newArvadosClient()
-       // just before calling MakeArvadosClient(), though, because
-       // that interferes with the env var needed by "run test
-       // servers".
-       os.Setenv("ARVADOS_API_TOKEN", "xxx")
+       return &Config{
+               Listen:     ":80",
+               GitCommand: "/usr/bin/git",
+               RepoRoot:   cwd,
+       }
 }
 
-func main() {
+func init() {
+       const defaultCfgPath = "/etc/arvados/arv-git-httpd/config.json"
+       const deprecated = " (DEPRECATED -- use config file instead)"
+       flag.StringVar(&theConfig.Listen, "address", theConfig.Listen,
+               "Address to listen on, \"host:port\" or \":port\"."+deprecated)
+       flag.StringVar(&theConfig.GitCommand, "git-command", theConfig.GitCommand,
+               "Path to git or gitolite-shell executable. Each authenticated request will execute this program with a single argument, \"http-backend\"."+deprecated)
+       flag.StringVar(&theConfig.RepoRoot, "repo-root", theConfig.RepoRoot,
+               "Path to git repositories."+deprecated)
+
+       cfgPath := flag.String("config", defaultCfgPath, "Configuration file `path`.")
+       flag.Usage = usage
        flag.Parse()
+
+       err := config.LoadFile(theConfig, *cfgPath)
+       if err != nil {
+               h := os.Getenv("ARVADOS_API_HOST")
+               if h == "" || !os.IsNotExist(err) || *cfgPath != defaultCfgPath {
+                       log.Fatal(err)
+               }
+               log.Print("DEPRECATED: No config file found, but ARVADOS_API_HOST environment variable is set. Please use a config file instead.")
+               theConfig.Client.APIHost = h
+               if regexp.MustCompile("^(?i:1|yes|true)$").MatchString(os.Getenv("ARVADOS_API_HOST_INSECURE")) {
+                       theConfig.Client.Insecure = true
+               }
+               if j, err := json.MarshalIndent(theConfig, "", "    "); err == nil {
+                       log.Print("Current configuration:\n", string(j))
+               }
+       }
+}
+
+func main() {
        srv := &server{}
        if err := srv.Start(); err != nil {
                log.Fatal(err)
        }
+       if _, err := daemon.SdNotify("READY=1"); err != nil {
+               log.Printf("Error notifying init daemon: %v", err)
+       }
        log.Println("Listening at", srv.Addr)
-       log.Println("Repository root", theConfig.Root)
+       log.Println("Repository root", theConfig.RepoRoot)
        if err := srv.Wait(); err != nil {
                log.Fatal(err)
        }
index 40e77a812a6ff4c04a524ca76b819555b9e6e69e..e2311d22e876861ecbe8749b8f552aff2c2c2871 100644 (file)
@@ -12,8 +12,8 @@ type server struct {
 
 func (srv *server) Start() error {
        mux := http.NewServeMux()
-       mux.Handle("/", &authHandler{newGitHandler()})
+       mux.Handle("/", &authHandler{handler: newGitHandler()})
        srv.Handler = mux
-       srv.Addr = theConfig.Addr
+       srv.Addr = theConfig.Listen
        return srv.Server.Start()
 }
diff --git a/services/arv-git-httpd/usage.go b/services/arv-git-httpd/usage.go
new file mode 100644 (file)
index 0000000..a4a9900
--- /dev/null
@@ -0,0 +1,62 @@
+package main
+
+import (
+       "encoding/json"
+       "flag"
+       "fmt"
+       "os"
+)
+
+func usage() {
+       c := defaultConfig()
+       c.Client.APIHost = "zzzzz.arvadosapi.com:443"
+       exampleConfigFile, err := json.MarshalIndent(c, "    ", "  ")
+       if err != nil {
+               panic(err)
+       }
+       fmt.Fprintf(os.Stderr, `
+
+arv-git-httpd provides authenticated access to Arvados-hosted git repositories.
+
+See http://doc.arvados.org/install/install-arv-git-httpd.html.
+
+Usage: arv-git-httpd [-config path/to/config.json]
+
+Options:
+`)
+       flag.PrintDefaults()
+       fmt.Fprintf(os.Stderr, `
+Example config file:
+    %s
+
+Client.APIHost:
+
+    Address (or address:port) of the Arvados API endpoint.
+
+Client.AuthToken:
+
+    Unused. Normally empty, or omitted entirely.
+
+Client.Insecure:
+
+    True if your Arvados API endpoint uses an unverifiable SSL/TLS
+    certificate.
+
+Listen:
+
+    Local port to listen on. Can be "address:port" or ":port", where
+    "address" is a host IP address or name and "port" is a port number
+    or name.
+
+GitCommand:
+
+    Path to git or gitolite-shell executable. Each authenticated
+    request will execute this program with the single argument
+    "http-backend".
+
+RepoRoot:
+
+    Path to git repositories. Defaults to current working directory.
+
+`, exampleConfigFile)
+}
index 39f110962b8c79f39fff8b8de6ba38fefc850f5a..8b6d01969a7819f52975c511f1d7a954aaaa324d 100644 (file)
@@ -23,6 +23,33 @@ from mount_test_base import MountTestBase
 logger = logging.getLogger('arvados.arv-mount')
 
 
+class AssertWithTimeout(object):
+    """Allow some time for an assertion to pass."""
+
+    def __init__(self, timeout=0):
+        self.timeout = timeout
+
+    def __iter__(self):
+        self.deadline = time.time() + self.timeout
+        self.done = False
+        return self
+
+    def next(self):
+        if self.done:
+            raise StopIteration
+        return self.attempt
+
+    def attempt(self, fn, *args, **kwargs):
+        try:
+            fn(*args, **kwargs)
+        except AssertionError:
+            if time.time() > self.deadline:
+                raise
+            time.sleep(0.1)
+        else:
+            self.done = True
+
+
 class FuseMountTest(MountTestBase):
     def setUp(self):
         super(FuseMountTest, self).setUp()
@@ -182,18 +209,18 @@ class FuseTagsUpdateTest(MountTestBase):
 
         bar_uuid = run_test_server.fixture('collections')['bar_file']['uuid']
         self.tag_collection(bar_uuid, 'fuse_test_tag')
-        time.sleep(1)
-        self.assertIn('fuse_test_tag', llfuse.listdir(self.mounttmp))
+        for attempt in AssertWithTimeout(10):
+            attempt(self.assertIn, 'fuse_test_tag', llfuse.listdir(self.mounttmp))
         self.assertDirContents('fuse_test_tag', [bar_uuid])
 
         baz_uuid = run_test_server.fixture('collections')['baz_file']['uuid']
         l = self.tag_collection(baz_uuid, 'fuse_test_tag')
-        time.sleep(1)
-        self.assertDirContents('fuse_test_tag', [bar_uuid, baz_uuid])
+        for attempt in AssertWithTimeout(10):
+            attempt(self.assertDirContents, 'fuse_test_tag', [bar_uuid, baz_uuid])
 
         self.api.links().delete(uuid=l['uuid']).execute()
-        time.sleep(1)
-        self.assertDirContents('fuse_test_tag', [bar_uuid])
+        for attempt in AssertWithTimeout(10):
+            attempt(self.assertDirContents, 'fuse_test_tag', [bar_uuid])
 
 
 class FuseSharedTest(MountTestBase):
@@ -713,12 +740,8 @@ class FuseUpdateFromEventTest(MountTestBase):
             with collection2.open("file1.txt", "w") as f:
                 f.write("foo")
 
-        time.sleep(1)
-
-        # should show up via event bus notify
-
-        d1 = llfuse.listdir(os.path.join(self.mounttmp))
-        self.assertEqual(["file1.txt"], sorted(d1))
+        for attempt in AssertWithTimeout(10):
+            attempt(self.assertEqual, ["file1.txt"], llfuse.listdir(os.path.join(self.mounttmp)))
 
 
 def fuseFileConflictTestHelper(mounttmp):
index 226c388ae29cb41258adb68a6ccb997e6677efbf..46664931fd64bbcf40717849cbf8f58abfd43a9b 100644 (file)
@@ -1,12 +1,10 @@
 package main
 
 import (
+       "encoding/json"
        "errors"
        "flag"
        "fmt"
-       "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
-       "git.curoverse.com/arvados.git/sdk/go/keepclient"
-       "github.com/gorilla/mux"
        "io"
        "io/ioutil"
        "log"
@@ -18,98 +16,125 @@ import (
        "sync"
        "syscall"
        "time"
+
+       "git.curoverse.com/arvados.git/sdk/go/arvados"
+       "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
+       "git.curoverse.com/arvados.git/sdk/go/config"
+       "git.curoverse.com/arvados.git/sdk/go/keepclient"
+       "github.com/coreos/go-systemd/daemon"
+       "github.com/gorilla/mux"
 )
 
-// Default TCP address on which to listen for requests.
-// Override with -listen.
-const DefaultAddr = ":25107"
+type Config struct {
+       Client          arvados.Client
+       Listen          string
+       DisableGet      bool
+       DisablePut      bool
+       DefaultReplicas int
+       Timeout         arvados.Duration
+       PIDFile         string
+       Debug           bool
+}
+
+func DefaultConfig() *Config {
+       return &Config{
+               Listen:  ":25107",
+               Timeout: arvados.Duration(15 * time.Second),
+       }
+}
 
 var listener net.Listener
 
 func main() {
-       var (
-               listen           string
-               no_get           bool
-               no_put           bool
-               default_replicas int
-               timeout          int64
-               pidfile          string
-       )
+       cfg := DefaultConfig()
 
        flagset := flag.NewFlagSet("keepproxy", flag.ExitOnError)
-
-       flagset.StringVar(
-               &listen,
-               "listen",
-               DefaultAddr,
-               "Interface on which to listen for requests, in the format "+
-                       "ipaddr:port. e.g. -listen=10.0.1.24:8000. Use -listen=:port "+
-                       "to listen on all network interfaces.")
-
-       flagset.BoolVar(
-               &no_get,
-               "no-get",
-               false,
-               "If set, disable GET operations")
-
-       flagset.BoolVar(
-               &no_put,
-               "no-put",
-               false,
-               "If set, disable PUT operations")
-
-       flagset.IntVar(
-               &default_replicas,
-               "default-replicas",
-               2,
-               "Default number of replicas to write if not specified by the client.")
-
-       flagset.Int64Var(
-               &timeout,
-               "timeout",
-               15,
-               "Timeout on requests to internal Keep services (default 15 seconds)")
-
-       flagset.StringVar(
-               &pidfile,
-               "pid",
-               "",
-               "Path to write pid file")
-
+       flagset.Usage = usage
+
+       const deprecated = " (DEPRECATED -- use config file instead)"
+       flagset.StringVar(&cfg.Listen, "listen", cfg.Listen, "Local port to listen on."+deprecated)
+       flagset.BoolVar(&cfg.DisableGet, "no-get", cfg.DisableGet, "Disable GET operations."+deprecated)
+       flagset.BoolVar(&cfg.DisablePut, "no-put", cfg.DisablePut, "Disable PUT operations."+deprecated)
+       flagset.IntVar(&cfg.DefaultReplicas, "default-replicas", cfg.DefaultReplicas, "Default number of replicas to write if not specified by the client. If 0, use site default."+deprecated)
+       flagset.StringVar(&cfg.PIDFile, "pid", cfg.PIDFile, "Path to write pid file."+deprecated)
+       timeoutSeconds := flagset.Int("timeout", int(time.Duration(cfg.Timeout)/time.Second), "Timeout (in seconds) on requests to internal Keep services."+deprecated)
+
+       var cfgPath string
+       const defaultCfgPath = "/etc/arvados/keepproxy/config.json"
+       flagset.StringVar(&cfgPath, "config", defaultCfgPath, "Configuration file `path`")
        flagset.Parse(os.Args[1:])
 
-       arv, err := arvadosclient.MakeArvadosClient()
+       err := config.LoadFile(cfg, cfgPath)
+       if err != nil {
+               h := os.Getenv("ARVADOS_API_HOST")
+               t := os.Getenv("ARVADOS_API_TOKEN")
+               if h == "" || t == "" || !os.IsNotExist(err) || cfgPath != defaultCfgPath {
+                       log.Fatal(err)
+               }
+               log.Print("DEPRECATED: No config file found, but ARVADOS_API_HOST and ARVADOS_API_TOKEN environment variables are set. Please use a config file instead.")
+               cfg.Client.APIHost = h
+               cfg.Client.AuthToken = t
+               if regexp.MustCompile("^(?i:1|yes|true)$").MatchString(os.Getenv("ARVADOS_API_HOST_INSECURE")) {
+                       cfg.Client.Insecure = true
+               }
+               if j, err := json.MarshalIndent(cfg, "", "    "); err == nil {
+                       log.Print("Current configuration:\n", string(j))
+               }
+               cfg.Timeout = arvados.Duration(time.Duration(*timeoutSeconds) * time.Second)
+       }
+
+       arv, err := arvadosclient.New(&cfg.Client)
        if err != nil {
                log.Fatalf("Error setting up arvados client %s", err.Error())
        }
 
-       if os.Getenv("ARVADOS_DEBUG") != "" {
+       if cfg.Debug {
                keepclient.DebugPrintf = log.Printf
        }
-       kc, err := keepclient.MakeKeepClient(&arv)
+       kc, err := keepclient.MakeKeepClient(arv)
        if err != nil {
                log.Fatalf("Error setting up keep client %s", err.Error())
        }
 
-       if pidfile != "" {
-               f, err := os.Create(pidfile)
+       if cfg.PIDFile != "" {
+               f, err := os.Create(cfg.PIDFile)
+               if err != nil {
+                       log.Fatal(err)
+               }
+               defer f.Close()
+               err = syscall.Flock(int(f.Fd()), syscall.LOCK_EX|syscall.LOCK_NB)
+               if err != nil {
+                       log.Fatalf("flock(%s): %s", cfg.PIDFile, err)
+               }
+               defer os.Remove(cfg.PIDFile)
+               err = f.Truncate(0)
+               if err != nil {
+                       log.Fatalf("truncate(%s): %s", cfg.PIDFile, err)
+               }
+               _, err = fmt.Fprint(f, os.Getpid())
                if err != nil {
-                       log.Fatalf("Error writing pid file (%s): %s", pidfile, err.Error())
+                       log.Fatalf("write(%s): %s", cfg.PIDFile, err)
+               }
+               err = f.Sync()
+               if err != nil {
+                       log.Fatal("sync(%s): %s", cfg.PIDFile, err)
                }
-               fmt.Fprint(f, os.Getpid())
-               f.Close()
-               defer os.Remove(pidfile)
        }
 
-       kc.Want_replicas = default_replicas
-       kc.Client.Timeout = time.Duration(timeout) * time.Second
+       if cfg.DefaultReplicas > 0 {
+               kc.Want_replicas = cfg.DefaultReplicas
+       }
+       kc.Client.Timeout = time.Duration(cfg.Timeout)
        go kc.RefreshServices(5*time.Minute, 3*time.Second)
 
-       listener, err = net.Listen("tcp", listen)
+       listener, err = net.Listen("tcp", cfg.Listen)
        if err != nil {
-               log.Fatalf("Could not listen on %v", listen)
+               log.Fatalf("listen(%s): %s", cfg.Listen, err)
+       }
+       if _, err := daemon.SdNotify("READY=1"); err != nil {
+               log.Printf("Error notifying init daemon: %v", err)
        }
-       log.Printf("Arvados Keep proxy started listening on %v", listener.Addr())
+       log.Println("Listening at", listener.Addr())
 
        // Shut down the server gracefully (by closing the listener)
        // if SIGTERM is received.
@@ -123,7 +148,7 @@ func main() {
        signal.Notify(term, syscall.SIGINT)
 
        // Start serving requests.
-       http.Serve(listener, MakeRESTRouter(!no_get, !no_put, kc))
+       http.Serve(listener, MakeRESTRouter(!cfg.DisableGet, !cfg.DisablePut, kc))
 
        log.Println("shutting down")
 }
diff --git a/services/keepproxy/keepproxy.service b/services/keepproxy/keepproxy.service
new file mode 100644 (file)
index 0000000..5bd3036
--- /dev/null
@@ -0,0 +1,12 @@
+[Unit]
+Description=Arvados Keep Proxy
+Documentation=https://doc.arvados.org/
+After=network.target
+
+[Service]
+Type=notify
+ExecStart=/usr/bin/keepproxy
+Restart=always
+
+[Install]
+WantedBy=multi-user.target
diff --git a/services/keepproxy/usage.go b/services/keepproxy/usage.go
new file mode 100644 (file)
index 0000000..18bf0ad
--- /dev/null
@@ -0,0 +1,82 @@
+package main
+
+import (
+       "encoding/json"
+       "flag"
+       "fmt"
+       "os"
+)
+
+func usage() {
+       c := DefaultConfig()
+       c.Client.APIHost = "zzzzz.arvadosapi.com:443"
+       exampleConfigFile, err := json.MarshalIndent(c, "    ", "  ")
+       if err != nil {
+               panic(err)
+       }
+       fmt.Fprintf(os.Stderr, `
+
+Keepproxy forwards GET and PUT requests to keepstore servers.  See
+http://doc.arvados.org/install/install-keepproxy.html
+
+Usage: keepproxy [-config path/to/config.json]
+
+Options:
+`)
+       flag.PrintDefaults()
+       fmt.Fprintf(os.Stderr, `
+Example config file:
+    %s
+
+Client.APIHost:
+
+    Address (or address:port) of the Arvados API endpoint.
+
+Client.AuthToken:
+
+    Anonymous API token.
+
+Client.Insecure:
+
+    True if your Arvados API endpoint uses an unverifiable SSL/TLS
+    certificate.
+
+Listen:
+
+    Local port to listen on. Can be "address:port" or ":port", where
+    "address" is a host IP address or name and "port" is a port number
+    or name.
+
+DisableGet:
+
+    Respond 404 to GET and HEAD requests.
+
+DisablePut:
+
+    Respond 404 to PUT, POST, and OPTIONS requests.
+
+DefaultReplicas:
+
+    Default number of replicas to write if not specified by the
+    client. If this is zero or omitted, the site-wide
+    defaultCollectionReplication configuration will be used.
+
+Timeout:
+
+    Timeout for requests to keep services, with units (e.g., "120s",
+    "2m").
+
+PIDFile:
+
+    Path to PID file. During startup this file will be created if
+    needed, and locked using flock() until keepproxy exits. If it is
+    already locked, or any error is encountered while writing to it,
+    keepproxy will exit immediately. If omitted or empty, no PID file
+    will be used.
+
+Debug:
+
+    Enable debug logging.
+
+`, exampleConfigFile)
+}