14538: Merge branch 'master' into 14538-async-write
authorTom Clegg <tclegg@veritasgenetics.com>
Mon, 10 Dec 2018 16:38:35 +0000 (11:38 -0500)
committerTom Clegg <tclegg@veritasgenetics.com>
Mon, 10 Dec 2018 16:38:35 +0000 (11:38 -0500)
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg@veritasgenetics.com>

44 files changed:
apps/workbench/app/controllers/users_controller.rb
apps/workbench/app/controllers/work_units_controller.rb
apps/workbench/test/controllers/users_controller_test.rb
build/package-testing/test-package-python27-python-arvados-cwl-runner.sh
build/run-build-docker-jobs-image.sh
doc/admin/upgrading.html.textile.liquid
doc/install/install-controller.html.textile.liquid
doc/user/cwl/cwl-extensions.html.textile.liquid
docker/jobs/apt.arvados.org.list
sdk/cli/arvados-cli.gemspec
sdk/cwl/arvados_cwl/__init__.py
sdk/cwl/arvados_cwl/arv-cwl-schema.yml
sdk/cwl/arvados_cwl/arvcontainer.py
sdk/cwl/arvados_cwl/arvjob.py
sdk/cwl/arvados_cwl/arvtool.py
sdk/cwl/arvados_cwl/arvworkflow.py
sdk/cwl/arvados_cwl/context.py
sdk/cwl/arvados_cwl/crunch_script.py
sdk/cwl/arvados_cwl/executor.py
sdk/cwl/arvados_cwl/fsaccess.py
sdk/cwl/arvados_cwl/runner.py
sdk/cwl/arvados_cwl/task_queue.py
sdk/cwl/arvados_version.py
sdk/cwl/gittaggers.py
sdk/cwl/setup.py
sdk/cwl/tests/submit_test_job_missing.json [new file with mode: 0644]
sdk/cwl/tests/test_fsaccess.py
sdk/cwl/tests/test_submit.py
sdk/cwl/tests/test_tq.py
sdk/cwl/tests/wf/submit_wf_runner_resources.cwl
sdk/pam/arvados_version.py
sdk/python/arvados_version.py
sdk/python/gittaggers.py
sdk/ruby/arvados.gemspec
services/api/app/models/arvados_model.rb
services/api/app/models/container.rb
services/api/db/migrate/20180917200000_replace_full_text_indexes.rb [new file with mode: 0644]
services/api/db/structure.sql
services/api/test/unit/container_test.rb
services/dockercleaner/arvados_version.py
services/fuse/arvados_version.py
services/login-sync/arvados-login-sync.gemspec
services/nodemanager/arvados_version.py
tools/crunchstat-summary/arvados_version.py

index 8cfc2c10f1c29eee67a11074acfabe70da19aaba..c954944e0b4b8ad75ff75a805caf2927893a8c50 100644 (file)
@@ -144,7 +144,7 @@ class UsersController < ApplicationController
                                       owner_uuid: @object.uuid
                                     }
                                   })
-    redirect_to root_url(api_token: resp[:api_token])
+    redirect_to root_url(api_token: "v2/#{resp[:uuid]}/#{resp[:api_token]}")
   end
 
   def home
index 8527b4d48cb717b941ab376b68255e917c5797a3..767762c81e3cd3d899bda0b3bce873cc97c390b9 100644 (file)
@@ -85,12 +85,6 @@ class WorkUnitsController < ApplicationController
       attrs['state'] = "Uncommitted"
 
       # required
-      attrs['command'] = ["arvados-cwl-runner",
-                          "--local",
-                          "--api=containers",
-                          "--project-uuid=#{params['work_unit']['owner_uuid']}",
-                          "/var/lib/cwl/workflow.json#main",
-                          "/var/lib/cwl/cwl.input.json"]
       attrs['container_image'] = "arvados/jobs"
       attrs['cwd'] = "/var/spool/cwl"
       attrs['output_path'] = "/var/spool/cwl"
@@ -102,6 +96,7 @@ class WorkUnitsController < ApplicationController
         "API" => true
       }
 
+      keep_cache = 256
       input_defaults = {}
       if wf_json
         main = get_cwl_main(wf_json)
@@ -119,11 +114,22 @@ class WorkUnitsController < ApplicationController
               if hint[:ramMin]
                 runtime_constraints["ram"] = hint[:ramMin] * 1024 * 1024
               end
+              if hint[:keep_cache]
+                keep_cache = hint[:keep_cache]
+              end
             end
           end
         end
       end
 
+      attrs['command'] = ["arvados-cwl-runner",
+                          "--local",
+                          "--api=containers",
+                          "--project-uuid=#{params['work_unit']['owner_uuid']}",
+                          "--collection-keep-cache=#{keep_cache}",
+                          "/var/lib/cwl/workflow.json#main",
+                          "/var/lib/cwl/cwl.input.json"]
+
       # mounts
       mounts = {
         "/var/lib/cwl/cwl.input.json" => {
index 50b35021c093f23facb414667e74b84890f311b0..393b864dc53a61f3a81a91af6abd49e836a5e831 100644 (file)
@@ -35,6 +35,14 @@ class UsersControllerTest < ActionController::TestCase
     assert_match /\/users\/welcome/, @response.redirect_url
   end
 
+  test "'log in as user' feature uses a v2 token" do
+    post :sudo, {
+      id: api_fixture('users')['active']['uuid']
+    }, session_for('admin_trustedclient')
+    assert_response :redirect
+    assert_match /api_token=v2%2F/, @response.redirect_url
+  end
+
   test "request shell access" do
     user = api_fixture('users')['spectator']
 
index 0274c8f45ea520eaeb0e4aa453aaaba92f09d984..e499238d89eb2572af6beb6f9d9a05bce1dd8b31 100755 (executable)
@@ -3,6 +3,10 @@
 #
 # SPDX-License-Identifier: AGPL-3.0
 
+set -e
+
+arvados-cwl-runner --version
+
 exec python <<EOF
 import arvados_cwl
 print "arvados-cwl-runner version", arvados_cwl.__version__
index b1e99fc66b27c40d3ac13542e6f7de76ba37e202..83bb5ae7165cb2cfd11879e85411253472887b26 100755 (executable)
@@ -118,6 +118,11 @@ timer_reset
 # clean up the docker build environment
 cd "$WORKSPACE"
 
+if [[ -z "$ARVADOS_BUILDING_VERSION" ]] && ! [[ -z "$version_tag" ]]; then
+       ARVADOS_BUILDING_VERSION="$version_tag"
+       ARVADOS_BUILDING_ITERATION="1"
+fi
+
 python_sdk_ts=$(cd sdk/python && timestamp_from_git)
 cwl_runner_ts=$(cd sdk/cwl && timestamp_from_git)
 
@@ -130,11 +135,25 @@ fi
 
 echo cwl_runner_version $cwl_runner_version python_sdk_version $python_sdk_version
 
+if [[ "${python_sdk_version}" != "${ARVADOS_BUILDING_VERSION}" ]]; then
+       python_sdk_version="${python_sdk_version}-2"
+else
+       python_sdk_version="${ARVADOS_BUILDING_VERSION}-${ARVADOS_BUILDING_ITERATION}"
+fi
+
+cwl_runner_version_orig=$cwl_runner_version
+
+if [[ "${cwl_runner_version}" != "${ARVADOS_BUILDING_VERSION}" ]]; then
+       cwl_runner_version="${cwl_runner_version}-4"
+else
+       cwl_runner_version="${ARVADOS_BUILDING_VERSION}-${ARVADOS_BUILDING_ITERATION}"
+fi
+
 cd docker/jobs
 docker build $NOCACHE \
-       --build-arg python_sdk_version=${python_sdk_version}-2 \
-       --build-arg cwl_runner_version=${cwl_runner_version}-4 \
-       -t arvados/jobs:$cwl_runner_version .
+       --build-arg python_sdk_version=${python_sdk_version} \
+       --build-arg cwl_runner_version=${cwl_runner_version} \
+       -t arvados/jobs:$cwl_runner_version_orig .
 
 ECODE=$?
 
@@ -157,9 +176,9 @@ if docker --version |grep " 1\.[0-9]\." ; then
     FORCE=-f
 fi
 if ! [[ -z "$version_tag" ]]; then
-    docker tag $FORCE arvados/jobs:$cwl_runner_version arvados/jobs:"$version_tag"
+    docker tag $FORCE arvados/jobs:$cwl_runner_version_orig arvados/jobs:"$version_tag"
 else
-    docker tag $FORCE arvados/jobs:$cwl_runner_version arvados/jobs:latest
+    docker tag $FORCE arvados/jobs:$cwl_runner_version_orig arvados/jobs:latest
 fi
 
 ECODE=$?
@@ -185,7 +204,7 @@ else
         if ! [[ -z "$version_tag" ]]; then
             docker_push arvados/jobs:"$version_tag"
         else
-           docker_push arvados/jobs:$cwl_runner_version
+           docker_push arvados/jobs:$cwl_runner_version_orig
            docker_push arvados/jobs:latest
         fi
         title "upload arvados images finished (`timer`)"
index 0941fef339273f903d71ceabdb0aa99813773124..6a3e000ca0165649b0418dba566a43999be1be09 100644 (file)
@@ -30,6 +30,14 @@ Note to developers: Add new items at the top. Include the date, issue number, co
 TODO: extract this information based on git commit messages and generate changelogs / release notes automatically.
 {% endcomment %}
 
+h3. v1.3.0 (2018-12-05)
+
+This release includes several database migrations, which will be executed automatically as part of the API server upgrade. On large Arvados installations, these migrations will take a while. We've seen the upgrade take 30 minutes or more on installations with a lot of collections.
+
+The @arvados-controller@ component now requires the /etc/arvados/config.yml file to be present. See <a href="{{ site.baseurl }}/install/install-controller.html#configuration">the @arvados-controller@ installation instructions</a>.
+
+Support for the deprecated "jobs" API is broken in this release.  Users who rely on it should not upgrade.  This will be fixed in an upcoming 1.3.1 patch release, however users are "encouraged to migrate":upgrade-crunch2.html as support for the "jobs" API will be dropped in an upcoming release.  Users who are already using the "containers" API are not affected.
+
 h3. v1.2.1 (2018-11-26)
 
 There are no special upgrade notes for this release.
index ccb8d980aebc1f3f658a5ed603459ca15878736d..3e94b290d54076e77a12a44097061f6ed935f79f 100644 (file)
@@ -85,7 +85,7 @@ Restart Nginx to apply the new configuration.
 </code></pre>
 </notextile>
 
-h3. Configure arvados-controller
+h3(#configuration). Configure arvados-controller
 
 Create the cluster configuration file @/etc/arvados/config.yml@ using the following template.
 
index 7abc794e198d6910eb64bbf88da79d218c7e2b25..d62002237a7e7b1d43aa7c59f4ef1afa7bc38b84 100644 (file)
@@ -43,6 +43,7 @@ hints:
   arv:WorkflowRunnerResources:
     ramMin: 2048
     coresMin: 2
+    keep_cache: 512
   arv:ClusterTarget:
     cluster_id: clsr1
     project_uuid: clsr1-j7d0g-qxc4jcji7n4lafx
@@ -137,6 +138,7 @@ table(table table-bordered table-condensed).
 |_. Field |_. Type |_. Description |
 |ramMin|int|RAM, in mebibytes, to reserve for the arvados-cwl-runner process. Default 1 GiB|
 |coresMin|int|Number of cores to reserve to the arvados-cwl-runner process. Default 1 core.|
+|keep_cache|int|Size of collection metadata cache for the workflow runner, in MiB.  Default 256 MiB.  Will be added on to the RAM request when determining node size to request.|
 
 h2(#clustertarget). arv:ClusterTarget
 
index ae1a0862a67437e257fa1fdec2919473799ac56c..11b98e25bb0f3905ef299c1bc1d1a94f08c4a664 100644 (file)
@@ -1,3 +1,2 @@
 # apt.arvados.org
-deb http://apt.arvados.org/ jessie main
 deb http://apt.arvados.org/ jessie-dev main
index 723b5166865ab6b272dbb885b92f73c009125141..80abc9c497f2da9f0f72ebd022e47ae4fb07a14e 100644 (file)
@@ -7,7 +7,7 @@ if not File.exist?('/usr/bin/git') then
   exit
 end
 
-git_latest_tag = `git describe --abbrev=0`
+git_latest_tag = `git tag -l |sort -V -r |head -n1`
 git_latest_tag = git_latest_tag.encode('utf-8').strip
 git_timestamp, git_hash = `git log -n1 --first-parent --format=%ct:%H .`.chomp.split(":")
 git_timestamp = Time.at(git_timestamp.to_i).utc
index 9b814f534c11af95aa59b77cdb3eaaa737866195..7e149528308fc9c6e38e0021af858da5450b58f8 100644 (file)
@@ -159,6 +159,10 @@ def arg_parser():  # type: () -> argparse.ArgumentParser
                          default=None,
                          metavar="CLUSTER_ID")
 
+    parser.add_argument("--collection-cache-size", type=int,
+                        default=None,
+                        help="Collection cache size (in MiB, default 256).")
+
     parser.add_argument("--name", type=str,
                         help="Name to use for workflow execution instance.",
                         default=None)
@@ -191,7 +195,7 @@ def arg_parser():  # type: () -> argparse.ArgumentParser
                         help=argparse.SUPPRESS)
 
     parser.add_argument("--thread-count", type=int,
-                        default=4, help="Number of threads to use for job submit and output collection.")
+                        default=1, help="Number of threads to use for job submit and output collection.")
 
     parser.add_argument("--http-timeout", type=int,
                         default=5*60, dest="http_timeout", help="API request timeout in seconds. Default is 300 seconds (5 minutes).")
index 902b1ffba299240438c60c8a0a866db598b2a101..dce1bd4d0247d2f56af8902f844814633b739b25 100644 (file)
@@ -233,6 +233,13 @@ $graph:
       type: int?
       doc: Minimum cores allocated to cwl-runner
       jsonldPredicate: "https://w3id.org/cwl/cwl#ResourceRequirement/coresMin"
+    keep_cache:
+      type: int?
+      doc: |
+        Size of collection metadata cache for the workflow runner, in
+        MiB.  Default 256 MiB.  Will be added on to the RAM request
+        when determining node size to request.
+      jsonldPredicate: "http://arvados.org/cwl#RuntimeConstraints/keep_cache"
 
 - name: ClusterTarget
   type: record
index f1ae65fc0f97e4b29f69fed0ca1fa3cf9ac78de2..4f8c0338b3f04fb9a51afaf21a6d1ebbc47bf992 100644 (file)
@@ -407,15 +407,15 @@ class RunnerContainer(Runner):
             "secret_mounts": secret_mounts,
             "runtime_constraints": {
                 "vcpus": math.ceil(self.submit_runner_cores),
-                "ram": math.ceil(1024*1024 * self.submit_runner_ram),
+                "ram": 1024*1024 * (math.ceil(self.submit_runner_ram) + math.ceil(self.collection_cache_size)),
                 "API": True
             },
             "use_existing": self.enable_reuse,
             "properties": {}
         }
 
-        if self.tool.tool.get("id", "").startswith("keep:"):
-            sp = self.tool.tool["id"].split('/')
+        if self.embedded_tool.tool.get("id", "").startswith("keep:"):
+            sp = self.embedded_tool.tool["id"].split('/')
             workflowcollection = sp[0][5:]
             workflowname = "/".join(sp[1:])
             workflowpath = "/var/lib/cwl/workflow/%s" % workflowname
@@ -424,14 +424,14 @@ class RunnerContainer(Runner):
                 "portable_data_hash": "%s" % workflowcollection
             }
         else:
-            packed = packed_workflow(self.arvrunner, self.tool, self.merged_map)
+            packed = packed_workflow(self.arvrunner, self.embedded_tool, self.merged_map)
             workflowpath = "/var/lib/cwl/workflow.json#main"
             container_req["mounts"]["/var/lib/cwl/workflow.json"] = {
                 "kind": "json",
                 "content": packed
             }
-            if self.tool.tool.get("id", "").startswith("arvwf:"):
-                container_req["properties"]["template_uuid"] = self.tool.tool["id"][6:33]
+            if self.embedded_tool.tool.get("id", "").startswith("arvwf:"):
+                container_req["properties"]["template_uuid"] = self.embedded_tool.tool["id"][6:33]
 
 
         # --local means execute the workflow instead of submitting a container request
@@ -441,6 +441,7 @@ class RunnerContainer(Runner):
         # --eval-timeout is the timeout for javascript invocation
         # --parallel-task-count is the number of threads to use for job submission
         # --enable/disable-reuse sets desired job reuse
+        # --collection-cache-size sets aside memory to store collections
         command = ["arvados-cwl-runner",
                    "--local",
                    "--api=containers",
@@ -448,7 +449,8 @@ class RunnerContainer(Runner):
                    "--disable-validate",
                    "--eval-timeout=%s" % self.arvrunner.eval_timeout,
                    "--thread-count=%s" % self.arvrunner.thread_count,
-                   "--enable-reuse" if self.enable_reuse else "--disable-reuse"]
+                   "--enable-reuse" if self.enable_reuse else "--disable-reuse",
+                   "--collection-cache-size=%s" % self.collection_cache_size]
 
         if self.output_name:
             command.append("--output-name=" + self.output_name)
index 9a03372d32de9375e9401fe4fc4099dce61f1181..2e4ef55015157c4196c649797029b1e9912e00bb 100644 (file)
@@ -296,10 +296,10 @@ class RunnerJob(Runner):
         a pipeline template or pipeline instance.
         """
 
-        if self.tool.tool["id"].startswith("keep:"):
-            self.job_order["cwl:tool"] = self.tool.tool["id"][5:]
+        if self.embedded_tool.tool["id"].startswith("keep:"):
+            self.job_order["cwl:tool"] = self.embedded_tool.tool["id"][5:]
         else:
-            packed = packed_workflow(self.arvrunner, self.tool, self.merged_map)
+            packed = packed_workflow(self.arvrunner, self.embedded_tool, self.merged_map)
             wf_pdh = upload_workflow_collection(self.arvrunner, self.name, packed)
             self.job_order["cwl:tool"] = "%s/workflow.cwl#main" % wf_pdh
 
@@ -386,19 +386,21 @@ class RunnerTemplate(object):
     }
 
     def __init__(self, runner, tool, job_order, enable_reuse, uuid,
-                 submit_runner_ram=0, name=None, merged_map=None):
+                 submit_runner_ram=0, name=None, merged_map=None,
+                 loadingContext=None):
         self.runner = runner
-        self.tool = tool
+        self.embedded_tool = tool
         self.job = RunnerJob(
             runner=runner,
             tool=tool,
-            job_order=job_order,
             enable_reuse=enable_reuse,
             output_name=None,
             output_tags=None,
             submit_runner_ram=submit_runner_ram,
             name=name,
-            merged_map=merged_map)
+            merged_map=merged_map,
+            loadingContext=loadingContext)
+        self.job.job_order = job_order
         self.uuid = uuid
 
     def pipeline_component_spec(self):
@@ -420,7 +422,7 @@ class RunnerTemplate(object):
         job_params = spec['script_parameters']
         spec['script_parameters'] = {}
 
-        for param in self.tool.tool['inputs']:
+        for param in self.embedded_tool.tool['inputs']:
             param = copy.deepcopy(param)
 
             # Data type and "required" flag...
index cd319e55b12137db6170b37b763a0dccb36d497b..c4e9f44abb0b20ecb66a7bdc13c5240beaaeeccb 100644 (file)
@@ -2,7 +2,7 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
-from cwltool.command_line_tool import CommandLineTool
+from cwltool.command_line_tool import CommandLineTool, ExpressionTool
 from cwltool.builder import Builder
 from .arvjob import ArvadosJob
 from .arvcontainer import ArvadosContainer
@@ -105,3 +105,15 @@ class ArvadosCommandTool(CommandLineTool):
             runtimeContext.tmpdir = "$(task.tmpdir)"
             runtimeContext.docker_tmpdir = "$(task.tmpdir)"
         return super(ArvadosCommandTool, self).job(joborder, output_callback, runtimeContext)
+
+class ArvadosExpressionTool(ExpressionTool):
+    def __init__(self, arvrunner, toolpath_object, loadingContext):
+        super(ArvadosExpressionTool, self).__init__(toolpath_object, loadingContext)
+        self.arvrunner = arvrunner
+
+    def job(self,
+            job_order,         # type: Mapping[Text, Text]
+            output_callback,  # type: Callable[[Any, Any], Any]
+            runtimeContext     # type: RuntimeContext
+           ):
+        return super(ArvadosExpressionTool, self).job(job_order, self.arvrunner.get_wrapped_callback(output_callback), runtimeContext)
index f514476b9099895d3739dbb34ab0a0c372bd7e73..ea167d4044d76fa91953eb401962107afd6b878e 100644 (file)
@@ -155,7 +155,7 @@ class ArvadosWorkflow(Workflow):
 
     def job(self, joborder, output_callback, runtimeContext):
 
-        builder = self._init_job(joborder, runtimeContext)
+        builder = make_builder(joborder, self.hints, self.requirements, runtimeContext)
         runtimeContext = set_cluster_target(self.tool, self.arvrunner, builder, runtimeContext)
 
         req, _ = self.get_requirement("http://arvados.org/cwl#RunInSingleContainer")
@@ -205,6 +205,9 @@ class ArvadosWorkflow(Workflow):
                                                     raise WorkflowException("Non-top-level ResourceRequirement in single container cannot have expressions")
                                 if not dyn:
                                     self.static_resource_req.append(req)
+                            if req["class"] == "DockerRequirement":
+                                if "http://arvados.org/cwl#dockerCollectionPDH" in req:
+                                    del req["http://arvados.org/cwl#dockerCollectionPDH"]
 
                 visit_class(packed["$graph"], ("Workflow", "CommandLineTool"), visit)
 
index 7831e1cfd0822abbcac5a77c460a33e8ff492714..8cfe22ad7b6619f1f02d95eaf71153e44e52fd01 100644 (file)
@@ -34,6 +34,7 @@ class ArvRuntimeContext(RuntimeContext):
         self.submit_runner_cluster = None
         self.cluster_target_id = 0
         self.always_submit_runner = False
+        self.collection_cache_size = 256
 
         super(ArvRuntimeContext, self).__init__(kwargs)
 
index 7512d5bef27f28014f650d897d24e3d59cb7b3c4..61f9cbbe0dc80a7ce7c4894ccb2697c0b0310652 100644 (file)
@@ -103,6 +103,7 @@ def run():
         arvargs.output_name = output_name
         arvargs.output_tags = output_tags
         arvargs.thread_count = 1
+        arvargs.collection_cache_size = None
 
         runner = arvados_cwl.ArvCwlExecutor(api_client=arvados.safeapi.ThreadSafeApiCache(
             api_params={"model": OrderedJsonModel()}, keep_params={"num_retries": 4}),
index 6cac709260fcef6f464d5d7e54706305e883a685..27774b2f7cf6bd1fbb9bd8474f5dde4e7e4d6d51 100644 (file)
@@ -27,9 +27,9 @@ import arvados_cwl.util
 from .arvcontainer import RunnerContainer
 from .arvjob import RunnerJob, RunnerTemplate
 from .runner import Runner, upload_docker, upload_job_order, upload_workflow_deps
-from .arvtool import ArvadosCommandTool, validate_cluster_target
+from .arvtool import ArvadosCommandTool, validate_cluster_target, ArvadosExpressionTool
 from .arvworkflow import ArvadosWorkflow, upload_workflow
-from .fsaccess import CollectionFsAccess, CollectionFetcher, collectionResolver, CollectionCache
+from .fsaccess import CollectionFsAccess, CollectionFetcher, collectionResolver, CollectionCache, pdh_size
 from .perf import Perf
 from .pathmapper import NoFollowPathMapper
 from .task_queue import TaskQueue
@@ -37,7 +37,7 @@ from .context import ArvLoadingContext, ArvRuntimeContext
 from ._version import __version__
 
 from cwltool.process import shortname, UnsupportedRequirement, use_custom_schema
-from cwltool.pathmapper import adjustFileObjs, adjustDirObjs, get_listing
+from cwltool.pathmapper import adjustFileObjs, adjustDirObjs, get_listing, visit_class
 from cwltool.command_line_tool import compute_checksums
 
 logger = logging.getLogger('arvados.cwl-runner')
@@ -95,6 +95,7 @@ class ArvCwlExecutor(object):
             arvargs.output_name = None
             arvargs.output_tags = None
             arvargs.thread_count = 1
+            arvargs.collection_cache_size = None
 
         self.api = api_client
         self.processes = {}
@@ -116,13 +117,21 @@ class ArvCwlExecutor(object):
         self.thread_count = arvargs.thread_count
         self.poll_interval = 12
         self.loadingContext = None
+        self.should_estimate_cache_size = True
 
         if keep_client is not None:
             self.keep_client = keep_client
         else:
             self.keep_client = arvados.keep.KeepClient(api_client=self.api, num_retries=self.num_retries)
 
-        self.collection_cache = CollectionCache(self.api, self.keep_client, self.num_retries)
+        if arvargs.collection_cache_size:
+            collection_cache_size = arvargs.collection_cache_size*1024*1024
+            self.should_estimate_cache_size = False
+        else:
+            collection_cache_size = 256*1024*1024
+
+        self.collection_cache = CollectionCache(self.api, self.keep_client, self.num_retries,
+                                                cap=collection_cache_size)
 
         self.fetcher_constructor = partial(CollectionFetcher,
                                            api_client=self.api,
@@ -186,8 +195,10 @@ http://doc.arvados.org/install/install-api-server.html#disable_api_methods
             return ArvadosCommandTool(self, toolpath_object, loadingContext)
         elif "class" in toolpath_object and toolpath_object["class"] == "Workflow":
             return ArvadosWorkflow(self, toolpath_object, loadingContext)
+        elif "class" in toolpath_object and toolpath_object["class"] == "ExpressionTool":
+            return ArvadosExpressionTool(self, toolpath_object, loadingContext)
         else:
-            return cwltool.workflow.default_make_tool(toolpath_object, loadingContext)
+            raise Exception("Unknown tool %s" % toolpath_object.get("class"))
 
     def output_callback(self, out, processStatus):
         with self.workflow_eval_lock:
@@ -206,7 +217,8 @@ http://doc.arvados.org/install/install-api-server.html#disable_api_methods
 
 
     def start_run(self, runnable, runtimeContext):
-        self.task_queue.add(partial(runnable.run, runtimeContext))
+        self.task_queue.add(partial(runnable.run, runtimeContext),
+                            self.workflow_eval_lock, self.stop_polling)
 
     def process_submitted(self, container):
         with self.workflow_eval_lock:
@@ -216,7 +228,8 @@ http://doc.arvados.org/install/install-api-server.html#disable_api_methods
         with self.workflow_eval_lock:
             j = self.processes[uuid]
             logger.info("%s %s is %s", self.label(j), uuid, record["state"])
-            self.task_queue.add(partial(j.done, record))
+            self.task_queue.add(partial(j.done, record),
+                                self.workflow_eval_lock, self.stop_polling)
             del self.processes[uuid]
 
     def runtime_status_update(self, kind, message, detail=None):
@@ -546,7 +559,8 @@ http://doc.arvados.org/install/install-api-server.html#disable_api_methods
                                       uuid=existing_uuid,
                                       submit_runner_ram=runtimeContext.submit_runner_ram,
                                       name=runtimeContext.name,
-                                      merged_map=merged_map)
+                                      merged_map=merged_map,
+                                      loadingContext=loadingContext)
                 tmpl.save()
                 # cwltool.main will write our return value to stdout.
                 return (tmpl.uuid, "success")
@@ -584,17 +598,29 @@ http://doc.arvados.org/install/install-api-server.html#disable_api_methods
         if runtimeContext.priority < 1 or runtimeContext.priority > 1000:
             raise Exception("--priority must be in the range 1..1000.")
 
+        if self.should_estimate_cache_size:
+            visited = set()
+            estimated_size = [0]
+            def estimate_collection_cache(obj):
+                if obj.get("location", "").startswith("keep:"):
+                    m = pdh_size.match(obj["location"][5:])
+                    if m and m.group(1) not in visited:
+                        visited.add(m.group(1))
+                        estimated_size[0] += int(m.group(2))
+            visit_class(job_order, ("File", "Directory"), estimate_collection_cache)
+            runtimeContext.collection_cache_size = max(((estimated_size[0]*192) / (1024*1024))+1, 256)
+            self.collection_cache.set_cap(runtimeContext.collection_cache_size*1024*1024)
+
+        logger.info("Using collection cache size %s MiB", runtimeContext.collection_cache_size)
+
         runnerjob = None
         if runtimeContext.submit:
             # Submit a runner job to run the workflow for us.
             if self.work_api == "containers":
                 if tool.tool["class"] == "CommandLineTool" and runtimeContext.wait and (not runtimeContext.always_submit_runner):
                     runtimeContext.runnerjob = tool.tool["id"]
-                    runnerjob = tool.job(job_order,
-                                         self.output_callback,
-                                         runtimeContext).next()
                 else:
-                    runnerjob = RunnerContainer(self, tool, job_order, runtimeContext.enable_reuse,
+                    tool = RunnerContainer(self, tool, loadingContext, runtimeContext.enable_reuse,
                                                 self.output_name,
                                                 self.output_tags,
                                                 submit_runner_ram=runtimeContext.submit_runner_ram,
@@ -604,9 +630,11 @@ http://doc.arvados.org/install/install-api-server.html#disable_api_methods
                                                 intermediate_output_ttl=runtimeContext.intermediate_output_ttl,
                                                 merged_map=merged_map,
                                                 priority=runtimeContext.priority,
-                                                secret_store=self.secret_store)
+                                                secret_store=self.secret_store,
+                                                collection_cache_size=runtimeContext.collection_cache_size,
+                                                collection_cache_is_default=self.should_estimate_cache_size)
             elif self.work_api == "jobs":
-                runnerjob = RunnerJob(self, tool, job_order, runtimeContext.enable_reuse,
+                tool = RunnerJob(self, tool, loadingContext, runtimeContext.enable_reuse,
                                       self.output_name,
                                       self.output_tags,
                                       submit_runner_ram=runtimeContext.submit_runner_ram,
@@ -624,10 +652,16 @@ http://doc.arvados.org/install/install-api-server.html#disable_api_methods
                     "state": "RunningOnClient"}).execute(num_retries=self.num_retries)
             logger.info("Pipeline instance %s", self.pipeline["uuid"])
 
-        if runnerjob and not runtimeContext.wait:
-            submitargs = runtimeContext.copy()
-            submitargs.submit = False
-            runnerjob.run(submitargs)
+        if runtimeContext.cwl_runner_job is not None:
+            self.uuid = runtimeContext.cwl_runner_job.get('uuid')
+
+        jobiter = tool.job(job_order,
+                           self.output_callback,
+                           runtimeContext)
+
+        if runtimeContext.submit and not runtimeContext.wait:
+            runnerjob = jobiter.next()
+            runnerjob.run(runtimeContext)
             return (runnerjob.uuid, "success")
 
         current_container = arvados_cwl.util.get_current_container(self.api, self.num_retries, logger)
@@ -642,14 +676,6 @@ http://doc.arvados.org/install/install-api-server.html#disable_api_methods
 
         try:
             self.workflow_eval_lock.acquire()
-            if runnerjob:
-                jobiter = iter((runnerjob,))
-            else:
-                if runtimeContext.cwl_runner_job is not None:
-                    self.uuid = runtimeContext.cwl_runner_job.get('uuid')
-                jobiter = tool.job(job_order,
-                                   self.output_callback,
-                                   runtimeContext)
 
             # Holds the lock while this code runs and releases it when
             # it is safe to do so in self.workflow_eval_lock.wait(),
@@ -676,6 +702,10 @@ http://doc.arvados.org/install/install-api-server.html#disable_api_methods
                     else:
                         logger.error("Workflow is deadlocked, no runnable processes and not waiting on any pending processes.")
                         break
+
+                if self.stop_polling.is_set():
+                    break
+
                 loopperf.__enter__()
             loopperf.__exit__()
 
@@ -694,8 +724,10 @@ http://doc.arvados.org/install/install-api-server.html#disable_api_methods
             if self.pipeline:
                 self.api.pipeline_instances().update(uuid=self.pipeline["uuid"],
                                                      body={"state": "Failed"}).execute(num_retries=self.num_retries)
-            if runnerjob and runnerjob.uuid and self.work_api == "containers":
-                self.api.container_requests().update(uuid=runnerjob.uuid,
+            if runtimeContext.submit and isinstance(tool, Runner):
+                runnerjob = tool
+                if runnerjob.uuid and self.work_api == "containers":
+                    self.api.container_requests().update(uuid=runnerjob.uuid,
                                                      body={"priority": "0"}).execute(num_retries=self.num_retries)
         finally:
             self.workflow_eval_lock.release()
@@ -710,8 +742,8 @@ http://doc.arvados.org/install/install-api-server.html#disable_api_methods
         if self.final_output is None:
             raise WorkflowException("Workflow did not return a result.")
 
-        if runtimeContext.submit and isinstance(runnerjob, Runner):
-            logger.info("Final output collection %s", runnerjob.final_output)
+        if runtimeContext.submit and isinstance(tool, Runner):
+            logger.info("Final output collection %s", tool.final_output)
         else:
             if self.output_name is None:
                 self.output_name = "Output of %s" % (shortname(tool.tool["id"]))
index 5981268128486496f22d28b4928d0b90b5775e7e..0816ee8fc05b74198ae9abad69887905bf8113ee 100644 (file)
@@ -28,6 +28,8 @@ from schema_salad.ref_resolver import DefaultFetcher
 
 logger = logging.getLogger('arvados.cwl-runner')
 
+pdh_size = re.compile(r'([0-9a-f]{32})\+(\d+)(\+\S+)*')
+
 class CollectionCache(object):
     def __init__(self, api_client, keep_client, num_retries,
                  cap=256*1024*1024,
@@ -41,20 +43,26 @@ class CollectionCache(object):
         self.cap = cap
         self.min_entries = min_entries
 
-    def cap_cache(self):
-        if self.total > self.cap:
-            # ordered list iterates from oldest to newest
-            for pdh, v in self.collections.items():
-                if self.total < self.cap or len(self.collections) < self.min_entries:
-                    break
-                # cut it loose
-                logger.debug("Evicting collection reader %s from cache", pdh)
-                del self.collections[pdh]
-                self.total -= v[1]
+    def set_cap(self, cap):
+        self.cap = cap
+
+    def cap_cache(self, required):
+        # ordered dict iterates from oldest to newest
+        for pdh, v in self.collections.items():
+            available = self.cap - self.total
+            if available >= required or len(self.collections) < self.min_entries:
+                return
+            # cut it loose
+            logger.debug("Evicting collection reader %s from cache (cap %s total %s required %s)", pdh, self.cap, self.total, required)
+            del self.collections[pdh]
+            self.total -= v[1]
 
     def get(self, pdh):
         with self.lock:
             if pdh not in self.collections:
+                m = pdh_size.match(pdh)
+                if m:
+                    self.cap_cache(int(m.group(2)) * 128)
                 logger.debug("Creating collection reader for %s", pdh)
                 cr = arvados.collection.CollectionReader(pdh, api_client=self.api_client,
                                                          keep_client=self.keep_client,
@@ -62,7 +70,6 @@ class CollectionCache(object):
                 sz = len(cr.manifest_text()) * 128
                 self.collections[pdh] = (cr, sz)
                 self.total += sz
-                self.cap_cache()
             else:
                 cr, sz = self.collections[pdh]
                 # bump it to the back
index a846f2b0016931dcd6a938c47f9572083963d2bc..1d5f98f200c16bb444878da3b418f01b57bf1002 100644 (file)
@@ -16,7 +16,7 @@ from schema_salad.sourceline import SourceLine, cmap
 
 from cwltool.command_line_tool import CommandLineTool
 import cwltool.workflow
-from cwltool.process import scandeps, UnsupportedRequirement, normalizeFilesDirs, shortname
+from cwltool.process import scandeps, UnsupportedRequirement, normalizeFilesDirs, shortname, Process
 from cwltool.load_tool import fetch_document
 from cwltool.pathmapper import adjustFileObjs, adjustDirObjs, visit_class
 from cwltool.utils import aslist
@@ -356,23 +356,28 @@ def upload_workflow_collection(arvrunner, name, packed):
     return collection.portable_data_hash()
 
 
-class Runner(object):
+class Runner(Process):
     """Base class for runner processes, which submit an instance of
     arvados-cwl-runner and wait for the final result."""
 
-    def __init__(self, runner, tool, job_order, enable_reuse,
+    def __init__(self, runner, tool, loadingContext, enable_reuse,
                  output_name, output_tags, submit_runner_ram=0,
                  name=None, on_error=None, submit_runner_image=None,
                  intermediate_output_ttl=0, merged_map=None,
-                 priority=None, secret_store=None):
+                 priority=None, secret_store=None,
+                 collection_cache_size=256,
+                 collection_cache_is_default=True):
+
+        super(Runner, self).__init__(tool.tool, loadingContext)
+
         self.arvrunner = runner
-        self.tool = tool
-        self.job_order = job_order
+        self.embedded_tool = tool
+        self.job_order = None
         self.running = False
         if enable_reuse:
             # If reuse is permitted by command line arguments but
             # disabled by the workflow itself, disable it.
-            reuse_req, _ = self.tool.get_requirement("http://arvados.org/cwl#ReuseRequirement")
+            reuse_req, _ = self.embedded_tool.get_requirement("http://arvados.org/cwl#ReuseRequirement")
             if reuse_req:
                 enable_reuse = reuse_req["enableReuse"]
         self.enable_reuse = enable_reuse
@@ -389,13 +394,16 @@ class Runner(object):
 
         self.submit_runner_cores = 1
         self.submit_runner_ram = 1024  # defaut 1 GiB
+        self.collection_cache_size = collection_cache_size
 
-        runner_resource_req, _ = self.tool.get_requirement("http://arvados.org/cwl#WorkflowRunnerResources")
+        runner_resource_req, _ = self.embedded_tool.get_requirement("http://arvados.org/cwl#WorkflowRunnerResources")
         if runner_resource_req:
             if runner_resource_req.get("coresMin"):
                 self.submit_runner_cores = runner_resource_req["coresMin"]
             if runner_resource_req.get("ramMin"):
                 self.submit_runner_ram = runner_resource_req["ramMin"]
+            if runner_resource_req.get("keep_cache") and collection_cache_is_default:
+                self.collection_cache_size = runner_resource_req["keep_cache"]
 
         if submit_runner_ram:
             # Command line / initializer overrides default and/or spec from workflow
@@ -409,6 +417,15 @@ class Runner(object):
 
         self.merged_map = merged_map or {}
 
+    def job(self,
+            job_order,         # type: Mapping[Text, Text]
+            output_callbacks,  # type: Callable[[Any, Any], Any]
+            runtimeContext     # type: RuntimeContext
+           ):  # type: (...) -> Generator[Any, None, None]
+        self.job_order = job_order
+        self._init_job(job_order, runtimeContext)
+        yield self
+
     def update_pipeline_component(self, record):
         pass
 
index b9fd09807b452c1b06738ef1a7df72fd9dcc8708..1c233fac0ad98f4b0421a4e0856b00fd19d1422f 100644 (file)
@@ -11,7 +11,7 @@ logger = logging.getLogger('arvados.cwl-runner')
 class TaskQueue(object):
     def __init__(self, lock, thread_count):
         self.thread_count = thread_count
-        self.task_queue = Queue.Queue()
+        self.task_queue = Queue.Queue(maxsize=self.thread_count)
         self.task_queue_threads = []
         self.lock = lock
         self.in_flight = 0
@@ -23,27 +23,39 @@ class TaskQueue(object):
             t.start()
 
     def task_queue_func(self):
+        while True:
+            task = self.task_queue.get()
+            if task is None:
+                return
+            try:
+                task()
+            except Exception as e:
+                logger.exception("Unhandled exception running task")
+                self.error = e
 
-            while True:
-                task = self.task_queue.get()
-                if task is None:
-                    return
-                try:
-                    task()
-                except Exception as e:
-                    logger.exception("Unhandled exception running task")
-                    self.error = e
-
-                with self.lock:
-                    self.in_flight -= 1
-
-    def add(self, task):
-        with self.lock:
-            if self.thread_count > 1:
+            with self.lock:
+                self.in_flight -= 1
+
+    def add(self, task, unlock, check_done):
+        if self.thread_count > 1:
+            with self.lock:
                 self.in_flight += 1
-                self.task_queue.put(task)
-            else:
-                task()
+        else:
+            task()
+            return
+
+        while True:
+            try:
+                unlock.release()
+                if check_done.is_set():
+                    return
+                self.task_queue.put(task, block=True, timeout=3)
+                return
+            except Queue.Full:
+                pass
+            finally:
+                unlock.acquire()
+
 
     def drain(self):
         try:
index 88cf1ed7caa1da04fd5a1794c616cd5a0f2039b3..d13dd5ec538e678268d7b79836d745ba89d46047 100644 (file)
@@ -10,9 +10,9 @@ import re
 SETUP_DIR = os.path.dirname(__file__) or '.'
 
 def git_latest_tag():
-    gitinfo = subprocess.check_output(
-        ['git', 'describe', '--abbrev=0']).strip()
-    return str(gitinfo.decode('utf-8'))
+    gittags = subprocess.check_output(['git', 'tag', '-l']).split()
+    gittags.sort(key=lambda s: [int(u) for u in s.split(b'.')],reverse=True)
+    return str(next(iter(gittags)).decode('utf-8'))
 
 def choose_version_from():
     sdk_ts = subprocess.check_output(
index 8ccb6645de8c78ccf77d3e049fa1b1e6257e5c91..4dc8448476123934dae7193fe680141671a2b7ec 100644 (file)
@@ -29,9 +29,9 @@ class EggInfoFromGit(egg_info):
     from source package), leave it alone.
     """
     def git_latest_tag(self):
-        gitinfo = subprocess.check_output(
-            ['git', 'describe', '--abbrev=0']).strip()
-        return str(gitinfo.decode('utf-8'))
+        gittags = subprocess.check_output(['git', 'tag', '-l']).split()
+        gittags.sort(key=lambda s: [int(u) for u in s.split(b'.')],reverse=True)
+        return str(next(iter(gittags)).decode('utf-8'))
 
     def git_timestamp_tag(self):
         gitinfo = subprocess.check_output(
index 5d373282b6ca8ba3129b28c78e1bbe8934fda005..9d25a562ab32d09dcdfba627fc2089260879cce1 100644 (file)
@@ -37,7 +37,7 @@ setup(name='arvados-cwl-runner',
           'schema-salad==2.7.20181116024232',
           'typing >= 3.6.4',
           'ruamel.yaml >=0.15.54, <=0.15.77',
-          'arvados-python-client>=1.1.4.20180607143841',
+          'arvados-python-client>=1.2.1.20181130020805',
           'setuptools',
           'ciso8601 >=1.0.6, <2.0.0',
           'subprocess32>=3.5.1',
diff --git a/sdk/cwl/tests/submit_test_job_missing.json b/sdk/cwl/tests/submit_test_job_missing.json
new file mode 100644 (file)
index 0000000..02d61fa
--- /dev/null
@@ -0,0 +1,14 @@
+{
+    "x": {
+        "class": "File",
+        "path": "input/blorp.txt"
+    },
+    "y": {
+        "class": "Directory",
+        "location": "keep:99999999999999999999999999999998+99",
+        "listing": [{
+            "class": "File",
+            "location": "keep:99999999999999999999999999999998+99/file1.txt"
+        }]
+    }
+}
index d52e948710188dfb16e5ce175f5eb317138c7449..f83612a8b01186d822eb00728a76d31569408ced 100644 (file)
@@ -36,34 +36,34 @@ class TestFsAccess(unittest.TestCase):
         cache = CollectionCache(mock.MagicMock(), mock.MagicMock(), 4)
         cr().manifest_text.return_value = 'x' * 524289
         self.assertEqual(0, cache.total)
-        c1 = cache.get("99999999999999999999999999999991+99")
-        self.assertIn("99999999999999999999999999999991+99", cache.collections)
-        self.assertNotIn("99999999999999999999999999999992+99", cache.collections)
+        c1 = cache.get("99999999999999999999999999999991+524289")
+        self.assertIn("99999999999999999999999999999991+524289", cache.collections)
+        self.assertNotIn("99999999999999999999999999999992+524289", cache.collections)
         self.assertEqual((524289*128)*1, cache.total)
 
-        c2 = cache.get("99999999999999999999999999999992+99")
-        self.assertIn("99999999999999999999999999999991+99", cache.collections)
-        self.assertIn("99999999999999999999999999999992+99", cache.collections)
+        c2 = cache.get("99999999999999999999999999999992+524289")
+        self.assertIn("99999999999999999999999999999991+524289", cache.collections)
+        self.assertIn("99999999999999999999999999999992+524289", cache.collections)
         self.assertEqual((524289*128)*2, cache.total)
 
-        c1 = cache.get("99999999999999999999999999999991+99")
-        self.assertIn("99999999999999999999999999999991+99", cache.collections)
-        self.assertIn("99999999999999999999999999999992+99", cache.collections)
+        c1 = cache.get("99999999999999999999999999999991+524289")
+        self.assertIn("99999999999999999999999999999991+524289", cache.collections)
+        self.assertIn("99999999999999999999999999999992+524289", cache.collections)
         self.assertEqual((524289*128)*2, cache.total)
 
-        c3 = cache.get("99999999999999999999999999999993+99")
-        self.assertIn("99999999999999999999999999999991+99", cache.collections)
-        self.assertIn("99999999999999999999999999999992+99", cache.collections)
+        c3 = cache.get("99999999999999999999999999999993+524289")
+        self.assertIn("99999999999999999999999999999991+524289", cache.collections)
+        self.assertIn("99999999999999999999999999999992+524289", cache.collections)
         self.assertEqual((524289*128)*3, cache.total)
 
-        c4 = cache.get("99999999999999999999999999999994+99")
-        self.assertIn("99999999999999999999999999999991+99", cache.collections)
-        self.assertNotIn("99999999999999999999999999999992+99", cache.collections)
+        c4 = cache.get("99999999999999999999999999999994+524289")
+        self.assertIn("99999999999999999999999999999991+524289", cache.collections)
+        self.assertNotIn("99999999999999999999999999999992+524289", cache.collections)
         self.assertEqual((524289*128)*3, cache.total)
 
-        c5 = cache.get("99999999999999999999999999999995+99")
-        self.assertNotIn("99999999999999999999999999999991+99", cache.collections)
-        self.assertNotIn("99999999999999999999999999999992+99", cache.collections)
+        c5 = cache.get("99999999999999999999999999999995+524289")
+        self.assertNotIn("99999999999999999999999999999991+524289", cache.collections)
+        self.assertNotIn("99999999999999999999999999999992+524289", cache.collections)
         self.assertEqual((524289*128)*3, cache.total)
 
 
@@ -72,37 +72,37 @@ class TestFsAccess(unittest.TestCase):
         cache = CollectionCache(mock.MagicMock(), mock.MagicMock(), 4)
         cr().manifest_text.return_value = 'x' * 524287
         self.assertEqual(0, cache.total)
-        c1 = cache.get("99999999999999999999999999999991+99")
-        self.assertIn("99999999999999999999999999999991+99", cache.collections)
-        self.assertNotIn("99999999999999999999999999999992+99", cache.collections)
+        c1 = cache.get("99999999999999999999999999999991+524287")
+        self.assertIn("99999999999999999999999999999991+524287", cache.collections)
+        self.assertNotIn("99999999999999999999999999999992+524287", cache.collections)
         self.assertEqual((524287*128)*1, cache.total)
 
-        c2 = cache.get("99999999999999999999999999999992+99")
-        self.assertIn("99999999999999999999999999999991+99", cache.collections)
-        self.assertIn("99999999999999999999999999999992+99", cache.collections)
+        c2 = cache.get("99999999999999999999999999999992+524287")
+        self.assertIn("99999999999999999999999999999991+524287", cache.collections)
+        self.assertIn("99999999999999999999999999999992+524287", cache.collections)
         self.assertEqual((524287*128)*2, cache.total)
 
-        c1 = cache.get("99999999999999999999999999999991+99")
-        self.assertIn("99999999999999999999999999999991+99", cache.collections)
-        self.assertIn("99999999999999999999999999999992+99", cache.collections)
+        c1 = cache.get("99999999999999999999999999999991+524287")
+        self.assertIn("99999999999999999999999999999991+524287", cache.collections)
+        self.assertIn("99999999999999999999999999999992+524287", cache.collections)
         self.assertEqual((524287*128)*2, cache.total)
 
-        c3 = cache.get("99999999999999999999999999999993+99")
-        self.assertIn("99999999999999999999999999999991+99", cache.collections)
-        self.assertIn("99999999999999999999999999999992+99", cache.collections)
+        c3 = cache.get("99999999999999999999999999999993+524287")
+        self.assertIn("99999999999999999999999999999991+524287", cache.collections)
+        self.assertIn("99999999999999999999999999999992+524287", cache.collections)
         self.assertEqual((524287*128)*3, cache.total)
 
-        c4 = cache.get("99999999999999999999999999999994+99")
-        self.assertIn("99999999999999999999999999999991+99", cache.collections)
-        self.assertIn("99999999999999999999999999999992+99", cache.collections)
+        c4 = cache.get("99999999999999999999999999999994+524287")
+        self.assertIn("99999999999999999999999999999991+524287", cache.collections)
+        self.assertIn("99999999999999999999999999999992+524287", cache.collections)
         self.assertEqual((524287*128)*4, cache.total)
 
-        c5 = cache.get("99999999999999999999999999999995+99")
-        self.assertIn("99999999999999999999999999999991+99", cache.collections)
-        self.assertNotIn("99999999999999999999999999999992+99", cache.collections)
+        c5 = cache.get("99999999999999999999999999999995+524287")
+        self.assertIn("99999999999999999999999999999991+524287", cache.collections)
+        self.assertNotIn("99999999999999999999999999999992+524287", cache.collections)
         self.assertEqual((524287*128)*4, cache.total)
 
-        c6 = cache.get("99999999999999999999999999999996+99")
-        self.assertNotIn("99999999999999999999999999999991+99", cache.collections)
-        self.assertNotIn("99999999999999999999999999999992+99", cache.collections)
+        c6 = cache.get("99999999999999999999999999999996+524287")
+        self.assertNotIn("99999999999999999999999999999991+524287", cache.collections)
+        self.assertNotIn("99999999999999999999999999999992+524287", cache.collections)
         self.assertEqual((524287*128)*4, cache.total)
index 1b892a9836f209857995fbf4e94002e015dcd1cf..90dab01471ef61ab380955e6301a73306648edef 100644 (file)
@@ -273,8 +273,8 @@ def stubs(func):
             'state': 'Committed',
             'command': ['arvados-cwl-runner', '--local', '--api=containers',
                         '--no-log-timestamps', '--disable-validate',
-                        '--eval-timeout=20', '--thread-count=4',
-                        '--enable-reuse', '--debug', '--on-error=continue',
+                        '--eval-timeout=20', '--thread-count=1',
+                        '--enable-reuse', "--collection-cache-size=256", '--debug', '--on-error=continue',
                         '/var/lib/cwl/workflow.json#main', '/var/lib/cwl/cwl.input.json'],
             'name': 'submit_wf.cwl',
             'container_image': '999999999999999999999999999999d3+99',
@@ -283,7 +283,7 @@ def stubs(func):
             'runtime_constraints': {
                 'API': True,
                 'vcpus': 1,
-                'ram': 1024*1024*1024
+                'ram': (1024+256)*1024*1024
             },
             'use_existing': True,
             'properties': {},
@@ -558,8 +558,9 @@ class TestSubmit(unittest.TestCase):
         expect_container["command"] = [
             'arvados-cwl-runner', '--local', '--api=containers',
             '--no-log-timestamps', '--disable-validate',
-            '--eval-timeout=20', '--thread-count=4',
-            '--disable-reuse', '--debug', '--on-error=continue',
+            '--eval-timeout=20', '--thread-count=1',
+            '--disable-reuse', "--collection-cache-size=256",
+            '--debug', '--on-error=continue',
             '/var/lib/cwl/workflow.json#main', '/var/lib/cwl/cwl.input.json']
         expect_container["use_existing"] = False
 
@@ -583,8 +584,8 @@ class TestSubmit(unittest.TestCase):
         expect_container["command"] = [
             'arvados-cwl-runner', '--local', '--api=containers',
             '--no-log-timestamps', '--disable-validate',
-            '--eval-timeout=20', '--thread-count=4',
-            '--disable-reuse', '--debug', '--on-error=continue',
+            '--eval-timeout=20', '--thread-count=1',
+            '--disable-reuse', "--collection-cache-size=256", '--debug', '--on-error=continue',
             '/var/lib/cwl/workflow.json#main', '/var/lib/cwl/cwl.input.json']
         expect_container["use_existing"] = False
         expect_container["name"] = "submit_wf_no_reuse.cwl"
@@ -620,8 +621,9 @@ class TestSubmit(unittest.TestCase):
         expect_container = copy.deepcopy(stubs.expect_container_spec)
         expect_container["command"] = ['arvados-cwl-runner', '--local', '--api=containers',
                                        '--no-log-timestamps', '--disable-validate',
-                                       '--eval-timeout=20', '--thread-count=4',
-                                       '--enable-reuse', '--debug', '--on-error=stop',
+                                       '--eval-timeout=20', '--thread-count=1',
+                                       '--enable-reuse', "--collection-cache-size=256",
+                                       '--debug', '--on-error=stop',
                                        '/var/lib/cwl/workflow.json#main', '/var/lib/cwl/cwl.input.json']
 
         stubs.api.container_requests().create.assert_called_with(
@@ -646,8 +648,8 @@ class TestSubmit(unittest.TestCase):
         expect_container = copy.deepcopy(stubs.expect_container_spec)
         expect_container["command"] = ['arvados-cwl-runner', '--local', '--api=containers',
                                        '--no-log-timestamps', '--disable-validate',
-                                       '--eval-timeout=20', '--thread-count=4',
-                                       '--enable-reuse',
+                                       '--eval-timeout=20', '--thread-count=1',
+                                       '--enable-reuse', "--collection-cache-size=256",
                                        "--output-name="+output_name, '--debug', '--on-error=continue',
                                        '/var/lib/cwl/workflow.json#main', '/var/lib/cwl/cwl.input.json']
         expect_container["output_name"] = output_name
@@ -672,8 +674,8 @@ class TestSubmit(unittest.TestCase):
         expect_container = copy.deepcopy(stubs.expect_container_spec)
         expect_container["command"] = ['arvados-cwl-runner', '--local', '--api=containers',
                                        '--no-log-timestamps', '--disable-validate',
-                                       '--eval-timeout=20', '--thread-count=4',
-                                       '--enable-reuse', "--debug",
+                                       '--eval-timeout=20', '--thread-count=1',
+                                       '--enable-reuse', "--collection-cache-size=256", "--debug",
                                        "--storage-classes=foo", '--on-error=continue',
                                        '/var/lib/cwl/workflow.json#main', '/var/lib/cwl/cwl.input.json']
 
@@ -739,8 +741,9 @@ class TestSubmit(unittest.TestCase):
         expect_container = copy.deepcopy(stubs.expect_container_spec)
         expect_container["command"] = ['arvados-cwl-runner', '--local', '--api=containers',
                                        '--no-log-timestamps', '--disable-validate',
-                                       '--eval-timeout=20', '--thread-count=4',
-                                       '--enable-reuse', '--debug', '--on-error=continue',
+                                       '--eval-timeout=20', '--thread-count=1',
+                                       '--enable-reuse', "--collection-cache-size=256", '--debug',
+                                       '--on-error=continue',
                                        "--intermediate-output-ttl=3600",
                                        '/var/lib/cwl/workflow.json#main', '/var/lib/cwl/cwl.input.json']
 
@@ -764,8 +767,9 @@ class TestSubmit(unittest.TestCase):
         expect_container = copy.deepcopy(stubs.expect_container_spec)
         expect_container["command"] = ['arvados-cwl-runner', '--local', '--api=containers',
                                        '--no-log-timestamps', '--disable-validate',
-                                       '--eval-timeout=20', '--thread-count=4',
-                                       '--enable-reuse', '--debug', '--on-error=continue',
+                                       '--eval-timeout=20', '--thread-count=1',
+                                       '--enable-reuse', "--collection-cache-size=256",
+                                       '--debug', '--on-error=continue',
                                        "--trash-intermediate",
                                        '/var/lib/cwl/workflow.json#main', '/var/lib/cwl/cwl.input.json']
 
@@ -791,8 +795,8 @@ class TestSubmit(unittest.TestCase):
         expect_container = copy.deepcopy(stubs.expect_container_spec)
         expect_container["command"] = ['arvados-cwl-runner', '--local', '--api=containers',
                                        '--no-log-timestamps', '--disable-validate',
-                                       '--eval-timeout=20', '--thread-count=4',
-                                       '--enable-reuse',
+                                       '--eval-timeout=20', '--thread-count=1',
+                                       '--enable-reuse', "--collection-cache-size=256",
                                        "--output-tags="+output_tags, '--debug', '--on-error=continue',
                                        '/var/lib/cwl/workflow.json#main', '/var/lib/cwl/cwl.input.json']
 
@@ -814,7 +818,7 @@ class TestSubmit(unittest.TestCase):
             logging.exception("")
 
         expect_container = copy.deepcopy(stubs.expect_container_spec)
-        expect_container["runtime_constraints"]["ram"] = 2048*1024*1024
+        expect_container["runtime_constraints"]["ram"] = (2048+256)*1024*1024
 
         stubs.api.container_requests().create.assert_called_with(
             body=JsonDiffMatcher(expect_container))
@@ -876,14 +880,14 @@ class TestSubmit(unittest.TestCase):
             'container_image': '999999999999999999999999999999d3+99',
             'command': ['arvados-cwl-runner', '--local', '--api=containers',
                         '--no-log-timestamps', '--disable-validate',
-                        '--eval-timeout=20', '--thread-count=4',
-                        '--enable-reuse', '--debug', '--on-error=continue',
+                        '--eval-timeout=20', '--thread-count=1',
+                        '--enable-reuse', "--collection-cache-size=256", '--debug', '--on-error=continue',
                         '/var/lib/cwl/workflow/expect_arvworkflow.cwl#main', '/var/lib/cwl/cwl.input.json'],
             'cwd': '/var/spool/cwl',
             'runtime_constraints': {
                 'API': True,
                 'vcpus': 1,
-                'ram': 1073741824
+                'ram': 1342177280
             },
             'use_existing': True,
             'properties': {},
@@ -998,14 +1002,14 @@ class TestSubmit(unittest.TestCase):
             'container_image': "999999999999999999999999999999d3+99",
             'command': ['arvados-cwl-runner', '--local', '--api=containers',
                         '--no-log-timestamps', '--disable-validate',
-                        '--eval-timeout=20', '--thread-count=4',
-                        '--enable-reuse', '--debug', '--on-error=continue',
+                        '--eval-timeout=20', '--thread-count=1',
+                        '--enable-reuse', "--collection-cache-size=256", '--debug', '--on-error=continue',
                         '/var/lib/cwl/workflow.json#main', '/var/lib/cwl/cwl.input.json'],
             'cwd': '/var/spool/cwl',
             'runtime_constraints': {
                 'API': True,
                 'vcpus': 1,
-                'ram': 1073741824
+                'ram': 1342177280
             },
             'use_existing': True,
             'properties': {
@@ -1041,6 +1045,23 @@ class TestSubmit(unittest.TestCase):
                          stubs.expect_container_request_uuid + '\n')
 
 
+    @stubs
+    def test_submit_missing_input(self, stubs):
+        capture_stdout = cStringIO.StringIO()
+        exited = arvados_cwl.main(
+            ["--submit", "--no-wait", "--api=containers", "--debug",
+             "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
+            capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
+        self.assertEqual(exited, 0)
+
+        capture_stdout = cStringIO.StringIO()
+        exited = arvados_cwl.main(
+            ["--submit", "--no-wait", "--api=containers", "--debug",
+             "tests/wf/submit_wf.cwl", "tests/submit_test_job_missing.json"],
+            capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
+        self.assertEqual(exited, 1)
+
+
     @stubs
     def test_submit_container_project(self, stubs):
         project_uuid = 'zzzzz-j7d0g-zzzzzzzzzzzzzzz'
@@ -1058,8 +1079,9 @@ class TestSubmit(unittest.TestCase):
         expect_container["owner_uuid"] = project_uuid
         expect_container["command"] = ['arvados-cwl-runner', '--local', '--api=containers',
                                        '--no-log-timestamps', '--disable-validate',
-                                       "--eval-timeout=20", "--thread-count=4",
-                                       '--enable-reuse', '--debug', '--on-error=continue',
+                                       "--eval-timeout=20", "--thread-count=1",
+                                       '--enable-reuse', "--collection-cache-size=256", '--debug',
+                                       '--on-error=continue',
                                        '--project-uuid='+project_uuid,
                                        '/var/lib/cwl/workflow.json#main', '/var/lib/cwl/cwl.input.json']
 
@@ -1084,8 +1106,9 @@ class TestSubmit(unittest.TestCase):
         expect_container = copy.deepcopy(stubs.expect_container_spec)
         expect_container["command"] = ['arvados-cwl-runner', '--local', '--api=containers',
                                        '--no-log-timestamps', '--disable-validate',
-                                       '--eval-timeout=60.0', '--thread-count=4',
-                                       '--enable-reuse', '--debug', '--on-error=continue',
+                                       '--eval-timeout=60.0', '--thread-count=1',
+                                       '--enable-reuse', "--collection-cache-size=256",
+                                       '--debug', '--on-error=continue',
                                        '/var/lib/cwl/workflow.json#main', '/var/lib/cwl/cwl.input.json']
 
         stubs.api.container_requests().create.assert_called_with(
@@ -1093,6 +1116,33 @@ class TestSubmit(unittest.TestCase):
         self.assertEqual(capture_stdout.getvalue(),
                          stubs.expect_container_request_uuid + '\n')
 
+    @stubs
+    def test_submit_container_collection_cache(self, stubs):
+        project_uuid = 'zzzzz-j7d0g-zzzzzzzzzzzzzzz'
+        capture_stdout = cStringIO.StringIO()
+        try:
+            exited = arvados_cwl.main(
+                ["--submit", "--no-wait", "--api=containers", "--debug", "--collection-cache-size=500",
+                 "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
+                capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
+            self.assertEqual(exited, 0)
+        except:
+            logging.exception("")
+
+        expect_container = copy.deepcopy(stubs.expect_container_spec)
+        expect_container["command"] = ['arvados-cwl-runner', '--local', '--api=containers',
+                                       '--no-log-timestamps', '--disable-validate',
+                                       '--eval-timeout=20', '--thread-count=1',
+                                       '--enable-reuse', "--collection-cache-size=500",
+                                       '--debug', '--on-error=continue',
+                                       '/var/lib/cwl/workflow.json#main', '/var/lib/cwl/cwl.input.json']
+        expect_container["runtime_constraints"]["ram"] = (1024+500)*1024*1024
+
+        stubs.api.container_requests().create.assert_called_with(
+            body=JsonDiffMatcher(expect_container))
+        self.assertEqual(capture_stdout.getvalue(),
+                         stubs.expect_container_request_uuid + '\n')
+
 
     @stubs
     def test_submit_container_thread_count(self, stubs):
@@ -1111,7 +1161,8 @@ class TestSubmit(unittest.TestCase):
         expect_container["command"] = ['arvados-cwl-runner', '--local', '--api=containers',
                                        '--no-log-timestamps', '--disable-validate',
                                        '--eval-timeout=20', '--thread-count=20',
-                                       '--enable-reuse', '--debug', '--on-error=continue',
+                                       '--enable-reuse', "--collection-cache-size=256",
+                                       '--debug', '--on-error=continue',
                                        '/var/lib/cwl/workflow.json#main', '/var/lib/cwl/cwl.input.json']
 
         stubs.api.container_requests().create.assert_called_with(
@@ -1197,19 +1248,25 @@ class TestSubmit(unittest.TestCase):
         expect_container["runtime_constraints"] = {
             "API": True,
             "vcpus": 2,
-            "ram": 2000 * 2**20
+            "ram": (2000+512) * 2**20
         }
         expect_container["name"] = "submit_wf_runner_resources.cwl"
         expect_container["mounts"]["/var/lib/cwl/workflow.json"]["content"]["$graph"][1]["hints"] = [
             {
                 "class": "http://arvados.org/cwl#WorkflowRunnerResources",
                 "coresMin": 2,
-                "ramMin": 2000
+                "ramMin": 2000,
+                "keep_cache": 512
             }
         ]
         expect_container["mounts"]["/var/lib/cwl/workflow.json"]["content"]["$graph"][0]["$namespaces"] = {
             "arv": "http://arvados.org/cwl#",
         }
+        expect_container['command'] = ['arvados-cwl-runner', '--local', '--api=containers',
+                        '--no-log-timestamps', '--disable-validate',
+                        '--eval-timeout=20', '--thread-count=1',
+                        '--enable-reuse', "--collection-cache-size=512", '--debug', '--on-error=continue',
+                        '/var/lib/cwl/workflow.json#main', '/var/lib/cwl/cwl.input.json']
 
         stubs.api.container_requests().create.assert_called_with(
             body=JsonDiffMatcher(expect_container))
@@ -1277,8 +1334,9 @@ class TestSubmit(unittest.TestCase):
                 "--no-log-timestamps",
                 "--disable-validate",
                 "--eval-timeout=20",
-                '--thread-count=4',
+                '--thread-count=1',
                 "--enable-reuse",
+                "--collection-cache-size=256",
                 '--debug',
                 "--on-error=continue",
                 "/var/lib/cwl/workflow.json#main",
@@ -1406,7 +1464,7 @@ class TestSubmit(unittest.TestCase):
             "properties": {},
             "runtime_constraints": {
                 "API": True,
-                "ram": 1073741824,
+                "ram": 1342177280,
                 "vcpus": 1
             },
             "secret_mounts": {
index 2afbe0cff25f3d26e63253e697f3238468680e0f..a094890650e1a3049f177e9f01ec2330df7c7451 100644 (file)
@@ -22,29 +22,37 @@ def fail_task():
 class TestTaskQueue(unittest.TestCase):
     def test_tq(self):
         tq = TaskQueue(threading.Lock(), 2)
+        try:
+            self.assertIsNone(tq.error)
 
-        self.assertIsNone(tq.error)
-
-        tq.add(success_task)
-        tq.add(success_task)
-        tq.add(success_task)
-        tq.add(success_task)
+            unlock = threading.Lock()
+            unlock.acquire()
+            check_done = threading.Event()
 
-        tq.join()
+            tq.add(success_task, unlock, check_done)
+            tq.add(success_task, unlock, check_done)
+            tq.add(success_task, unlock, check_done)
+            tq.add(success_task, unlock, check_done)
+        finally:
+            tq.join()
 
         self.assertIsNone(tq.error)
 
 
     def test_tq_error(self):
         tq = TaskQueue(threading.Lock(), 2)
-
-        self.assertIsNone(tq.error)
-
-        tq.add(success_task)
-        tq.add(success_task)
-        tq.add(fail_task)
-        tq.add(success_task)
-
-        tq.join()
+        try:
+            self.assertIsNone(tq.error)
+
+            unlock = threading.Lock()
+            unlock.acquire()
+            check_done = threading.Event()
+
+            tq.add(success_task, unlock, check_done)
+            tq.add(success_task, unlock, check_done)
+            tq.add(fail_task, unlock, check_done)
+            tq.add(success_task, unlock, check_done)
+        finally:
+            tq.join()
 
         self.assertIsNotNone(tq.error)
index 9e2712194950627d87c148b76fae14d00f5fac2b..814cd07ab5d0833a5a374e503b6ee1feae00ef87 100644 (file)
@@ -15,6 +15,7 @@ hints:
   arv:WorkflowRunnerResources:
     ramMin: 2000
     coresMin: 2
+    keep_cache: 512
 inputs:
   - id: x
     type: File
index a24d53dad6a629f9d08692bb19dd62e144655a7b..2e6484cabdf1e71d39f5fe21139b29c2ce09ad93 100644 (file)
@@ -8,9 +8,9 @@ import os
 import re
 
 def git_latest_tag():
-    gitinfo = subprocess.check_output(
-        ['git', 'describe', '--abbrev=0']).strip()
-    return str(gitinfo.decode('utf-8'))
+    gittags = subprocess.check_output(['git', 'tag', '-l']).split()
+    gittags.sort(key=lambda s: [int(u) for u in s.split(b'.')],reverse=True)
+    return str(next(iter(gittags)).decode('utf-8'))
 
 def git_timestamp_tag():
     gitinfo = subprocess.check_output(
index a24d53dad6a629f9d08692bb19dd62e144655a7b..2e6484cabdf1e71d39f5fe21139b29c2ce09ad93 100644 (file)
@@ -8,9 +8,9 @@ import os
 import re
 
 def git_latest_tag():
-    gitinfo = subprocess.check_output(
-        ['git', 'describe', '--abbrev=0']).strip()
-    return str(gitinfo.decode('utf-8'))
+    gittags = subprocess.check_output(['git', 'tag', '-l']).split()
+    gittags.sort(key=lambda s: [int(u) for u in s.split(b'.')],reverse=True)
+    return str(next(iter(gittags)).decode('utf-8'))
 
 def git_timestamp_tag():
     gitinfo = subprocess.check_output(
index ccf25c422e62085e1edf3829459f5cdb8a8710ff..f3278fcc1d5e7aeab1f6748f90bc80040e6fce37 100644 (file)
@@ -13,9 +13,9 @@ class EggInfoFromGit(egg_info):
     from source package), leave it alone.
     """
     def git_latest_tag(self):
-        gitinfo = subprocess.check_output(
-            ['git', 'describe', '--abbrev=0']).strip()
-        return str(gitinfo.decode('utf-8'))
+        gittags = subprocess.check_output(['git', 'tag', '-l']).split()
+        gittags.sort(key=lambda s: [int(u) for u in s.split(b'.')],reverse=True)
+        return str(next(iter(gittags)).decode('utf-8'))
 
     def git_timestamp_tag(self):
         gitinfo = subprocess.check_output(
index 609af6e23dda07b2467f6cc78dfe3f69ae00bb65..da919309f4e829f227f3241eb7d41759087dde08 100644 (file)
@@ -7,7 +7,7 @@ if not File.exist?('/usr/bin/git') then
   exit
 end
 
-git_latest_tag = `git describe --abbrev=0`
+git_latest_tag = `git tag -l |sort -V -r |head -n1`
 git_latest_tag = git_latest_tag.encode('utf-8').strip
 git_timestamp, git_hash = `git log -n1 --first-parent --format=%ct:%H .`.chomp.split(":")
 git_timestamp = Time.at(git_timestamp.to_i).utc
index cc15a56f35325f56ea5762c050aa4494f5e5a5d4..93d5b9a0239753a8820d86b883abcbdf1a06b776 100644 (file)
@@ -274,9 +274,8 @@ class ArvadosModel < ActiveRecord::Base
       if !include_trash
         if sql_table != "api_client_authorizations"
           # Only include records where the owner is not trashed
-          sql_conds = "NOT EXISTS(SELECT 1 FROM #{PERMISSION_VIEW} "+
-                      "WHERE trashed = 1 AND "+
-                      "(#{sql_table}.owner_uuid = target_uuid)) #{exclude_trashed_records}"
+          sql_conds = "#{sql_table}.owner_uuid NOT IN (SELECT target_uuid FROM #{PERMISSION_VIEW} "+
+                      "WHERE trashed = 1) #{exclude_trashed_records}"
         end
       end
     else
@@ -294,14 +293,14 @@ class ArvadosModel < ActiveRecord::Base
       # see issue 13208 for details.
 
       # Match a direct read permission link from the user to the record uuid
-      direct_check = "EXISTS(SELECT 1 FROM #{PERMISSION_VIEW} "+
-                     "WHERE user_uuid IN (:user_uuids) AND perm_level >= 1 #{trashed_check} AND target_uuid = #{sql_table}.uuid)"
+      direct_check = "#{sql_table}.uuid IN (SELECT target_uuid FROM #{PERMISSION_VIEW} "+
+                     "WHERE user_uuid IN (:user_uuids) AND perm_level >= 1 #{trashed_check})"
 
       # Match a read permission link from the user to the record's owner_uuid
       owner_check = ""
       if sql_table != "api_client_authorizations" and sql_table != "groups" then
-        owner_check = "OR EXISTS(SELECT 1 FROM #{PERMISSION_VIEW} "+
-          "WHERE user_uuid IN (:user_uuids) AND perm_level >= 1 #{trashed_check} AND target_uuid = #{sql_table}.owner_uuid AND target_owner_uuid IS NOT NULL) "
+        owner_check = "OR #{sql_table}.owner_uuid IN (SELECT target_uuid FROM #{PERMISSION_VIEW} "+
+          "WHERE user_uuid IN (:user_uuids) AND perm_level >= 1 #{trashed_check} AND target_owner_uuid IS NOT NULL) "
       end
 
       links_cond = ""
@@ -403,7 +402,7 @@ class ArvadosModel < ActiveRecord::Base
       cast = serialized_attributes[column] ? '::text' : ''
       "coalesce(#{column}#{cast},'')"
     end
-    "to_tsvector('english', #{parts.join(" || ' ' || ")})"
+    "to_tsvector('english', substr(#{parts.join(" || ' ' || ")}, 0, 8000))"
   end
 
   def self.apply_filters query, filters
index ac67040edf799465c1dda671e0a4d0eb80cf9483..bd586907ee2eaf205616251be126bc7cf9c94b09 100644 (file)
@@ -279,14 +279,6 @@ class Container < ArvadosModel
     candidates = candidates.where_serialized(:runtime_constraints, resolve_runtime_constraints(attrs[:runtime_constraints]), md5: true)
     log_reuse_info(candidates) { "after filtering on runtime_constraints #{attrs[:runtime_constraints].inspect}" }
 
-    candidates = candidates.where('runtime_user_uuid = ? or (runtime_user_uuid is NULL and runtime_auth_scopes is NULL)',
-                                  attrs[:runtime_user_uuid])
-    log_reuse_info(candidates) { "after filtering on runtime_user_uuid #{attrs[:runtime_user_uuid].inspect}" }
-
-    candidates = candidates.where('runtime_auth_scopes = ? or (runtime_user_uuid is NULL and runtime_auth_scopes is NULL)',
-                                  SafeJSON.dump(attrs[:runtime_auth_scopes].sort))
-    log_reuse_info(candidates) { "after filtering on runtime_auth_scopes #{attrs[:runtime_auth_scopes].inspect}" }
-
     log_reuse_info { "checking for state=Complete with readable output and log..." }
 
     select_readable_pdh = Collection.
diff --git a/services/api/db/migrate/20180917200000_replace_full_text_indexes.rb b/services/api/db/migrate/20180917200000_replace_full_text_indexes.rb
new file mode 100644 (file)
index 0000000..b0eea9e
--- /dev/null
@@ -0,0 +1,14 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+require './db/migrate/20161213172944_full_text_search_indexes'
+
+class ReplaceFullTextIndexes < ActiveRecord::Migration
+  def up
+    FullTextSearchIndexes.new.up
+  end
+
+  def down
+  end
+end
index 5105914df0dbd04ab599790d934f03194021dccf..aa29a1cbb409d59542d0d037cbdf703f9c407ea5 100644 (file)
@@ -1631,7 +1631,7 @@ CREATE INDEX collection_index_on_properties ON public.collections USING gin (pro
 -- Name: collections_full_text_search_idx; Type: INDEX; Schema: public; Owner: -
 --
 
-CREATE INDEX collections_full_text_search_idx ON public.collections USING gin (to_tsvector('english'::regconfig, (((((((((((((((((COALESCE(owner_uuid, ''::character varying))::text || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(portable_data_hash, ''::character varying))::text) || ' '::text) || (COALESCE(uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || (COALESCE(description, ''::character varying))::text) || ' '::text) || COALESCE((properties)::text, ''::text)) || ' '::text) || COALESCE(file_names, (''::character varying)::text))));
+CREATE INDEX collections_full_text_search_idx ON public.collections USING gin (to_tsvector('english'::regconfig, substr((((((((((((((((((COALESCE(owner_uuid, ''::character varying))::text || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(portable_data_hash, ''::character varying))::text) || ' '::text) || (COALESCE(uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || (COALESCE(description, ''::character varying))::text) || ' '::text) || COALESCE((properties)::text, ''::text)) || ' '::text) || COALESCE(file_names, ''::text)), 0, 1000000)));
 
 
 --
@@ -1645,7 +1645,7 @@ CREATE INDEX collections_search_index ON public.collections USING btree (owner_u
 -- Name: container_requests_full_text_search_idx; Type: INDEX; Schema: public; Owner: -
 --
 
-CREATE INDEX container_requests_full_text_search_idx ON public.container_requests USING gin (to_tsvector('english'::regconfig, (((((((((((((((((((((((((((((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(description, ''::text)) || ' '::text) || COALESCE((properties)::text, ''::text)) || ' '::text) || (COALESCE(state, ''::character varying))::text) || ' '::text) || (COALESCE(requesting_container_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(container_uuid, ''::character varying))::text) || ' '::text) || COALESCE(runtime_constraints, ''::text)) || ' '::text) || (COALESCE(container_image, ''::character varying))::text) || ' '::text) || COALESCE(environment, ''::text)) || ' '::text) || (COALESCE(cwd, ''::character varying))::text) || ' '::text) || COALESCE(command, ''::text)) || ' '::text) || (COALESCE(output_path, ''::character varying))::text) || ' '::text) || COALESCE(filters, ''::text)) || ' '::text) || COALESCE(scheduling_parameters, ''::text)) || ' '::text) || (COALESCE(output_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(log_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(output_name, ''::character varying))::text)));
+CREATE INDEX container_requests_full_text_search_idx ON public.container_requests USING gin (to_tsvector('english'::regconfig, substr((((((((((((((((((((((((((((((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(description, ''::text)) || ' '::text) || COALESCE((properties)::text, ''::text)) || ' '::text) || (COALESCE(state, ''::character varying))::text) || ' '::text) || (COALESCE(requesting_container_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(container_uuid, ''::character varying))::text) || ' '::text) || COALESCE(runtime_constraints, ''::text)) || ' '::text) || (COALESCE(container_image, ''::character varying))::text) || ' '::text) || COALESCE(environment, ''::text)) || ' '::text) || (COALESCE(cwd, ''::character varying))::text) || ' '::text) || COALESCE(command, ''::text)) || ' '::text) || (COALESCE(output_path, ''::character varying))::text) || ' '::text) || COALESCE(filters, ''::text)) || ' '::text) || COALESCE(scheduling_parameters, ''::text)) || ' '::text) || (COALESCE(output_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(log_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(output_name, ''::character varying))::text), 0, 1000000)));
 
 
 --
@@ -1680,7 +1680,7 @@ CREATE INDEX group_index_on_properties ON public.groups USING gin (properties);
 -- Name: groups_full_text_search_idx; Type: INDEX; Schema: public; Owner: -
 --
 
-CREATE INDEX groups_full_text_search_idx ON public.groups USING gin (to_tsvector('english'::regconfig, (((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || (COALESCE(description, ''::character varying))::text) || ' '::text) || (COALESCE(group_class, ''::character varying))::text) || ' '::text) || COALESCE((properties)::text, ''::text))));
+CREATE INDEX groups_full_text_search_idx ON public.groups USING gin (to_tsvector('english'::regconfig, substr((((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || (COALESCE(description, ''::character varying))::text) || ' '::text) || (COALESCE(group_class, ''::character varying))::text) || ' '::text) || COALESCE((properties)::text, ''::text)), 0, 1000000)));
 
 
 --
@@ -2653,7 +2653,7 @@ CREATE INDEX job_tasks_search_index ON public.job_tasks USING btree (uuid, owner
 -- Name: jobs_full_text_search_idx; Type: INDEX; Schema: public; Owner: -
 --
 
-CREATE INDEX jobs_full_text_search_idx ON public.jobs USING gin (to_tsvector('english'::regconfig, (((((((((((((((((((((((((((((((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(submit_id, ''::character varying))::text) || ' '::text) || (COALESCE(script, ''::character varying))::text) || ' '::text) || (COALESCE(script_version, ''::character varying))::text) || ' '::text) || COALESCE(script_parameters, ''::text)) || ' '::text) || (COALESCE(cancelled_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(cancelled_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(output, ''::character varying))::text) || ' '::text) || (COALESCE(is_locked_by_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(log, ''::character varying))::text) || ' '::text) || COALESCE(tasks_summary, ''::text)) || ' '::text) || COALESCE(runtime_constraints, ''::text)) || ' '::text) || (COALESCE(repository, ''::character varying))::text) || ' '::text) || (COALESCE(supplied_script_version, ''::character varying))::text) || ' '::text) || (COALESCE(docker_image_locator, ''::character varying))::text) || ' '::text) || (COALESCE(description, ''::character varying))::text) || ' '::text) || (COALESCE(state, ''::character varying))::text) || ' '::text) || (COALESCE(arvados_sdk_version, ''::character varying))::text) || ' '::text) || COALESCE(components, ''::text))));
+CREATE INDEX jobs_full_text_search_idx ON public.jobs USING gin (to_tsvector('english'::regconfig, substr((((((((((((((((((((((((((((((((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(submit_id, ''::character varying))::text) || ' '::text) || (COALESCE(script, ''::character varying))::text) || ' '::text) || (COALESCE(script_version, ''::character varying))::text) || ' '::text) || COALESCE(script_parameters, ''::text)) || ' '::text) || (COALESCE(cancelled_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(cancelled_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(output, ''::character varying))::text) || ' '::text) || (COALESCE(is_locked_by_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(log, ''::character varying))::text) || ' '::text) || COALESCE(tasks_summary, ''::text)) || ' '::text) || COALESCE(runtime_constraints, ''::text)) || ' '::text) || (COALESCE(repository, ''::character varying))::text) || ' '::text) || (COALESCE(supplied_script_version, ''::character varying))::text) || ' '::text) || (COALESCE(docker_image_locator, ''::character varying))::text) || ' '::text) || (COALESCE(description, ''::character varying))::text) || ' '::text) || (COALESCE(state, ''::character varying))::text) || ' '::text) || (COALESCE(arvados_sdk_version, ''::character varying))::text) || ' '::text) || COALESCE(components, ''::text)), 0, 1000000)));
 
 
 --
@@ -2744,7 +2744,7 @@ CREATE INDEX permission_target_user_trashed_level ON public.materialized_permiss
 -- Name: pipeline_instances_full_text_search_idx; Type: INDEX; Schema: public; Owner: -
 --
 
-CREATE INDEX pipeline_instances_full_text_search_idx ON public.pipeline_instances USING gin (to_tsvector('english'::regconfig, (((((((((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(pipeline_template_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(components, ''::text)) || ' '::text) || COALESCE(properties, ''::text)) || ' '::text) || (COALESCE(state, ''::character varying))::text) || ' '::text) || COALESCE(components_summary, ''::text)) || ' '::text) || (COALESCE(description, ''::character varying))::text)));
+CREATE INDEX pipeline_instances_full_text_search_idx ON public.pipeline_instances USING gin (to_tsvector('english'::regconfig, substr((((((((((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(pipeline_template_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(components, ''::text)) || ' '::text) || COALESCE(properties, ''::text)) || ' '::text) || (COALESCE(state, ''::character varying))::text) || ' '::text) || COALESCE(components_summary, ''::text)) || ' '::text) || (COALESCE(description, ''::character varying))::text), 0, 1000000)));
 
 
 --
@@ -2765,7 +2765,7 @@ CREATE UNIQUE INDEX pipeline_template_owner_uuid_name_unique ON public.pipeline_
 -- Name: pipeline_templates_full_text_search_idx; Type: INDEX; Schema: public; Owner: -
 --
 
-CREATE INDEX pipeline_templates_full_text_search_idx ON public.pipeline_templates USING gin (to_tsvector('english'::regconfig, (((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(components, ''::text)) || ' '::text) || (COALESCE(description, ''::character varying))::text)));
+CREATE INDEX pipeline_templates_full_text_search_idx ON public.pipeline_templates USING gin (to_tsvector('english'::regconfig, substr((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(components, ''::text)) || ' '::text) || (COALESCE(description, ''::character varying))::text), 0, 1000000)));
 
 
 --
@@ -2821,7 +2821,7 @@ CREATE INDEX virtual_machines_search_index ON public.virtual_machines USING btre
 -- Name: workflows_full_text_search_idx; Type: INDEX; Schema: public; Owner: -
 --
 
-CREATE INDEX workflows_full_text_search_idx ON public.workflows USING gin (to_tsvector('english'::regconfig, (((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(description, ''::text))));
+CREATE INDEX workflows_full_text_search_idx ON public.workflows USING gin (to_tsvector('english'::regconfig, substr((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(description, ''::text)), 0, 1000000)));
 
 
 --
@@ -3187,6 +3187,8 @@ INSERT INTO schema_migrations (version) VALUES ('20180913175443');
 
 INSERT INTO schema_migrations (version) VALUES ('20180915155335');
 
+INSERT INTO schema_migrations (version) VALUES ('20180917200000');
+
 INSERT INTO schema_migrations (version) VALUES ('20180917205609');
 
 INSERT INTO schema_migrations (version) VALUES ('20180919001158');
index 90b4f13bf597b5b9ea306dec04b698e75fb98ae3..2a9ff5bf4cc6985a413f62a03d7b9555e9c0f938 100644 (file)
@@ -558,7 +558,8 @@ class ContainerTest < ActiveSupport::TestCase
     c1, _ = minimal_new(common_attrs.merge({runtime_token: api_client_authorizations(:active).token}))
     assert_equal Container::Queued, c1.state
     reused = Container.find_reusable(common_attrs.merge(runtime_token_attr(:container_runtime_token)))
-    assert_nil reused
+    # See #14584
+    assert_equal c1.uuid, reused.uuid
   end
 
   test "find_reusable method with nil runtime_token, then runtime_token with different user" do
@@ -567,7 +568,8 @@ class ContainerTest < ActiveSupport::TestCase
     c1, _ = minimal_new(common_attrs.merge({runtime_token: nil}))
     assert_equal Container::Queued, c1.state
     reused = Container.find_reusable(common_attrs.merge(runtime_token_attr(:container_runtime_token)))
-    assert_nil reused
+    # See #14584
+    assert_equal c1.uuid, reused.uuid
   end
 
   test "find_reusable method with different runtime_token, different scope, same user" do
@@ -576,7 +578,8 @@ class ContainerTest < ActiveSupport::TestCase
     c1, _ = minimal_new(common_attrs.merge({runtime_token: api_client_authorizations(:runtime_token_limited_scope).token}))
     assert_equal Container::Queued, c1.state
     reused = Container.find_reusable(common_attrs.merge(runtime_token_attr(:container_runtime_token)))
-    assert_nil reused
+    # See #14584
+    assert_equal c1.uuid, reused.uuid
   end
 
   test "Container running" do
index a24d53dad6a629f9d08692bb19dd62e144655a7b..2e6484cabdf1e71d39f5fe21139b29c2ce09ad93 100644 (file)
@@ -8,9 +8,9 @@ import os
 import re
 
 def git_latest_tag():
-    gitinfo = subprocess.check_output(
-        ['git', 'describe', '--abbrev=0']).strip()
-    return str(gitinfo.decode('utf-8'))
+    gittags = subprocess.check_output(['git', 'tag', '-l']).split()
+    gittags.sort(key=lambda s: [int(u) for u in s.split(b'.')],reverse=True)
+    return str(next(iter(gittags)).decode('utf-8'))
 
 def git_timestamp_tag():
     gitinfo = subprocess.check_output(
index a24d53dad6a629f9d08692bb19dd62e144655a7b..2e6484cabdf1e71d39f5fe21139b29c2ce09ad93 100644 (file)
@@ -8,9 +8,9 @@ import os
 import re
 
 def git_latest_tag():
-    gitinfo = subprocess.check_output(
-        ['git', 'describe', '--abbrev=0']).strip()
-    return str(gitinfo.decode('utf-8'))
+    gittags = subprocess.check_output(['git', 'tag', '-l']).split()
+    gittags.sort(key=lambda s: [int(u) for u in s.split(b'.')],reverse=True)
+    return str(next(iter(gittags)).decode('utf-8'))
 
 def git_timestamp_tag():
     gitinfo = subprocess.check_output(
index f2c5735985a7131129c38469e2183ffb70ef10f6..605e8540ee1df59d3b96618ebef50f4b39567384 100644 (file)
@@ -7,7 +7,7 @@ if not File.exists?('/usr/bin/git') then
   exit
 end
 
-git_latest_tag = `git describe --abbrev=0`
+git_latest_tag = `git tag -l |sort -V -r |head -n1`
 git_latest_tag = git_latest_tag.encode('utf-8').strip
 git_timestamp, git_hash = `git log -n1 --first-parent --format=%ct:%H .`.chomp.split(":")
 git_timestamp = Time.at(git_timestamp.to_i).utc
index a24d53dad6a629f9d08692bb19dd62e144655a7b..2e6484cabdf1e71d39f5fe21139b29c2ce09ad93 100644 (file)
@@ -8,9 +8,9 @@ import os
 import re
 
 def git_latest_tag():
-    gitinfo = subprocess.check_output(
-        ['git', 'describe', '--abbrev=0']).strip()
-    return str(gitinfo.decode('utf-8'))
+    gittags = subprocess.check_output(['git', 'tag', '-l']).split()
+    gittags.sort(key=lambda s: [int(u) for u in s.split(b'.')],reverse=True)
+    return str(next(iter(gittags)).decode('utf-8'))
 
 def git_timestamp_tag():
     gitinfo = subprocess.check_output(
index a24d53dad6a629f9d08692bb19dd62e144655a7b..2e6484cabdf1e71d39f5fe21139b29c2ce09ad93 100644 (file)
@@ -8,9 +8,9 @@ import os
 import re
 
 def git_latest_tag():
-    gitinfo = subprocess.check_output(
-        ['git', 'describe', '--abbrev=0']).strip()
-    return str(gitinfo.decode('utf-8'))
+    gittags = subprocess.check_output(['git', 'tag', '-l']).split()
+    gittags.sort(key=lambda s: [int(u) for u in s.split(b'.')],reverse=True)
+    return str(next(iter(gittags)).decode('utf-8'))
 
 def git_timestamp_tag():
     gitinfo = subprocess.check_output(