Merge branch 'master' into 14012-arvput-check-cache

author Lucas Di Pentima <ldipentima@veritasgenetics.com>
Mon, 3 Dec 2018 19:55:54 +0000 (16:55 -0300)
committer Lucas Di Pentima <ldipentima@veritasgenetics.com>
Mon, 3 Dec 2018 19:55:54 +0000 (16:55 -0300)
diff --git a/apps/workbench/app/controllers/work_units_controller.rb b/apps/workbench/app/controllers/work_units_controller.rb

index 8527b4d48cb717b941ab376b68255e917c5797a3..767762c81e3cd3d899bda0b3bce873cc97c390b9 100644 (file)
--- a/apps/workbench/app/controllers/work_units_controller.rb
+++ b/apps/workbench/app/controllers/work_units_controller.rb
@@ -85,12 +85,6 @@ class WorkUnitsController < ApplicationController
        attrs['state'] = "Uncommitted"
  
        # required
-      attrs['command'] = ["arvados-cwl-runner",
-                          "--local",
-                          "--api=containers",
-                          "--project-uuid=#{params['work_unit']['owner_uuid']}",
-                          "/var/lib/cwl/workflow.json#main",
-                          "/var/lib/cwl/cwl.input.json"]
        attrs['container_image'] = "arvados/jobs"
        attrs['cwd'] = "/var/spool/cwl"
        attrs['output_path'] = "/var/spool/cwl"
@@ -102,6 +96,7 @@ class WorkUnitsController < ApplicationController
          "API" => true
        }
  
+      keep_cache = 256
        input_defaults = {}
        if wf_json
          main = get_cwl_main(wf_json)
@@ -119,11 +114,22 @@ class WorkUnitsController < ApplicationController
                if hint[:ramMin]
                  runtime_constraints["ram"] = hint[:ramMin] * 1024 * 1024
                end
+              if hint[:keep_cache]
+                keep_cache = hint[:keep_cache]
+              end
              end
            end
          end
        end
  
+      attrs['command'] = ["arvados-cwl-runner",
+                          "--local",
+                          "--api=containers",
+                          "--project-uuid=#{params['work_unit']['owner_uuid']}",
+                          "--collection-cache-size=#{keep_cache}", # MiB; option name as declared by arvados-cwl-runner's arg_parser
+                          "/var/lib/cwl/workflow.json#main",
+                          "/var/lib/cwl/cwl.input.json"]
+
        # mounts
        mounts = {
          "/var/lib/cwl/cwl.input.json" => {
diff --git a/build/package-testing/test-package-python27-python-arvados-cwl-runner.sh b/build/package-testing/test-package-python27-python-arvados-cwl-runner.sh

index 0274c8f45ea520eaeb0e4aa453aaaba92f09d984..e499238d89eb2572af6beb6f9d9a05bce1dd8b31 100755 (executable)
--- a/build/package-testing/test-package-python27-python-arvados-cwl-runner.sh
+++ b/build/package-testing/test-package-python27-python-arvados-cwl-runner.sh
@@ -3,6 +3,10 @@
  #
  # SPDX-License-Identifier: AGPL-3.0
  
+set -e
+
+arvados-cwl-runner --version
+
  exec python <<EOF
  import arvados_cwl
  print "arvados-cwl-runner version", arvados_cwl.__version__
diff --git a/doc/user/cwl/cwl-extensions.html.textile.liquid b/doc/user/cwl/cwl-extensions.html.textile.liquid

index 7abc794e198d6910eb64bbf88da79d218c7e2b25..d62002237a7e7b1d43aa7c59f4ef1afa7bc38b84 100644 (file)
--- a/doc/user/cwl/cwl-extensions.html.textile.liquid
+++ b/doc/user/cwl/cwl-extensions.html.textile.liquid
@@ -43,6 +43,7 @@ hints:
    arv:WorkflowRunnerResources:
      ramMin: 2048
      coresMin: 2
+    keep_cache: 512
    arv:ClusterTarget:
      cluster_id: clsr1
      project_uuid: clsr1-j7d0g-qxc4jcji7n4lafx
@@ -137,6 +138,7 @@ table(table table-bordered table-condensed).
  |_. Field |_. Type |_. Description |
  |ramMin|int|RAM, in mebibytes, to reserve for the arvados-cwl-runner process. Default 1 GiB|
  |coresMin|int|Number of cores to reserve to the arvados-cwl-runner process. Default 1 core.|
+|keep_cache|int|Size of collection metadata cache for the workflow runner, in MiB.  Default 256 MiB.  Will be added on to the RAM request when determining node size to request.|
  
  h2(#clustertarget). arv:ClusterTarget
  
diff --git a/sdk/cwl/arvados_cwl/__init__.py b/sdk/cwl/arvados_cwl/__init__.py

index 9b814f534c11af95aa59b77cdb3eaaa737866195..225741f947e396e3dcc86ad67ca1cdc056e03cfc 100644 (file)
--- a/sdk/cwl/arvados_cwl/__init__.py
+++ b/sdk/cwl/arvados_cwl/__init__.py
@@ -159,6 +159,10 @@ def arg_parser():  # type: () -> argparse.ArgumentParser
                           default=None,
                           metavar="CLUSTER_ID")
  
+    parser.add_argument("--collection-cache-size", type=int,
+                        default=None,
+                        help="Collection cache size (in MiB, default 256).")
+
      parser.add_argument("--name", type=str,
                          help="Name to use for workflow execution instance.",
                          default=None)
diff --git a/sdk/cwl/arvados_cwl/arv-cwl-schema.yml b/sdk/cwl/arvados_cwl/arv-cwl-schema.yml

index 902b1ffba299240438c60c8a0a866db598b2a101..dce1bd4d0247d2f56af8902f844814633b739b25 100644 (file)
--- a/sdk/cwl/arvados_cwl/arv-cwl-schema.yml
+++ b/sdk/cwl/arvados_cwl/arv-cwl-schema.yml
@@ -233,6 +233,13 @@ $graph:
        type: int?
        doc: Minimum cores allocated to cwl-runner
        jsonldPredicate: "https://w3id.org/cwl/cwl#ResourceRequirement/coresMin"
+    keep_cache:
+      type: int?
+      doc: |
+        Size of collection metadata cache for the workflow runner, in
+        MiB.  Default 256 MiB.  Will be added on to the RAM request
+        when determining node size to request.
+      jsonldPredicate: "http://arvados.org/cwl#RuntimeConstraints/keep_cache"
  
  - name: ClusterTarget
    type: record
diff --git a/sdk/cwl/arvados_cwl/arvcontainer.py b/sdk/cwl/arvados_cwl/arvcontainer.py

index f1ae65fc0f97e4b29f69fed0ca1fa3cf9ac78de2..4c49a449b2a68fdf1eaaa5cd674129ae257dfc6e 100644 (file)
--- a/sdk/cwl/arvados_cwl/arvcontainer.py
+++ b/sdk/cwl/arvados_cwl/arvcontainer.py
@@ -407,7 +407,7 @@ class RunnerContainer(Runner):
              "secret_mounts": secret_mounts,
              "runtime_constraints": {
                  "vcpus": math.ceil(self.submit_runner_cores),
-                "ram": math.ceil(1024*1024 * self.submit_runner_ram),
+                "ram": 1024*1024 * (math.ceil(self.submit_runner_ram) + math.ceil(self.collection_cache_size)),
                  "API": True
              },
              "use_existing": self.enable_reuse,
@@ -441,6 +441,7 @@ class RunnerContainer(Runner):
          # --eval-timeout is the timeout for javascript invocation
          # --parallel-task-count is the number of threads to use for job submission
          # --enable/disable-reuse sets desired job reuse
+        # --collection-cache-size sets aside memory to store collections
          command = ["arvados-cwl-runner",
                     "--local",
                     "--api=containers",
@@ -448,7 +449,8 @@ class RunnerContainer(Runner):
                     "--disable-validate",
                     "--eval-timeout=%s" % self.arvrunner.eval_timeout,
                     "--thread-count=%s" % self.arvrunner.thread_count,
-                   "--enable-reuse" if self.enable_reuse else "--disable-reuse"]
+                   "--enable-reuse" if self.enable_reuse else "--disable-reuse",
+                   "--collection-cache-size=%s" % self.collection_cache_size]
  
          if self.output_name:
              command.append("--output-name=" + self.output_name)
diff --git a/sdk/cwl/arvados_cwl/arvworkflow.py b/sdk/cwl/arvados_cwl/arvworkflow.py

index f514476b9099895d3739dbb34ab0a0c372bd7e73..eb78a25fedbd4754752ff8598d7e1faa6b1585db 100644 (file)
--- a/sdk/cwl/arvados_cwl/arvworkflow.py
+++ b/sdk/cwl/arvados_cwl/arvworkflow.py
@@ -155,7 +155,7 @@ class ArvadosWorkflow(Workflow):
  
      def job(self, joborder, output_callback, runtimeContext):
  
-        builder = self._init_job(joborder, runtimeContext)
+        builder = make_builder(joborder, self.hints, self.requirements, runtimeContext)
          runtimeContext = set_cluster_target(self.tool, self.arvrunner, builder, runtimeContext)
  
          req, _ = self.get_requirement("http://arvados.org/cwl#RunInSingleContainer")
diff --git a/sdk/cwl/arvados_cwl/context.py b/sdk/cwl/arvados_cwl/context.py

index 7831e1cfd0822abbcac5a77c460a33e8ff492714..8cfe22ad7b6619f1f02d95eaf71153e44e52fd01 100644 (file)
--- a/sdk/cwl/arvados_cwl/context.py
+++ b/sdk/cwl/arvados_cwl/context.py
@@ -34,6 +34,7 @@ class ArvRuntimeContext(RuntimeContext):
          self.submit_runner_cluster = None
          self.cluster_target_id = 0
          self.always_submit_runner = False
+        self.collection_cache_size = 256
  
          super(ArvRuntimeContext, self).__init__(kwargs)
  
diff --git a/sdk/cwl/arvados_cwl/executor.py b/sdk/cwl/arvados_cwl/executor.py

index 6cac709260fcef6f464d5d7e54706305e883a685..9595b55915477b6cb8beb858e8cc8e4b89f3d603 100644 (file)
--- a/sdk/cwl/arvados_cwl/executor.py
+++ b/sdk/cwl/arvados_cwl/executor.py
@@ -29,7 +29,7 @@ from .arvjob import RunnerJob, RunnerTemplate
  from .runner import Runner, upload_docker, upload_job_order, upload_workflow_deps
  from .arvtool import ArvadosCommandTool, validate_cluster_target
  from .arvworkflow import ArvadosWorkflow, upload_workflow
-from .fsaccess import CollectionFsAccess, CollectionFetcher, collectionResolver, CollectionCache
+from .fsaccess import CollectionFsAccess, CollectionFetcher, collectionResolver, CollectionCache, pdh_size
  from .perf import Perf
  from .pathmapper import NoFollowPathMapper
  from .task_queue import TaskQueue
@@ -37,7 +37,7 @@ from .context import ArvLoadingContext, ArvRuntimeContext
  from ._version import __version__
  
  from cwltool.process import shortname, UnsupportedRequirement, use_custom_schema
-from cwltool.pathmapper import adjustFileObjs, adjustDirObjs, get_listing
+from cwltool.pathmapper import adjustFileObjs, adjustDirObjs, get_listing, visit_class
  from cwltool.command_line_tool import compute_checksums
  
  logger = logging.getLogger('arvados.cwl-runner')
@@ -95,6 +95,7 @@ class ArvCwlExecutor(object):
              arvargs.output_name = None
              arvargs.output_tags = None
              arvargs.thread_count = 1
+            arvargs.collection_cache_size = None
  
          self.api = api_client
          self.processes = {}
@@ -116,13 +117,21 @@ class ArvCwlExecutor(object):
          self.thread_count = arvargs.thread_count
          self.poll_interval = 12
          self.loadingContext = None
+        self.should_estimate_cache_size = True
  
          if keep_client is not None:
              self.keep_client = keep_client
          else:
              self.keep_client = arvados.keep.KeepClient(api_client=self.api, num_retries=self.num_retries)
  
-        self.collection_cache = CollectionCache(self.api, self.keep_client, self.num_retries)
+        if arvargs.collection_cache_size:
+            collection_cache_size = arvargs.collection_cache_size*1024*1024
+            self.should_estimate_cache_size = False
+        else:
+            collection_cache_size = 256*1024*1024
+
+        self.collection_cache = CollectionCache(self.api, self.keep_client, self.num_retries,
+                                                cap=collection_cache_size)
  
          self.fetcher_constructor = partial(CollectionFetcher,
                                             api_client=self.api,
@@ -206,7 +215,8 @@ http://doc.arvados.org/install/install-api-server.html#disable_api_methods
  
  
      def start_run(self, runnable, runtimeContext):
-        self.task_queue.add(partial(runnable.run, runtimeContext))
+        self.task_queue.add(partial(runnable.run, runtimeContext),
+                            self.workflow_eval_lock, self.stop_polling)
  
      def process_submitted(self, container):
          with self.workflow_eval_lock:
@@ -216,7 +226,8 @@ http://doc.arvados.org/install/install-api-server.html#disable_api_methods
          with self.workflow_eval_lock:
              j = self.processes[uuid]
              logger.info("%s %s is %s", self.label(j), uuid, record["state"])
-            self.task_queue.add(partial(j.done, record))
+            self.task_queue.add(partial(j.done, record),
+                                self.workflow_eval_lock, self.stop_polling)
              del self.processes[uuid]
  
      def runtime_status_update(self, kind, message, detail=None):
@@ -584,6 +595,21 @@ http://doc.arvados.org/install/install-api-server.html#disable_api_methods
          if runtimeContext.priority < 1 or runtimeContext.priority > 1000:
              raise Exception("--priority must be in the range 1..1000.")
  
+        if self.should_estimate_cache_size:
+            visited = set()
+            estimated_size = [0]
+            def estimate_collection_cache(obj):
+                if obj.get("location", "").startswith("keep:"):
+                    m = pdh_size.match(obj["location"][5:])
+                    if m and m.group(1) not in visited:
+                        visited.add(m.group(1))
+                        estimated_size[0] += int(m.group(2))
+            visit_class(job_order, ("File", "Directory"), estimate_collection_cache)
+            runtimeContext.collection_cache_size = max(((estimated_size[0]*192) / (1024*1024))+1, 256)
+            self.collection_cache.set_cap(runtimeContext.collection_cache_size*1024*1024)
+
+        logger.info("Using collection cache size %s MiB", runtimeContext.collection_cache_size)
+
          runnerjob = None
          if runtimeContext.submit:
              # Submit a runner job to run the workflow for us.
@@ -604,7 +630,9 @@ http://doc.arvados.org/install/install-api-server.html#disable_api_methods
                                                  intermediate_output_ttl=runtimeContext.intermediate_output_ttl,
                                                  merged_map=merged_map,
                                                  priority=runtimeContext.priority,
-                                                secret_store=self.secret_store)
+                                                secret_store=self.secret_store,
+                                                collection_cache_size=runtimeContext.collection_cache_size,
+                                                collection_cache_is_default=self.should_estimate_cache_size)
              elif self.work_api == "jobs":
                  runnerjob = RunnerJob(self, tool, job_order, runtimeContext.enable_reuse,
                                        self.output_name,
@@ -676,6 +704,10 @@ http://doc.arvados.org/install/install-api-server.html#disable_api_methods
                      else:
                          logger.error("Workflow is deadlocked, no runnable processes and not waiting on any pending processes.")
                          break
+
+                if self.stop_polling.is_set():
+                    break
+
                  loopperf.__enter__()
              loopperf.__exit__()
  
diff --git a/sdk/cwl/arvados_cwl/fsaccess.py b/sdk/cwl/arvados_cwl/fsaccess.py

index 5981268128486496f22d28b4928d0b90b5775e7e..0816ee8fc05b74198ae9abad69887905bf8113ee 100644 (file)
--- a/sdk/cwl/arvados_cwl/fsaccess.py
+++ b/sdk/cwl/arvados_cwl/fsaccess.py
@@ -28,6 +28,8 @@ from schema_salad.ref_resolver import DefaultFetcher
  
  logger = logging.getLogger('arvados.cwl-runner')
  
+pdh_size = re.compile(r'([0-9a-f]{32})\+(\d+)(\+\S+)*')
+
  class CollectionCache(object):
      def __init__(self, api_client, keep_client, num_retries,
                   cap=256*1024*1024,
@@ -41,20 +43,26 @@ class CollectionCache(object):
          self.cap = cap
          self.min_entries = min_entries
  
-    def cap_cache(self):
-        if self.total > self.cap:
-            # ordered list iterates from oldest to newest
-            for pdh, v in self.collections.items():
-                if self.total < self.cap or len(self.collections) < self.min_entries:
-                    break
-                # cut it loose
-                logger.debug("Evicting collection reader %s from cache", pdh)
-                del self.collections[pdh]
-                self.total -= v[1]
+    def set_cap(self, cap):
+        self.cap = cap
+
+    def cap_cache(self, required):
+        # ordered dict iterates from oldest to newest
+        for pdh, v in self.collections.items():
+            available = self.cap - self.total
+            if available >= required or len(self.collections) < self.min_entries:
+                return
+            # cut it loose
+            logger.debug("Evicting collection reader %s from cache (cap %s total %s required %s)", pdh, self.cap, self.total, required)
+            del self.collections[pdh]
+            self.total -= v[1]
  
      def get(self, pdh):
          with self.lock:
              if pdh not in self.collections:
+                m = pdh_size.match(pdh)
+                if m:
+                    self.cap_cache(int(m.group(2)) * 128)
                  logger.debug("Creating collection reader for %s", pdh)
                  cr = arvados.collection.CollectionReader(pdh, api_client=self.api_client,
                                                           keep_client=self.keep_client,
@@ -62,7 +70,6 @@ class CollectionCache(object):
                  sz = len(cr.manifest_text()) * 128
                  self.collections[pdh] = (cr, sz)
                  self.total += sz
-                self.cap_cache()
              else:
                  cr, sz = self.collections[pdh]
                  # bump it to the back
diff --git a/sdk/cwl/arvados_cwl/runner.py b/sdk/cwl/arvados_cwl/runner.py

index a846f2b0016931dcd6a938c47f9572083963d2bc..6094cfe245872b1b58976901668bd80a8b5b91b0 100644 (file)
--- a/sdk/cwl/arvados_cwl/runner.py
+++ b/sdk/cwl/arvados_cwl/runner.py
@@ -364,7 +364,9 @@ class Runner(object):
                   output_name, output_tags, submit_runner_ram=0,
                   name=None, on_error=None, submit_runner_image=None,
                   intermediate_output_ttl=0, merged_map=None,
-                 priority=None, secret_store=None):
+                 priority=None, secret_store=None,
+                 collection_cache_size=256,
+                 collection_cache_is_default=True):
          self.arvrunner = runner
          self.tool = tool
          self.job_order = job_order
@@ -389,6 +391,7 @@ class Runner(object):
  
          self.submit_runner_cores = 1
          self.submit_runner_ram = 1024  # defaut 1 GiB
+        self.collection_cache_size = collection_cache_size
  
          runner_resource_req, _ = self.tool.get_requirement("http://arvados.org/cwl#WorkflowRunnerResources")
          if runner_resource_req:
@@ -396,6 +399,8 @@ class Runner(object):
                  self.submit_runner_cores = runner_resource_req["coresMin"]
              if runner_resource_req.get("ramMin"):
                  self.submit_runner_ram = runner_resource_req["ramMin"]
+            if runner_resource_req.get("keep_cache") and collection_cache_is_default:
+                self.collection_cache_size = runner_resource_req["keep_cache"]
  
          if submit_runner_ram:
              # Command line / initializer overrides default and/or spec from workflow
diff --git a/sdk/cwl/arvados_cwl/task_queue.py b/sdk/cwl/arvados_cwl/task_queue.py

index b9fd09807b452c1b06738ef1a7df72fd9dcc8708..1c233fac0ad98f4b0421a4e0856b00fd19d1422f 100644 (file)
--- a/sdk/cwl/arvados_cwl/task_queue.py
+++ b/sdk/cwl/arvados_cwl/task_queue.py
@@ -11,7 +11,7 @@ logger = logging.getLogger('arvados.cwl-runner')
  class TaskQueue(object):
      def __init__(self, lock, thread_count):
          self.thread_count = thread_count
-        self.task_queue = Queue.Queue()
+        self.task_queue = Queue.Queue(maxsize=self.thread_count)
          self.task_queue_threads = []
          self.lock = lock
          self.in_flight = 0
@@ -23,27 +23,39 @@ class TaskQueue(object):
              t.start()
  
      def task_queue_func(self):
+        while True:
+            task = self.task_queue.get()
+            if task is None:
+                return
+            try:
+                task()
+            except Exception as e:
+                logger.exception("Unhandled exception running task")
+                self.error = e
  
-            while True:
-                task = self.task_queue.get()
-                if task is None:
-                    return
-                try:
-                    task()
-                except Exception as e:
-                    logger.exception("Unhandled exception running task")
-                    self.error = e
-
-                with self.lock:
-                    self.in_flight -= 1
-
-    def add(self, task):
-        with self.lock:
-            if self.thread_count > 1:
+            with self.lock:
+                self.in_flight -= 1
+
+    def add(self, task, unlock, check_done):
+        if self.thread_count > 1:
+            with self.lock:
                  self.in_flight += 1
-                self.task_queue.put(task)
-            else:
-                task()
+        else:
+            task()
+            return
+
+        while True:
+            try:
+                unlock.release()
+                if check_done.is_set():
+                    return
+                self.task_queue.put(task, block=True, timeout=3)
+                return
+            except Queue.Full:
+                pass
+            finally:
+                unlock.acquire()
+
  
      def drain(self):
          try:
diff --git a/sdk/cwl/arvados_version.py b/sdk/cwl/arvados_version.py

index c2a4d735fd2aae6e593616c1f634029a5bcd4b59..d13dd5ec538e678268d7b79836d745ba89d46047 100644 (file)
--- a/sdk/cwl/arvados_version.py
+++ b/sdk/cwl/arvados_version.py
@@ -11,7 +11,7 @@ SETUP_DIR = os.path.dirname(__file__) or '.'
  
  def git_latest_tag():
      gittags = subprocess.check_output(['git', 'tag', '-l']).split()
-    gittags.sort(key=lambda s: map(int, s.split('.')),reverse=True)
+    gittags.sort(key=lambda s: [int(u) for u in s.split(b'.')],reverse=True)
      return str(next(iter(gittags)).decode('utf-8'))
  
  def choose_version_from():
diff --git a/sdk/cwl/gittaggers.py b/sdk/cwl/gittaggers.py

index 873199d7e5a6ffc4ca5ffd480ef976c5dc9f7fe5..4dc8448476123934dae7193fe680141671a2b7ec 100644 (file)
--- a/sdk/cwl/gittaggers.py
+++ b/sdk/cwl/gittaggers.py
@@ -30,7 +30,7 @@ class EggInfoFromGit(egg_info):
      """
      def git_latest_tag(self):
          gittags = subprocess.check_output(['git', 'tag', '-l']).split()
-        gittags.sort(key=lambda s: map(int, s.split('.')),reverse=True)
+        gittags.sort(key=lambda s: [int(u) for u in s.split(b'.')],reverse=True)
          return str(next(iter(gittags)).decode('utf-8'))
  
      def git_timestamp_tag(self):
diff --git a/sdk/cwl/setup.py b/sdk/cwl/setup.py

index 5d373282b6ca8ba3129b28c78e1bbe8934fda005..9d25a562ab32d09dcdfba627fc2089260879cce1 100644 (file)
--- a/sdk/cwl/setup.py
+++ b/sdk/cwl/setup.py
@@ -37,7 +37,7 @@ setup(name='arvados-cwl-runner',
            'schema-salad==2.7.20181116024232',
            'typing >= 3.6.4',
            'ruamel.yaml >=0.15.54, <=0.15.77',
-          'arvados-python-client>=1.1.4.20180607143841',
+          'arvados-python-client>=1.2.1.20181130020805',
            'setuptools',
            'ciso8601 >=1.0.6, <2.0.0',
            'subprocess32>=3.5.1',
diff --git a/sdk/cwl/tests/test_fsaccess.py b/sdk/cwl/tests/test_fsaccess.py

index d52e948710188dfb16e5ce175f5eb317138c7449..f83612a8b01186d822eb00728a76d31569408ced 100644 (file)
--- a/sdk/cwl/tests/test_fsaccess.py
+++ b/sdk/cwl/tests/test_fsaccess.py
@@ -36,34 +36,34 @@ class TestFsAccess(unittest.TestCase):
          cache = CollectionCache(mock.MagicMock(), mock.MagicMock(), 4)
          cr().manifest_text.return_value = 'x' * 524289
          self.assertEqual(0, cache.total)
-        c1 = cache.get("99999999999999999999999999999991+99")
-        self.assertIn("99999999999999999999999999999991+99", cache.collections)
-        self.assertNotIn("99999999999999999999999999999992+99", cache.collections)
+        c1 = cache.get("99999999999999999999999999999991+524289")
+        self.assertIn("99999999999999999999999999999991+524289", cache.collections)
+        self.assertNotIn("99999999999999999999999999999992+524289", cache.collections)
          self.assertEqual((524289*128)*1, cache.total)
  
-        c2 = cache.get("99999999999999999999999999999992+99")
-        self.assertIn("99999999999999999999999999999991+99", cache.collections)
-        self.assertIn("99999999999999999999999999999992+99", cache.collections)
+        c2 = cache.get("99999999999999999999999999999992+524289")
+        self.assertIn("99999999999999999999999999999991+524289", cache.collections)
+        self.assertIn("99999999999999999999999999999992+524289", cache.collections)
          self.assertEqual((524289*128)*2, cache.total)
  
-        c1 = cache.get("99999999999999999999999999999991+99")
-        self.assertIn("99999999999999999999999999999991+99", cache.collections)
-        self.assertIn("99999999999999999999999999999992+99", cache.collections)
+        c1 = cache.get("99999999999999999999999999999991+524289")
+        self.assertIn("99999999999999999999999999999991+524289", cache.collections)
+        self.assertIn("99999999999999999999999999999992+524289", cache.collections)
          self.assertEqual((524289*128)*2, cache.total)
  
-        c3 = cache.get("99999999999999999999999999999993+99")
-        self.assertIn("99999999999999999999999999999991+99", cache.collections)
-        self.assertIn("99999999999999999999999999999992+99", cache.collections)
+        c3 = cache.get("99999999999999999999999999999993+524289")
+        self.assertIn("99999999999999999999999999999991+524289", cache.collections)
+        self.assertIn("99999999999999999999999999999992+524289", cache.collections)
          self.assertEqual((524289*128)*3, cache.total)
  
-        c4 = cache.get("99999999999999999999999999999994+99")
-        self.assertIn("99999999999999999999999999999991+99", cache.collections)
-        self.assertNotIn("99999999999999999999999999999992+99", cache.collections)
+        c4 = cache.get("99999999999999999999999999999994+524289")
+        self.assertIn("99999999999999999999999999999991+524289", cache.collections)
+        self.assertNotIn("99999999999999999999999999999992+524289", cache.collections)
          self.assertEqual((524289*128)*3, cache.total)
  
-        c5 = cache.get("99999999999999999999999999999995+99")
-        self.assertNotIn("99999999999999999999999999999991+99", cache.collections)
-        self.assertNotIn("99999999999999999999999999999992+99", cache.collections)
+        c5 = cache.get("99999999999999999999999999999995+524289")
+        self.assertNotIn("99999999999999999999999999999991+524289", cache.collections)
+        self.assertNotIn("99999999999999999999999999999992+524289", cache.collections)
          self.assertEqual((524289*128)*3, cache.total)
  
  
@@ -72,37 +72,37 @@ class TestFsAccess(unittest.TestCase):
          cache = CollectionCache(mock.MagicMock(), mock.MagicMock(), 4)
          cr().manifest_text.return_value = 'x' * 524287
          self.assertEqual(0, cache.total)
-        c1 = cache.get("99999999999999999999999999999991+99")
-        self.assertIn("99999999999999999999999999999991+99", cache.collections)
-        self.assertNotIn("99999999999999999999999999999992+99", cache.collections)
+        c1 = cache.get("99999999999999999999999999999991+524287")
+        self.assertIn("99999999999999999999999999999991+524287", cache.collections)
+        self.assertNotIn("99999999999999999999999999999992+524287", cache.collections)
          self.assertEqual((524287*128)*1, cache.total)
  
-        c2 = cache.get("99999999999999999999999999999992+99")
-        self.assertIn("99999999999999999999999999999991+99", cache.collections)
-        self.assertIn("99999999999999999999999999999992+99", cache.collections)
+        c2 = cache.get("99999999999999999999999999999992+524287")
+        self.assertIn("99999999999999999999999999999991+524287", cache.collections)
+        self.assertIn("99999999999999999999999999999992+524287", cache.collections)
          self.assertEqual((524287*128)*2, cache.total)
  
-        c1 = cache.get("99999999999999999999999999999991+99")
-        self.assertIn("99999999999999999999999999999991+99", cache.collections)
-        self.assertIn("99999999999999999999999999999992+99", cache.collections)
+        c1 = cache.get("99999999999999999999999999999991+524287")
+        self.assertIn("99999999999999999999999999999991+524287", cache.collections)
+        self.assertIn("99999999999999999999999999999992+524287", cache.collections)
          self.assertEqual((524287*128)*2, cache.total)
  
-        c3 = cache.get("99999999999999999999999999999993+99")
-        self.assertIn("99999999999999999999999999999991+99", cache.collections)
-        self.assertIn("99999999999999999999999999999992+99", cache.collections)
+        c3 = cache.get("99999999999999999999999999999993+524287")
+        self.assertIn("99999999999999999999999999999991+524287", cache.collections)
+        self.assertIn("99999999999999999999999999999992+524287", cache.collections)
          self.assertEqual((524287*128)*3, cache.total)
  
-        c4 = cache.get("99999999999999999999999999999994+99")
-        self.assertIn("99999999999999999999999999999991+99", cache.collections)
-        self.assertIn("99999999999999999999999999999992+99", cache.collections)
+        c4 = cache.get("99999999999999999999999999999994+524287")
+        self.assertIn("99999999999999999999999999999991+524287", cache.collections)
+        self.assertIn("99999999999999999999999999999992+524287", cache.collections)
          self.assertEqual((524287*128)*4, cache.total)
  
-        c5 = cache.get("99999999999999999999999999999995+99")
-        self.assertIn("99999999999999999999999999999991+99", cache.collections)
-        self.assertNotIn("99999999999999999999999999999992+99", cache.collections)
+        c5 = cache.get("99999999999999999999999999999995+524287")
+        self.assertIn("99999999999999999999999999999991+524287", cache.collections)
+        self.assertNotIn("99999999999999999999999999999992+524287", cache.collections)
          self.assertEqual((524287*128)*4, cache.total)
  
-        c6 = cache.get("99999999999999999999999999999996+99")
-        self.assertNotIn("99999999999999999999999999999991+99", cache.collections)
-        self.assertNotIn("99999999999999999999999999999992+99", cache.collections)
+        c6 = cache.get("99999999999999999999999999999996+524287")
+        self.assertNotIn("99999999999999999999999999999991+524287", cache.collections)
+        self.assertNotIn("99999999999999999999999999999992+524287", cache.collections)
          self.assertEqual((524287*128)*4, cache.total)
diff --git a/sdk/cwl/tests/test_submit.py b/sdk/cwl/tests/test_submit.py

index 1b892a9836f209857995fbf4e94002e015dcd1cf..8a40fbaf066fb9c42fc741a0138a809d5f9ed4ad 100644 (file)
--- a/sdk/cwl/tests/test_submit.py
+++ b/sdk/cwl/tests/test_submit.py
@@ -274,7 +274,7 @@ def stubs(func):
              'command': ['arvados-cwl-runner', '--local', '--api=containers',
                          '--no-log-timestamps', '--disable-validate',
                          '--eval-timeout=20', '--thread-count=4',
-                        '--enable-reuse', '--debug', '--on-error=continue',
+                        '--enable-reuse', "--collection-cache-size=256", '--debug', '--on-error=continue',
                          '/var/lib/cwl/workflow.json#main', '/var/lib/cwl/cwl.input.json'],
              'name': 'submit_wf.cwl',
              'container_image': '999999999999999999999999999999d3+99',
@@ -283,7 +283,7 @@ def stubs(func):
              'runtime_constraints': {
                  'API': True,
                  'vcpus': 1,
-                'ram': 1024*1024*1024
+                'ram': (1024+256)*1024*1024
              },
              'use_existing': True,
              'properties': {},
@@ -559,7 +559,8 @@ class TestSubmit(unittest.TestCase):
              'arvados-cwl-runner', '--local', '--api=containers',
              '--no-log-timestamps', '--disable-validate',
              '--eval-timeout=20', '--thread-count=4',
-            '--disable-reuse', '--debug', '--on-error=continue',
+            '--disable-reuse', "--collection-cache-size=256",
+            '--debug', '--on-error=continue',
              '/var/lib/cwl/workflow.json#main', '/var/lib/cwl/cwl.input.json']
          expect_container["use_existing"] = False
  
@@ -584,7 +585,7 @@ class TestSubmit(unittest.TestCase):
              'arvados-cwl-runner', '--local', '--api=containers',
              '--no-log-timestamps', '--disable-validate',
              '--eval-timeout=20', '--thread-count=4',
-            '--disable-reuse', '--debug', '--on-error=continue',
+            '--disable-reuse', "--collection-cache-size=256", '--debug', '--on-error=continue',
              '/var/lib/cwl/workflow.json#main', '/var/lib/cwl/cwl.input.json']
          expect_container["use_existing"] = False
          expect_container["name"] = "submit_wf_no_reuse.cwl"
@@ -621,7 +622,8 @@ class TestSubmit(unittest.TestCase):
          expect_container["command"] = ['arvados-cwl-runner', '--local', '--api=containers',
                                         '--no-log-timestamps', '--disable-validate',
                                         '--eval-timeout=20', '--thread-count=4',
-                                       '--enable-reuse', '--debug', '--on-error=stop',
+                                       '--enable-reuse', "--collection-cache-size=256",
+                                       '--debug', '--on-error=stop',
                                         '/var/lib/cwl/workflow.json#main', '/var/lib/cwl/cwl.input.json']
  
          stubs.api.container_requests().create.assert_called_with(
@@ -647,7 +649,7 @@ class TestSubmit(unittest.TestCase):
          expect_container["command"] = ['arvados-cwl-runner', '--local', '--api=containers',
                                         '--no-log-timestamps', '--disable-validate',
                                         '--eval-timeout=20', '--thread-count=4',
-                                       '--enable-reuse',
+                                       '--enable-reuse', "--collection-cache-size=256",
                                         "--output-name="+output_name, '--debug', '--on-error=continue',
                                         '/var/lib/cwl/workflow.json#main', '/var/lib/cwl/cwl.input.json']
          expect_container["output_name"] = output_name
@@ -673,7 +675,7 @@ class TestSubmit(unittest.TestCase):
          expect_container["command"] = ['arvados-cwl-runner', '--local', '--api=containers',
                                         '--no-log-timestamps', '--disable-validate',
                                         '--eval-timeout=20', '--thread-count=4',
-                                       '--enable-reuse', "--debug",
+                                       '--enable-reuse', "--collection-cache-size=256", "--debug",
                                         "--storage-classes=foo", '--on-error=continue',
                                         '/var/lib/cwl/workflow.json#main', '/var/lib/cwl/cwl.input.json']
  
@@ -740,7 +742,8 @@ class TestSubmit(unittest.TestCase):
          expect_container["command"] = ['arvados-cwl-runner', '--local', '--api=containers',
                                         '--no-log-timestamps', '--disable-validate',
                                         '--eval-timeout=20', '--thread-count=4',
-                                       '--enable-reuse', '--debug', '--on-error=continue',
+                                       '--enable-reuse', "--collection-cache-size=256", '--debug',
+                                       '--on-error=continue',
                                         "--intermediate-output-ttl=3600",
                                         '/var/lib/cwl/workflow.json#main', '/var/lib/cwl/cwl.input.json']
  
@@ -765,7 +768,8 @@ class TestSubmit(unittest.TestCase):
          expect_container["command"] = ['arvados-cwl-runner', '--local', '--api=containers',
                                         '--no-log-timestamps', '--disable-validate',
                                         '--eval-timeout=20', '--thread-count=4',
-                                       '--enable-reuse', '--debug', '--on-error=continue',
+                                       '--enable-reuse', "--collection-cache-size=256",
+                                       '--debug', '--on-error=continue',
                                         "--trash-intermediate",
                                         '/var/lib/cwl/workflow.json#main', '/var/lib/cwl/cwl.input.json']
  
@@ -792,7 +796,7 @@ class TestSubmit(unittest.TestCase):
          expect_container["command"] = ['arvados-cwl-runner', '--local', '--api=containers',
                                         '--no-log-timestamps', '--disable-validate',
                                         '--eval-timeout=20', '--thread-count=4',
-                                       '--enable-reuse',
+                                       '--enable-reuse', "--collection-cache-size=256",
                                         "--output-tags="+output_tags, '--debug', '--on-error=continue',
                                         '/var/lib/cwl/workflow.json#main', '/var/lib/cwl/cwl.input.json']
  
@@ -814,7 +818,7 @@ class TestSubmit(unittest.TestCase):
              logging.exception("")
  
          expect_container = copy.deepcopy(stubs.expect_container_spec)
-        expect_container["runtime_constraints"]["ram"] = 2048*1024*1024
+        expect_container["runtime_constraints"]["ram"] = (2048+256)*1024*1024
  
          stubs.api.container_requests().create.assert_called_with(
              body=JsonDiffMatcher(expect_container))
@@ -877,13 +881,13 @@ class TestSubmit(unittest.TestCase):
              'command': ['arvados-cwl-runner', '--local', '--api=containers',
                          '--no-log-timestamps', '--disable-validate',
                          '--eval-timeout=20', '--thread-count=4',
-                        '--enable-reuse', '--debug', '--on-error=continue',
+                        '--enable-reuse', "--collection-cache-size=256", '--debug', '--on-error=continue',
                          '/var/lib/cwl/workflow/expect_arvworkflow.cwl#main', '/var/lib/cwl/cwl.input.json'],
              'cwd': '/var/spool/cwl',
              'runtime_constraints': {
                  'API': True,
                  'vcpus': 1,
-                'ram': 1073741824
+                'ram': 1342177280
              },
              'use_existing': True,
              'properties': {},
@@ -999,13 +1003,13 @@ class TestSubmit(unittest.TestCase):
              'command': ['arvados-cwl-runner', '--local', '--api=containers',
                          '--no-log-timestamps', '--disable-validate',
                          '--eval-timeout=20', '--thread-count=4',
-                        '--enable-reuse', '--debug', '--on-error=continue',
+                        '--enable-reuse', "--collection-cache-size=256", '--debug', '--on-error=continue',
                          '/var/lib/cwl/workflow.json#main', '/var/lib/cwl/cwl.input.json'],
              'cwd': '/var/spool/cwl',
              'runtime_constraints': {
                  'API': True,
                  'vcpus': 1,
-                'ram': 1073741824
+                'ram': 1342177280
              },
              'use_existing': True,
              'properties': {
@@ -1059,7 +1063,8 @@ class TestSubmit(unittest.TestCase):
          expect_container["command"] = ['arvados-cwl-runner', '--local', '--api=containers',
                                         '--no-log-timestamps', '--disable-validate',
                                         "--eval-timeout=20", "--thread-count=4",
-                                       '--enable-reuse', '--debug', '--on-error=continue',
+                                       '--enable-reuse', "--collection-cache-size=256", '--debug',
+                                       '--on-error=continue',
                                         '--project-uuid='+project_uuid,
                                         '/var/lib/cwl/workflow.json#main', '/var/lib/cwl/cwl.input.json']
  
@@ -1085,7 +1090,8 @@ class TestSubmit(unittest.TestCase):
          expect_container["command"] = ['arvados-cwl-runner', '--local', '--api=containers',
                                         '--no-log-timestamps', '--disable-validate',
                                         '--eval-timeout=60.0', '--thread-count=4',
-                                       '--enable-reuse', '--debug', '--on-error=continue',
+                                       '--enable-reuse', "--collection-cache-size=256",
+                                       '--debug', '--on-error=continue',
                                         '/var/lib/cwl/workflow.json#main', '/var/lib/cwl/cwl.input.json']
  
          stubs.api.container_requests().create.assert_called_with(
@@ -1093,6 +1099,33 @@ class TestSubmit(unittest.TestCase):
          self.assertEqual(capture_stdout.getvalue(),
                           stubs.expect_container_request_uuid + '\n')
  
+    @stubs
+    def test_submit_container_collection_cache(self, stubs):
+        project_uuid = 'zzzzz-j7d0g-zzzzzzzzzzzzzzz'
+        capture_stdout = cStringIO.StringIO()
+        try:
+            exited = arvados_cwl.main(
+                ["--submit", "--no-wait", "--api=containers", "--debug", "--collection-cache-size=500",
+                 "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
+                capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
+            self.assertEqual(exited, 0)
+        except:
+            logging.exception("")
+
+        expect_container = copy.deepcopy(stubs.expect_container_spec)
+        expect_container["command"] = ['arvados-cwl-runner', '--local', '--api=containers',
+                                       '--no-log-timestamps', '--disable-validate',
+                                       '--eval-timeout=20', '--thread-count=4',
+                                       '--enable-reuse', "--collection-cache-size=500",
+                                       '--debug', '--on-error=continue',
+                                       '/var/lib/cwl/workflow.json#main', '/var/lib/cwl/cwl.input.json']
+        expect_container["runtime_constraints"]["ram"] = (1024+500)*1024*1024
+
+        stubs.api.container_requests().create.assert_called_with(
+            body=JsonDiffMatcher(expect_container))
+        self.assertEqual(capture_stdout.getvalue(),
+                         stubs.expect_container_request_uuid + '\n')
+
  
      @stubs
      def test_submit_container_thread_count(self, stubs):
@@ -1111,7 +1144,8 @@ class TestSubmit(unittest.TestCase):
          expect_container["command"] = ['arvados-cwl-runner', '--local', '--api=containers',
                                         '--no-log-timestamps', '--disable-validate',
                                         '--eval-timeout=20', '--thread-count=20',
-                                       '--enable-reuse', '--debug', '--on-error=continue',
+                                       '--enable-reuse', "--collection-cache-size=256",
+                                       '--debug', '--on-error=continue',
                                         '/var/lib/cwl/workflow.json#main', '/var/lib/cwl/cwl.input.json']
  
          stubs.api.container_requests().create.assert_called_with(
@@ -1197,19 +1231,25 @@ class TestSubmit(unittest.TestCase):
          expect_container["runtime_constraints"] = {
              "API": True,
              "vcpus": 2,
-            "ram": 2000 * 2**20
+            "ram": (2000+512) * 2**20
          }
          expect_container["name"] = "submit_wf_runner_resources.cwl"
          expect_container["mounts"]["/var/lib/cwl/workflow.json"]["content"]["$graph"][1]["hints"] = [
              {
                  "class": "http://arvados.org/cwl#WorkflowRunnerResources",
                  "coresMin": 2,
-                "ramMin": 2000
+                "ramMin": 2000,
+                "keep_cache": 512
              }
          ]
          expect_container["mounts"]["/var/lib/cwl/workflow.json"]["content"]["$graph"][0]["$namespaces"] = {
              "arv": "http://arvados.org/cwl#",
          }
+        expect_container['command'] = ['arvados-cwl-runner', '--local', '--api=containers',
+                        '--no-log-timestamps', '--disable-validate',
+                        '--eval-timeout=20', '--thread-count=4',
+                        '--enable-reuse', "--collection-cache-size=512", '--debug', '--on-error=continue',
+                        '/var/lib/cwl/workflow.json#main', '/var/lib/cwl/cwl.input.json']
  
          stubs.api.container_requests().create.assert_called_with(
              body=JsonDiffMatcher(expect_container))
@@ -1279,6 +1319,7 @@ class TestSubmit(unittest.TestCase):
                  "--eval-timeout=20",
                  '--thread-count=4',
                  "--enable-reuse",
+                "--collection-cache-size=256",
                  '--debug',
                  "--on-error=continue",
                  "/var/lib/cwl/workflow.json#main",
@@ -1406,7 +1447,7 @@ class TestSubmit(unittest.TestCase):
              "properties": {},
              "runtime_constraints": {
                  "API": True,
-                "ram": 1073741824,
+                "ram": 1342177280,
                  "vcpus": 1
              },
              "secret_mounts": {
diff --git a/sdk/cwl/tests/test_tq.py b/sdk/cwl/tests/test_tq.py

index 2afbe0cff25f3d26e63253e697f3238468680e0f..a094890650e1a3049f177e9f01ec2330df7c7451 100644 (file)
--- a/sdk/cwl/tests/test_tq.py
+++ b/sdk/cwl/tests/test_tq.py
@@ -22,29 +22,37 @@ def fail_task():
  class TestTaskQueue(unittest.TestCase):
      def test_tq(self):
          tq = TaskQueue(threading.Lock(), 2)
+        try:
+            self.assertIsNone(tq.error)
  
-        self.assertIsNone(tq.error)
-
-        tq.add(success_task)
-        tq.add(success_task)
-        tq.add(success_task)
-        tq.add(success_task)
+            unlock = threading.Lock()
+            unlock.acquire()
+            check_done = threading.Event()
  
-        tq.join()
+            tq.add(success_task, unlock, check_done)
+            tq.add(success_task, unlock, check_done)
+            tq.add(success_task, unlock, check_done)
+            tq.add(success_task, unlock, check_done)
+        finally:
+            tq.join()
  
          self.assertIsNone(tq.error)
  
  
      def test_tq_error(self):
          tq = TaskQueue(threading.Lock(), 2)
-
-        self.assertIsNone(tq.error)
-
-        tq.add(success_task)
-        tq.add(success_task)
-        tq.add(fail_task)
-        tq.add(success_task)
-
-        tq.join()
+        try:
+            self.assertIsNone(tq.error)
+
+            unlock = threading.Lock()
+            unlock.acquire()
+            check_done = threading.Event()
+
+            tq.add(success_task, unlock, check_done)
+            tq.add(success_task, unlock, check_done)
+            tq.add(fail_task, unlock, check_done)
+            tq.add(success_task, unlock, check_done)
+        finally:
+            tq.join()
  
          self.assertIsNotNone(tq.error)
diff --git a/sdk/cwl/tests/wf/submit_wf_runner_resources.cwl b/sdk/cwl/tests/wf/submit_wf_runner_resources.cwl

index 9e2712194950627d87c148b76fae14d00f5fac2b..814cd07ab5d0833a5a374e503b6ee1feae00ef87 100644 (file)
--- a/sdk/cwl/tests/wf/submit_wf_runner_resources.cwl
+++ b/sdk/cwl/tests/wf/submit_wf_runner_resources.cwl
@@ -15,6 +15,7 @@ hints:
    arv:WorkflowRunnerResources:
      ramMin: 2000
      coresMin: 2
+    keep_cache: 512
  inputs:
    - id: x
      type: File
diff --git a/sdk/pam/arvados_version.py b/sdk/pam/arvados_version.py

index 3be72b51f09f0da2ea9ace87531840ef84a2e35d..2e6484cabdf1e71d39f5fe21139b29c2ce09ad93 100644 (file)
--- a/sdk/pam/arvados_version.py
+++ b/sdk/pam/arvados_version.py
@@ -9,7 +9,7 @@ import re
  
  def git_latest_tag():
      gittags = subprocess.check_output(['git', 'tag', '-l']).split()
-    gittags.sort(key=lambda s: map(int, s.split('.')),reverse=True)
+    gittags.sort(key=lambda s: [int(u) for u in s.split(b'.')],reverse=True)
      return str(next(iter(gittags)).decode('utf-8'))
  
  def git_timestamp_tag():
diff --git a/sdk/python/arvados_version.py b/sdk/python/arvados_version.py

index 3be72b51f09f0da2ea9ace87531840ef84a2e35d..2e6484cabdf1e71d39f5fe21139b29c2ce09ad93 100644 (file)
--- a/sdk/python/arvados_version.py
+++ b/sdk/python/arvados_version.py
@@ -9,7 +9,7 @@ import re
  
  def git_latest_tag():
      gittags = subprocess.check_output(['git', 'tag', '-l']).split()
-    gittags.sort(key=lambda s: map(int, s.split('.')),reverse=True)
+    gittags.sort(key=lambda s: [int(u) for u in s.split(b'.')],reverse=True)
      return str(next(iter(gittags)).decode('utf-8'))
  
  def git_timestamp_tag():
diff --git a/sdk/python/gittaggers.py b/sdk/python/gittaggers.py

index 213d39a24b071e089a2d05115cddafa3403f3cb8..f3278fcc1d5e7aeab1f6748f90bc80040e6fce37 100644 (file)
--- a/sdk/python/gittaggers.py
+++ b/sdk/python/gittaggers.py
@@ -14,7 +14,7 @@ class EggInfoFromGit(egg_info):
      """
      def git_latest_tag(self):
          gittags = subprocess.check_output(['git', 'tag', '-l']).split()
-        gittags.sort(key=lambda s: map(int, s.split('.')),reverse=True)
+        gittags.sort(key=lambda s: [int(u) for u in s.split(b'.')],reverse=True)
          return str(next(iter(gittags)).decode('utf-8'))
  
      def git_timestamp_tag(self):
diff --git a/services/api/app/models/arvados_model.rb b/services/api/app/models/arvados_model.rb

index cc15a56f35325f56ea5762c050aa4494f5e5a5d4..4e7f8f0bf7b2672e0dc264022cd1d160e76d868c 100644 (file)
--- a/services/api/app/models/arvados_model.rb
+++ b/services/api/app/models/arvados_model.rb
@@ -403,7 +403,7 @@ class ArvadosModel < ActiveRecord::Base
        cast = serialized_attributes[column] ? '::text' : ''
        "coalesce(#{column}#{cast},'')"
      end
-    "to_tsvector('english', #{parts.join(" || ' ' || ")})"
+    "to_tsvector('english', substr(#{parts.join(" || ' ' || ")}, 0, 8000))"
    end
  
    def self.apply_filters query, filters
diff --git a/services/api/db/migrate/20180917200000_replace_full_text_indexes.rb b/services/api/db/migrate/20180917200000_replace_full_text_indexes.rb

new file mode 100644 (file)

index 0000000..b0eea9e
--- /dev/null
+++ b/services/api/db/migrate/20180917200000_replace_full_text_indexes.rb
@@ -0,0 +1,14 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+require './db/migrate/20161213172944_full_text_search_indexes'
+
+class ReplaceFullTextIndexes < ActiveRecord::Migration
+  def up
+    FullTextSearchIndexes.new.up
+  end
+
+  def down
+  end
+end
diff --git a/services/api/db/structure.sql b/services/api/db/structure.sql

index 5105914df0dbd04ab599790d934f03194021dccf..aa29a1cbb409d59542d0d037cbdf703f9c407ea5 100644 (file)
--- a/services/api/db/structure.sql
+++ b/services/api/db/structure.sql
@@ -1631,7 +1631,7 @@ CREATE INDEX collection_index_on_properties ON public.collections USING gin (pro
  -- Name: collections_full_text_search_idx; Type: INDEX; Schema: public; Owner: -
  --
  
-CREATE INDEX collections_full_text_search_idx ON public.collections USING gin (to_tsvector('english'::regconfig, (((((((((((((((((COALESCE(owner_uuid, ''::character varying))::text || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(portable_data_hash, ''::character varying))::text) || ' '::text) || (COALESCE(uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || (COALESCE(description, ''::character varying))::text) || ' '::text) || COALESCE((properties)::text, ''::text)) || ' '::text) || COALESCE(file_names, (''::character varying)::text))));
+CREATE INDEX collections_full_text_search_idx ON public.collections USING gin (to_tsvector('english'::regconfig, substr((((((((((((((((((COALESCE(owner_uuid, ''::character varying))::text || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(portable_data_hash, ''::character varying))::text) || ' '::text) || (COALESCE(uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || (COALESCE(description, ''::character varying))::text) || ' '::text) || COALESCE((properties)::text, ''::text)) || ' '::text) || COALESCE(file_names, ''::text)), 0, 1000000)));
  
  
  --
@@ -1645,7 +1645,7 @@ CREATE INDEX collections_search_index ON public.collections USING btree (owner_u
  -- Name: container_requests_full_text_search_idx; Type: INDEX; Schema: public; Owner: -
  --
  
-CREATE INDEX container_requests_full_text_search_idx ON public.container_requests USING gin (to_tsvector('english'::regconfig, (((((((((((((((((((((((((((((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(description, ''::text)) || ' '::text) || COALESCE((properties)::text, ''::text)) || ' '::text) || (COALESCE(state, ''::character varying))::text) || ' '::text) || (COALESCE(requesting_container_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(container_uuid, ''::character varying))::text) || ' '::text) || COALESCE(runtime_constraints, ''::text)) || ' '::text) || (COALESCE(container_image, ''::character varying))::text) || ' '::text) || COALESCE(environment, ''::text)) || ' '::text) || (COALESCE(cwd, ''::character varying))::text) || ' '::text) || COALESCE(command, ''::text)) || ' '::text) || (COALESCE(output_path, ''::character varying))::text) || ' '::text) || COALESCE(filters, ''::text)) || ' '::text) || COALESCE(scheduling_parameters, ''::text)) || ' '::text) || (COALESCE(output_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(log_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(output_name, ''::character varying))::text)));
+CREATE INDEX container_requests_full_text_search_idx ON public.container_requests USING gin (to_tsvector('english'::regconfig, substr((((((((((((((((((((((((((((((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(description, ''::text)) || ' '::text) || COALESCE((properties)::text, ''::text)) || ' '::text) || (COALESCE(state, ''::character varying))::text) || ' '::text) || (COALESCE(requesting_container_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(container_uuid, ''::character varying))::text) || ' '::text) || COALESCE(runtime_constraints, ''::text)) || ' '::text) || (COALESCE(container_image, ''::character varying))::text) || ' '::text) || COALESCE(environment, ''::text)) || ' '::text) || (COALESCE(cwd, ''::character varying))::text) || ' '::text) || COALESCE(command, ''::text)) || ' '::text) || (COALESCE(output_path, ''::character varying))::text) || ' '::text) || COALESCE(filters, ''::text)) || ' '::text) || COALESCE(scheduling_parameters, ''::text)) || ' '::text) || (COALESCE(output_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(log_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(output_name, ''::character varying))::text), 0, 1000000)));
  
  
  --
@@ -1680,7 +1680,7 @@ CREATE INDEX group_index_on_properties ON public.groups USING gin (properties);
  -- Name: groups_full_text_search_idx; Type: INDEX; Schema: public; Owner: -
  --
  
-CREATE INDEX groups_full_text_search_idx ON public.groups USING gin (to_tsvector('english'::regconfig, (((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || (COALESCE(description, ''::character varying))::text) || ' '::text) || (COALESCE(group_class, ''::character varying))::text) || ' '::text) || COALESCE((properties)::text, ''::text))));
+CREATE INDEX groups_full_text_search_idx ON public.groups USING gin (to_tsvector('english'::regconfig, substr((((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || (COALESCE(description, ''::character varying))::text) || ' '::text) || (COALESCE(group_class, ''::character varying))::text) || ' '::text) || COALESCE((properties)::text, ''::text)), 0, 1000000)));
  
  
  --
@@ -2653,7 +2653,7 @@ CREATE INDEX job_tasks_search_index ON public.job_tasks USING btree (uuid, owner
  -- Name: jobs_full_text_search_idx; Type: INDEX; Schema: public; Owner: -
  --
  
-CREATE INDEX jobs_full_text_search_idx ON public.jobs USING gin (to_tsvector('english'::regconfig, (((((((((((((((((((((((((((((((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(submit_id, ''::character varying))::text) || ' '::text) || (COALESCE(script, ''::character varying))::text) || ' '::text) || (COALESCE(script_version, ''::character varying))::text) || ' '::text) || COALESCE(script_parameters, ''::text)) || ' '::text) || (COALESCE(cancelled_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(cancelled_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(output, ''::character varying))::text) || ' '::text) || (COALESCE(is_locked_by_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(log, ''::character varying))::text) || ' '::text) || COALESCE(tasks_summary, ''::text)) || ' '::text) || COALESCE(runtime_constraints, ''::text)) || ' '::text) || (COALESCE(repository, ''::character varying))::text) || ' '::text) || (COALESCE(supplied_script_version, ''::character varying))::text) || ' '::text) || (COALESCE(docker_image_locator, ''::character varying))::text) || ' '::text) || (COALESCE(description, ''::character varying))::text) || ' '::text) || (COALESCE(state, ''::character varying))::text) || ' '::text) || (COALESCE(arvados_sdk_version, ''::character varying))::text) || ' '::text) || COALESCE(components, ''::text))));
+CREATE INDEX jobs_full_text_search_idx ON public.jobs USING gin (to_tsvector('english'::regconfig, substr((((((((((((((((((((((((((((((((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(submit_id, ''::character varying))::text) || ' '::text) || (COALESCE(script, ''::character varying))::text) || ' '::text) || (COALESCE(script_version, ''::character varying))::text) || ' '::text) || COALESCE(script_parameters, ''::text)) || ' '::text) || (COALESCE(cancelled_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(cancelled_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(output, ''::character varying))::text) || ' '::text) || (COALESCE(is_locked_by_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(log, ''::character varying))::text) || ' '::text) || COALESCE(tasks_summary, ''::text)) || ' '::text) || COALESCE(runtime_constraints, ''::text)) || ' '::text) || (COALESCE(repository, ''::character varying))::text) || ' '::text) || (COALESCE(supplied_script_version, ''::character varying))::text) || ' '::text) || (COALESCE(docker_image_locator, ''::character varying))::text) || ' '::text) || (COALESCE(description, ''::character varying))::text) || ' '::text) || (COALESCE(state, ''::character varying))::text) || ' '::text) || (COALESCE(arvados_sdk_version, ''::character varying))::text) || ' '::text) || COALESCE(components, ''::text)), 0, 1000000)));
  
  
  --
@@ -2744,7 +2744,7 @@ CREATE INDEX permission_target_user_trashed_level ON public.materialized_permiss
  -- Name: pipeline_instances_full_text_search_idx; Type: INDEX; Schema: public; Owner: -
  --
  
-CREATE INDEX pipeline_instances_full_text_search_idx ON public.pipeline_instances USING gin (to_tsvector('english'::regconfig, (((((((((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(pipeline_template_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(components, ''::text)) || ' '::text) || COALESCE(properties, ''::text)) || ' '::text) || (COALESCE(state, ''::character varying))::text) || ' '::text) || COALESCE(components_summary, ''::text)) || ' '::text) || (COALESCE(description, ''::character varying))::text)));
+CREATE INDEX pipeline_instances_full_text_search_idx ON public.pipeline_instances USING gin (to_tsvector('english'::regconfig, substr((((((((((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(pipeline_template_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(components, ''::text)) || ' '::text) || COALESCE(properties, ''::text)) || ' '::text) || (COALESCE(state, ''::character varying))::text) || ' '::text) || COALESCE(components_summary, ''::text)) || ' '::text) || (COALESCE(description, ''::character varying))::text), 0, 1000000)));
  
  
  --
@@ -2765,7 +2765,7 @@ CREATE UNIQUE INDEX pipeline_template_owner_uuid_name_unique ON public.pipeline_
  -- Name: pipeline_templates_full_text_search_idx; Type: INDEX; Schema: public; Owner: -
  --
  
-CREATE INDEX pipeline_templates_full_text_search_idx ON public.pipeline_templates USING gin (to_tsvector('english'::regconfig, (((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(components, ''::text)) || ' '::text) || (COALESCE(description, ''::character varying))::text)));
+CREATE INDEX pipeline_templates_full_text_search_idx ON public.pipeline_templates USING gin (to_tsvector('english'::regconfig, substr((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(components, ''::text)) || ' '::text) || (COALESCE(description, ''::character varying))::text), 0, 1000000)));
  
  
  --
@@ -2821,7 +2821,7 @@ CREATE INDEX virtual_machines_search_index ON public.virtual_machines USING btre
  -- Name: workflows_full_text_search_idx; Type: INDEX; Schema: public; Owner: -
  --
  
-CREATE INDEX workflows_full_text_search_idx ON public.workflows USING gin (to_tsvector('english'::regconfig, (((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(description, ''::text))));
+CREATE INDEX workflows_full_text_search_idx ON public.workflows USING gin (to_tsvector('english'::regconfig, substr((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(description, ''::text)), 0, 1000000)));
  
  
  --
@@ -3187,6 +3187,8 @@ INSERT INTO schema_migrations (version) VALUES ('20180913175443');
  
  INSERT INTO schema_migrations (version) VALUES ('20180915155335');
  
+INSERT INTO schema_migrations (version) VALUES ('20180917200000');
+
  INSERT INTO schema_migrations (version) VALUES ('20180917205609');
  
  INSERT INTO schema_migrations (version) VALUES ('20180919001158');
diff --git a/services/dockercleaner/arvados_version.py b/services/dockercleaner/arvados_version.py

index 3be72b51f09f0da2ea9ace87531840ef84a2e35d..2e6484cabdf1e71d39f5fe21139b29c2ce09ad93 100644 (file)
--- a/services/dockercleaner/arvados_version.py
+++ b/services/dockercleaner/arvados_version.py
@@ -9,7 +9,7 @@ import re
  
  def git_latest_tag():
      gittags = subprocess.check_output(['git', 'tag', '-l']).split()
-    gittags.sort(key=lambda s: map(int, s.split('.')),reverse=True)
+    gittags.sort(key=lambda s: [int(u) for u in s.split(b'.')],reverse=True)
      return str(next(iter(gittags)).decode('utf-8'))
  
  def git_timestamp_tag():
diff --git a/services/fuse/arvados_version.py b/services/fuse/arvados_version.py

index 3be72b51f09f0da2ea9ace87531840ef84a2e35d..2e6484cabdf1e71d39f5fe21139b29c2ce09ad93 100644 (file)
--- a/services/fuse/arvados_version.py
+++ b/services/fuse/arvados_version.py
@@ -9,7 +9,7 @@ import re
  
  def git_latest_tag():
      gittags = subprocess.check_output(['git', 'tag', '-l']).split()
-    gittags.sort(key=lambda s: map(int, s.split('.')),reverse=True)
+    gittags.sort(key=lambda s: [int(u) for u in s.split(b'.')],reverse=True)
      return str(next(iter(gittags)).decode('utf-8'))
  
  def git_timestamp_tag():
diff --git a/services/nodemanager/arvados_version.py b/services/nodemanager/arvados_version.py

index 3be72b51f09f0da2ea9ace87531840ef84a2e35d..2e6484cabdf1e71d39f5fe21139b29c2ce09ad93 100644 (file)
--- a/services/nodemanager/arvados_version.py
+++ b/services/nodemanager/arvados_version.py
@@ -9,7 +9,7 @@ import re
  
  def git_latest_tag():
      gittags = subprocess.check_output(['git', 'tag', '-l']).split()
-    gittags.sort(key=lambda s: map(int, s.split('.')),reverse=True)
+    gittags.sort(key=lambda s: [int(u) for u in s.split(b'.')],reverse=True)
      return str(next(iter(gittags)).decode('utf-8'))
  
  def git_timestamp_tag():
diff --git a/tools/crunchstat-summary/arvados_version.py b/tools/crunchstat-summary/arvados_version.py

index 3be72b51f09f0da2ea9ace87531840ef84a2e35d..2e6484cabdf1e71d39f5fe21139b29c2ce09ad93 100644 (file)
--- a/tools/crunchstat-summary/arvados_version.py
+++ b/tools/crunchstat-summary/arvados_version.py
@@ -9,7 +9,7 @@ import re
  
  def git_latest_tag():
      gittags = subprocess.check_output(['git', 'tag', '-l']).split()
-    gittags.sort(key=lambda s: map(int, s.split('.')),reverse=True)
+    gittags.sort(key=lambda s: [int(u) for u in s.split(b'.')],reverse=True)
      return str(next(iter(gittags)).decode('utf-8'))
  
  def git_timestamp_tag():
author	Lucas Di Pentima <ldipentima@veritasgenetics.com>
	Mon, 3 Dec 2018 19:55:54 +0000 (16:55 -0300)
committer	Lucas Di Pentima <ldipentima@veritasgenetics.com>
	Mon, 3 Dec 2018 19:55:54 +0000 (16:55 -0300)
apps/workbench/app/controllers/work_units_controller.rb		patch \| blob \| history
build/package-testing/test-package-python27-python-arvados-cwl-runner.sh		patch \| blob \| history
doc/user/cwl/cwl-extensions.html.textile.liquid		patch \| blob \| history
sdk/cwl/arvados_cwl/__init__.py		patch \| blob \| history
sdk/cwl/arvados_cwl/arv-cwl-schema.yml		patch \| blob \| history
sdk/cwl/arvados_cwl/arvcontainer.py		patch \| blob \| history
sdk/cwl/arvados_cwl/arvworkflow.py		patch \| blob \| history
sdk/cwl/arvados_cwl/context.py		patch \| blob \| history
sdk/cwl/arvados_cwl/executor.py		patch \| blob \| history
sdk/cwl/arvados_cwl/fsaccess.py		patch \| blob \| history
sdk/cwl/arvados_cwl/runner.py		patch \| blob \| history
sdk/cwl/arvados_cwl/task_queue.py		patch \| blob \| history
sdk/cwl/arvados_version.py		patch \| blob \| history
sdk/cwl/gittaggers.py		patch \| blob \| history
sdk/cwl/setup.py		patch \| blob \| history
sdk/cwl/tests/test_fsaccess.py		patch \| blob \| history
sdk/cwl/tests/test_submit.py		patch \| blob \| history
sdk/cwl/tests/test_tq.py		patch \| blob \| history
sdk/cwl/tests/wf/submit_wf_runner_resources.cwl		patch \| blob \| history
sdk/pam/arvados_version.py		patch \| blob \| history
sdk/python/arvados_version.py		patch \| blob \| history
sdk/python/gittaggers.py		patch \| blob \| history
services/api/app/models/arvados_model.rb		patch \| blob \| history
services/api/db/migrate/20180917200000_replace_full_text_indexes.rb	[new file with mode: 0644]	patch \| blob
services/api/db/structure.sql		patch \| blob \| history
services/dockercleaner/arvados_version.py		patch \| blob \| history
services/fuse/arvados_version.py		patch \| blob \| history
services/nodemanager/arvados_version.py		patch \| blob \| history
tools/crunchstat-summary/arvados_version.py		patch \| blob \| history