Merge branch '18205-container-cost'
author Tom Clegg <tom@curii.com>
Mon, 22 Aug 2022 18:45:10 +0000 (14:45 -0400)
committer Tom Clegg <tom@curii.com>
Mon, 22 Aug 2022 18:45:10 +0000 (14:45 -0400)
refs #18205

Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom@curii.com>

16 files changed:
apps/workbench/app/controllers/actions_controller.rb
apps/workbench/app/controllers/application_controller.rb
apps/workbench/app/helpers/application_helper.rb
apps/workbench/app/models/arvados_api_client.rb
apps/workbench/test/test_helper.rb
doc/admin/upgrading.html.textile.liquid
doc/install/arvbox.html.textile.liquid
lib/diagnostics/cmd.go
sdk/cli/bin/arv
sdk/cwl/arvados_cwl/executor.py
sdk/cwl/arvados_cwl/runner.py
sdk/cwl/setup.py
services/keep-web/handler.go
tools/salt-install/local.params.example.single_host_multiple_hostnames
tools/salt-install/local.params.example.single_host_single_hostname
tools/salt-install/provision.sh

index df489d2eebc997c9efc8f3c55236f46021657fd4..7b8c8eafc81d31cd566c6cef81e050b5ca521c9a 100644 (file)
@@ -167,7 +167,7 @@ class ActionsController < ApplicationController
     flash = {}
 
     # set owner_uuid to current project, provided it is writable
-    action_data = Oj.load(params['action_data'] || "{}")
+    action_data = Oj.safe_load(params['action_data'] || "{}")
     if action_data['current_project_uuid'] and
         current_project = Group.find?(action_data['current_project_uuid']) and
         current_project.writable_by.andand.include?(current_user.uuid)
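
The recurring change in the Workbench hunks above and below is swapping Oj.load for Oj.safe_load / Oj.strict_load. In Oj's default :object mode, load() can rebuild Ruby objects from tagged JSON, which is the root cause of the GHSL-2022-063 advisory described in the release notes further down; the strict variants only ever produce plain JSON types. A minimal sketch of the difference, using a hypothetical payload:

<pre>
require 'oj'

# Hypothetical payload using "^o", Oj's object-mode class tag:
payload = '{"^o":"Gem::Requirement","requirements":[]}'

# In Oj's default (:object) mode, load() may instantiate the named class:
Oj.load(payload)                               # => a Gem::Requirement instance (unsafe)

# safe_load/strict_load parse only plain JSON types (Hash, Array, String, ...):
Oj.safe_load(payload)                          # => {"^o"=>"Gem::Requirement", "requirements"=>[]}
Oj.strict_load(payload, :symbol_keys => true)  # same, with Symbol keys
</pre>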
index 7481575a6111c437761af84afa802018bfba9d63..c2636bf5d74868464da789bdb8a35551869ebc82 100644 (file)
@@ -152,12 +152,12 @@ class ApplicationController < ActionController::Base
     if params[:filters]
       filters = params[:filters]
       if filters.is_a? String
-        filters = Oj.load filters
+        filters = Oj.safe_load filters
       elsif filters.is_a? Array
         filters = filters.collect do |filter|
           if filter.is_a? String
             # Accept filters[]=["foo","=","bar"]
-            Oj.load filter
+            Oj.safe_load filter
           else
             # Accept filters=[["foo","=","bar"]]
             filter
@@ -361,7 +361,7 @@ class ApplicationController < ActionController::Base
     @updates.keys.each do |attr|
       if @object.send(attr).is_a? Hash
         if @updates[attr].is_a? String
-          @updates[attr] = Oj.load @updates[attr]
+          @updates[attr] = Oj.safe_load @updates[attr]
         end
         if params[:merge] || params["merge_#{attr}".to_sym]
           # Merge provided Hash with current Hash, instead of
index f22ab50166591cb0875f32bfdba368af42e91fc3..697c469b563f3553f83c15a1089c0f96745d6b48 100644 (file)
@@ -564,7 +564,7 @@ module ApplicationHelper
                      "data-emptytext" => "none",
                      "data-placement" => "bottom",
                      "data-type" => "select",
-                     "data-source" => (opt_empty_selection + primary_type[:symbols].map {|i| {:value => i, :text => i} }).to_json,
+                     "data-source" => (opt_empty_selection + primary_type[:symbols].map {|i| {:value => cwl_shortname(i), :text => cwl_shortname(i)} }).to_json,
                      "data-url" => url_for(action: "update", id: object.uuid, controller: object.class.to_s.pluralize.underscore, merge: true),
                      "data-title" => "Set value for #{cwl_shortname(input_schema[:id])}",
                      "data-name" => dn,
index 3c9bfa793daa5a8ba143cbbf8fc40122654b6fff..47fcc4ce51ffe1aafb288453b9d9c6a0b777bd0d 100644 (file)
@@ -40,7 +40,7 @@ class ArvadosApiClient
     def initialize(request_url, api_response)
       @api_status = api_response.status_code
       @api_response_s = api_response.content
-      @api_response = Oj.load(@api_response_s, :symbol_keys => true)
+      @api_response = Oj.strict_load(@api_response_s, :symbol_keys => true)
       errors = @api_response[:errors]
       if errors.respond_to?(:join)
         errors = errors.join("\n\n")
@@ -167,7 +167,7 @@ class ArvadosApiClient
     end
 
     begin
-      resp = Oj.load(msg.content, :symbol_keys => true)
+      resp = Oj.strict_load(msg.content, :symbol_keys => true)
     rescue Oj::ParseError
       resp = nil
     end
index 84728b8c6882082bbaf015edf2dcb2450674d61f..2e8ead94cdb5e46f905f8872ec1d7b84ddec1f87 100644 (file)
@@ -158,7 +158,7 @@ class ActiveSupport::TestCase
     }
   end
   def json_response
-    Oj.load(@response.body)
+    Oj.safe_load(@response.body)
   end
 end
 
index 96e68239b64620d37ca0b76b11c882946e3232ff..d0dc7cbd87a51d1db2798f2d2b89edd5084c6b59 100644 (file)
@@ -28,10 +28,32 @@ TODO: extract this information based on git commit messages and generate changel
 <div class="releasenotes">
 </notextile>
 
-h2(#main). development main (as of 2022-06-02)
+h2(#main). development main (as of 2022-08-09)
+
+"previous: Upgrading to 2.4.2":#v2_4_2
+
+h2(#v2_4_2). v2.4.2 (2022-08-09)
 
 "previous: Upgrading to 2.4.1":#v2_4_1
 
+h3. GHSL-2022-063
+
+GitHub Security Lab (GHSL) reported a remote code execution (RCE) vulnerability in the Arvados Workbench that allows authenticated attackers to execute arbitrary code via specially crafted JSON payloads.
+
+This vulnerability is fixed in 2.4.2 ("#19316":https://dev.arvados.org/issues/19316).
+
+It is likely that this vulnerability exists in all versions of Arvados up to 2.4.1.
+
+This vulnerability is specific to the Ruby on Rails Workbench application ("Workbench 1").  We do not believe any other Arvados components, including the TypeScript browser-based Workbench application ("Workbench 2") or API Server, are vulnerable to this attack.
+
+h3. CVE-2022-31163 and CVE-2022-32224
+
+As a precaution, Arvados 2.4.2 includes security updates for Ruby on Rails and the TZInfo Ruby gem.  However, there are no known exploits in Arvados based on these CVEs.
+
+h3. Disable Sharing URLs UI
+
+There is now a configuration option @Workbench.DisableSharingURLsUI@ that admins can use to disable the user interface for the "sharing link" feature (URLs which can be sent to users to access the data in a specific collection without an Arvados account), for organizations where sharing links would violate their data sharing policy.
+
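
For reference, a hedged sketch of how this option might appear in a cluster's config.yml — only the @Workbench.DisableSharingURLsUI@ key comes from the notes above; the surrounding structure and the @zzzzz@ cluster ID are placeholders:

<pre>
# Hypothetical cluster configuration excerpt -- set to true to hide the
# "sharing link" user interface in Workbench.
Clusters:
  zzzzz:
    Workbench:
      DisableSharingURLsUI: true
</pre>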
 h2(#v2_4_1). v2.4.1 (2022-06-02)
 
 "previous: Upgrading to 2.4.0":#v2_4_0
index a8235ee70e505e777c123370c16dbf952c933141..3c86721c5bffd5c9081afe3c9010c3764a9afcdb 100644 (file)
@@ -16,15 +16,16 @@ h2. Quick start
 <pre>
 $ curl -O https://git.arvados.org/arvados.git/blob_plain/refs/heads/main:/tools/arvbox/bin/arvbox
 $ chmod +x arvbox
-$ ./arvbox start localdemo latest
+$ ./arvbox start localdemo
+$ ./arvbox root-cert
 $ ./arvbox adduser demouser demo@example.com
 </pre>
 
-You can now log in as @demouser@ using the password you selected.
+You will then need to "install the arvbox root certificate":#root-cert .  After that, you can log in to Workbench as @demouser@ with the password you selected.
 
 h2. Requirements
 
-* Linux 3.x+ and Docker 1.9+
+* Linux 3.x+ and Docker 1.10+
 * Minimum of 3 GiB of RAM + additional memory to run jobs
 * Minimum of 3 GiB of disk + storage for actual data
 
@@ -68,7 +69,7 @@ removeuser <username>
 listusers          list user logins
 </pre>
 
-h2. Install root certificate
+h2(#root-cert). Install root certificate
 
 Arvbox creates a root certificate to authorize Arvbox services.  Installing this root certificate into your web browser will prevent security errors when accessing Arvbox services.  Every Arvbox instance generates a new root signing key.
 
index 86adc325d7c9bffabd439728ced743c6ec930d58..799abf9da4e278bc7f2f4150e7f284c991c677c4 100644 (file)
@@ -397,6 +397,7 @@ func (diag *diagnoser) runtests() {
                        return
                }
        }
+       tarfilename := "sha256:" + imageSHA2 + ".tar"
 
        diag.dotest(100, "uploading file via webdav", func() error {
                ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(diag.timeout))
@@ -404,7 +405,7 @@ func (diag *diagnoser) runtests() {
                if collection.UUID == "" {
                        return fmt.Errorf("skipping, no test collection")
                }
-               req, err := http.NewRequestWithContext(ctx, "PUT", cluster.Services.WebDAVDownload.ExternalURL.String()+"c="+collection.UUID+"/sha256:"+imageSHA2+".tar", bytes.NewReader(HelloWorldDockerImage))
+               req, err := http.NewRequestWithContext(ctx, "PUT", cluster.Services.WebDAVDownload.ExternalURL.String()+"c="+collection.UUID+"/"+tarfilename, bytes.NewReader(HelloWorldDockerImage))
                if err != nil {
                        return fmt.Errorf("BUG? http.NewRequest: %s", err)
                }
@@ -445,11 +446,11 @@ func (diag *diagnoser) runtests() {
                fileurl      string
        }{
                {false, false, http.StatusNotFound, strings.Replace(davurl.String(), "*", "d41d8cd98f00b204e9800998ecf8427e-0", 1) + "foo"},
-               {false, false, http.StatusNotFound, strings.Replace(davurl.String(), "*", "d41d8cd98f00b204e9800998ecf8427e-0", 1) + "testfile"},
+               {false, false, http.StatusNotFound, strings.Replace(davurl.String(), "*", "d41d8cd98f00b204e9800998ecf8427e-0", 1) + tarfilename},
                {false, false, http.StatusNotFound, cluster.Services.WebDAVDownload.ExternalURL.String() + "c=d41d8cd98f00b204e9800998ecf8427e+0/_/foo"},
-               {false, false, http.StatusNotFound, cluster.Services.WebDAVDownload.ExternalURL.String() + "c=d41d8cd98f00b204e9800998ecf8427e+0/_/testfile"},
-               {true, true, http.StatusOK, strings.Replace(davurl.String(), "*", strings.Replace(collection.PortableDataHash, "+", "-", -1), 1) + "testfile"},
-               {true, false, http.StatusOK, cluster.Services.WebDAVDownload.ExternalURL.String() + "c=" + collection.UUID + "/_/sha256:" + imageSHA2 + ".tar"},
+               {false, false, http.StatusNotFound, cluster.Services.WebDAVDownload.ExternalURL.String() + "c=d41d8cd98f00b204e9800998ecf8427e+0/_/" + tarfilename},
+               {true, true, http.StatusOK, strings.Replace(davurl.String(), "*", strings.Replace(collection.PortableDataHash, "+", "-", -1), 1) + tarfilename},
+               {true, false, http.StatusOK, cluster.Services.WebDAVDownload.ExternalURL.String() + "c=" + collection.UUID + "/_/" + tarfilename},
        } {
                diag.dotest(120+i, fmt.Sprintf("downloading from webdav (%s)", trial.fileurl), func() error {
                        if trial.needWildcard && !davWildcard {
index f20268d19a525ca0056bceb54ea7a2036b50bb3a..e5aa4e4f2a83c8224372259012355228338be607 100755 (executable)
@@ -172,7 +172,7 @@ def edit_and_commit_object initial_obj, tmp_stem, global_opts, &block
         # Load the new object
         newobj = case global_opts[:format]
                  when 'json'
-                   Oj.load(newcontent)
+                   Oj.safe_load(newcontent)
                  when 'yaml'
                    YAML.load(newcontent)
                  else
index 778af58ac3f7a1b71c040d5ec4f3332ecba11964..3241fb607c5f06bf5030e901a27a110cdd43e7a6 100644 (file)
@@ -565,8 +565,9 @@ The 'jobs' API is no longer supported.
         self.project_uuid = runtimeContext.project_uuid
 
         # Upload local file references in the job order.
-        job_order = upload_job_order(self, "%s input" % runtimeContext.name,
-                                     updated_tool, job_order, runtimeContext)
+        with Perf(metrics, "upload_job_order"):
+            job_order = upload_job_order(self, "%s input" % runtimeContext.name,
+                                         updated_tool, job_order, runtimeContext)
 
         # the last clause means: if it is a command line tool, and we
         # are going to wait for the result, and always_submit_runner
@@ -581,19 +582,23 @@ The 'jobs' API is no longer supported.
 
         loadingContext = self.loadingContext.copy()
         loadingContext.do_validate = False
+        loadingContext.disable_js_validation = True
         if submitting:
             loadingContext.do_update = False
             # Document may have been auto-updated. Reload the original
             # document with updating disabled because we want to
             # submit the document with its original CWL version, not
             # the auto-updated one.
-            tool = load_tool(updated_tool.tool["id"], loadingContext)
+            with Perf(metrics, "load_tool original"):
+                tool = load_tool(updated_tool.tool["id"], loadingContext)
         else:
             tool = updated_tool
 
         # Upload direct dependencies of workflow steps, get back mapping of files to keep references.
         # Also uploads docker images.
-        merged_map = upload_workflow_deps(self, tool, runtimeContext)
+        logger.info("Uploading workflow dependencies")
+        with Perf(metrics, "upload_workflow_deps"):
+            merged_map = upload_workflow_deps(self, tool, runtimeContext)
 
         # Recreate process object (ArvadosWorkflow or
         # ArvadosCommandTool) because tool document may have been
@@ -602,7 +607,8 @@ The 'jobs' API is no longer supported.
         loadingContext.loader = tool.doc_loader
         loadingContext.avsc_names = tool.doc_schema
         loadingContext.metadata = tool.metadata
-        tool = load_tool(tool.tool, loadingContext)
+        with Perf(metrics, "load_tool"):
+            tool = load_tool(tool.tool, loadingContext)
 
         if runtimeContext.update_workflow or runtimeContext.create_workflow:
             # Create a pipeline template or workflow record and exit.
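
The with Perf(metrics, ...) wrappers added throughout executor.py and runner.py time each phase and report to the arvados.cwl-runner.metrics logger. A minimal sketch of such a timing context manager, assuming only the Perf(logger, name) interface visible in these hunks — not the actual arvados_cwl/perf.py source:

<pre>
import logging
import time

class Perf:
    """Log how long the enclosed block took (sketch, not the real class)."""
    def __init__(self, logger, name):
        self.logger = logger
        self.name = name
    def __enter__(self):
        self.start = time.time()
        return self
    def __exit__(self, exc_type, exc_value, tb):
        self.logger.debug("%s took %.3f seconds", self.name, time.time() - self.start)

metrics = logging.getLogger('arvados.cwl-runner.metrics')
with Perf(metrics, "load_tool"):
    pass  # timed work goes here
</pre>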
index 644713bce25385938df289dbdcb4cf68b77f3ca5..225f4ae60ed9b5cdfe83f9ffa240e8dcf08da5f5 100644 (file)
@@ -17,7 +17,30 @@ import json
 import copy
 from collections import namedtuple
 from io import StringIO
-from typing import Mapping, Sequence
+from typing import (
+    Any,
+    Callable,
+    Dict,
+    Iterable,
+    Iterator,
+    List,
+    Mapping,
+    MutableMapping,
+    Sequence,
+    MutableSequence,
+    Optional,
+    Set,
+    Sized,
+    Tuple,
+    Type,
+    Union,
+    cast,
+)
+from cwltool.utils import (
+    CWLObjectType,
+    CWLOutputAtomType,
+    CWLOutputType,
+)
 
 if os.name == "posix" and sys.version_info[0] < 3:
     import subprocess32 as subprocess
@@ -49,8 +72,10 @@ from .pathmapper import ArvPathMapper, trim_listing, collection_pdh_pattern, col
 from ._version import __version__
 from . import done
 from . context import ArvRuntimeContext
+from .perf import Perf
 
 logger = logging.getLogger('arvados.cwl-runner')
+metrics = logging.getLogger('arvados.cwl-runner.metrics')
 
 def trim_anonymous_location(obj):
     """Remove 'location' field from File and Directory literals.
@@ -228,23 +253,33 @@ def set_secondary(fsaccess, builder, inputschema, secondaryspec, primary, discov
                 if sfname is None:
                     continue
 
-                p_location = primary["location"]
-                if "/" in p_location:
-                    sfpath = (
-                        p_location[0 : p_location.rindex("/") + 1]
-                        + sfname
-                    )
+                if isinstance(sfname, str):
+                    p_location = primary["location"]
+                    if "/" in p_location:
+                        sfpath = (
+                            p_location[0 : p_location.rindex("/") + 1]
+                            + sfname
+                        )
 
             required = builder.do_eval(required, context=primary)
 
-            if fsaccess.exists(sfpath):
-                if pattern is not None:
-                    found.append({"location": sfpath, "class": "File"})
-                else:
-                    found.append(sf)
-            elif required:
-                raise SourceLine(primary["secondaryFiles"], i, validate.ValidationException).makeError(
-                    "Required secondary file '%s' does not exist" % sfpath)
+            if isinstance(sfname, list) or isinstance(sfname, dict):
+                each = aslist(sfname)
+                for e in each:
+                    if required and not fsaccess.exists(e.get("location")):
+                        raise SourceLine(primary["secondaryFiles"], i, validate.ValidationException).makeError(
+                            "Required secondary file '%s' does not exist" % e.get("location"))
+                found.extend(each)
+
+            if isinstance(sfname, str):
+                if fsaccess.exists(sfpath):
+                    if pattern is not None:
+                        found.append({"location": sfpath, "class": "File"})
+                    else:
+                        found.append(sf)
+                elif required:
+                    raise SourceLine(primary["secondaryFiles"], i, validate.ValidationException).makeError(
+                        "Required secondary file '%s' does not exist" % sfpath)
 
         primary["secondaryFiles"] = cmap(found)
         if discovered is not None:
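
The rewritten branches above account for the fact that an evaluated secondaryFiles pattern is not necessarily a plain filename. A sketch of the three shapes the code distinguishes, with hypothetical values:

<pre>
# An evaluated secondaryFiles pattern may take several shapes (hypothetical values):
sfname = "reads.bam.bai"                          # str: resolved relative to the primary file
sfname = {"class": "File", "location": "x.bai"}   # dict: existence-checked and appended as-is
sfname = [{"class": "File", "location": "y.bai"}] # list: aslist() iterates each entry
</pre>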
@@ -260,7 +295,8 @@ def discover_secondary_files(fsaccess, builder, inputs, job_order, discovered=No
 
 def upload_dependencies(arvrunner, name, document_loader,
                         workflowobj, uri, loadref_run, runtimeContext,
-                        include_primary=True, discovered_secondaryfiles=None):
+                        include_primary=True, discovered_secondaryfiles=None,
+                        cache=None):
     """Upload the dependencies of the workflowobj document to Keep.
 
     Returns a pathmapper object mapping local paths to keep references.  Also
@@ -279,6 +315,8 @@ def upload_dependencies(arvrunner, name, document_loader,
         defrg, _ = urllib.parse.urldefrag(joined)
         if defrg not in loaded:
             loaded.add(defrg)
+            if cache is not None and defrg in cache:
+                return cache[defrg]
             # Use fetch_text to get raw file (before preprocessing).
             text = document_loader.fetch_text(defrg)
             if isinstance(text, bytes):
@@ -286,7 +324,10 @@ def upload_dependencies(arvrunner, name, document_loader,
             else:
                 textIO = StringIO(text)
             yamlloader = YAML(typ='safe', pure=True)
-            return yamlloader.load(textIO)
+            result = yamlloader.load(textIO)
+            if cache is not None:
+                cache[defrg] = result
+            return result
         else:
             return {}
 
@@ -297,25 +338,37 @@ def upload_dependencies(arvrunner, name, document_loader,
 
     scanobj = workflowobj
     if "id" in workflowobj and not workflowobj["id"].startswith("_:"):
-        # Need raw file content (before preprocessing) to ensure
-        # that external references in $include and $mixin are captured.
-        scanobj = loadref("", workflowobj["id"])
+        defrg, _ = urllib.parse.urldefrag(workflowobj["id"])
+        if cache is not None and defrg not in cache:
+            # if we haven't seen this file before, want raw file
+            # content (before preprocessing) to ensure that external
+            # references like $include haven't already been inlined.
+            scanobj = loadref("", workflowobj["id"])
 
     metadata = scanobj
 
-    sc_result = scandeps(uri, scanobj,
-                         loadref_fields,
-                         set(("$include", "location")),
-                         loadref, urljoin=document_loader.fetcher.urljoin,
-                         nestdirs=False)
+    with Perf(metrics, "scandeps include, location"):
+        sc_result = scandeps(uri, scanobj,
+                             loadref_fields,
+                             set(("$include", "location")),
+                             loadref, urljoin=document_loader.fetcher.urljoin,
+                             nestdirs=False)
 
-    optional_deps = scandeps(uri, scanobj,
-                                  loadref_fields,
-                                  set(("$schemas",)),
-                                  loadref, urljoin=document_loader.fetcher.urljoin,
-                                  nestdirs=False)
+    with Perf(metrics, "scandeps $schemas"):
+        optional_deps = scandeps(uri, scanobj,
+                                      loadref_fields,
+                                      set(("$schemas",)),
+                                      loadref, urljoin=document_loader.fetcher.urljoin,
+                                      nestdirs=False)
 
-    sc_result.extend(optional_deps)
+    if sc_result is None:
+        sc_result = []
+
+    if optional_deps is None:
+        optional_deps = []
+
+    if optional_deps:
+        sc_result.extend(optional_deps)
 
     sc = []
     uuids = {}
@@ -343,35 +396,45 @@ def upload_dependencies(arvrunner, name, document_loader,
             sc.append(obj)
         collect_uuids(obj)
 
-    visit_class(workflowobj, ("File", "Directory"), collect_uuids)
-    visit_class(sc_result, ("File", "Directory"), collect_uploads)
+    with Perf(metrics, "collect uuids"):
+        visit_class(workflowobj, ("File", "Directory"), collect_uuids)
+
+    with Perf(metrics, "collect uploads"):
+        visit_class(sc_result, ("File", "Directory"), collect_uploads)
 
     # Resolve any collection uuids we found to portable data hashes
     # and assign them to uuid_map
     uuid_map = {}
     fetch_uuids = list(uuids.keys())
-    while fetch_uuids:
-        # For a large number of fetch_uuids, API server may limit
-        # response size, so keep fetching until the API server has
-        # nothing more to give us.
-        lookups = arvrunner.api.collections().list(
-            filters=[["uuid", "in", fetch_uuids]],
-            count="none",
-            select=["uuid", "portable_data_hash"]).execute(
-                num_retries=arvrunner.num_retries)
+    with Perf(metrics, "fetch_uuids"):
+        while fetch_uuids:
+            # For a large number of fetch_uuids, API server may limit
+            # response size, so keep fetching until the API server has
+            # nothing more to give us.
+            lookups = arvrunner.api.collections().list(
+                filters=[["uuid", "in", fetch_uuids]],
+                count="none",
+                select=["uuid", "portable_data_hash"]).execute(
+                    num_retries=arvrunner.num_retries)
 
-        if not lookups["items"]:
-            break
+            if not lookups["items"]:
+                break
 
-        for l in lookups["items"]:
-            uuid_map[l["uuid"]] = l["portable_data_hash"]
+            for l in lookups["items"]:
+                uuid_map[l["uuid"]] = l["portable_data_hash"]
 
-        fetch_uuids = [u for u in fetch_uuids if u not in uuid_map]
+            fetch_uuids = [u for u in fetch_uuids if u not in uuid_map]
 
     normalizeFilesDirs(sc)
 
-    if include_primary and "id" in workflowobj:
-        sc.append({"class": "File", "location": workflowobj["id"]})
+    if "id" in workflowobj:
+        defrg, _ = urllib.parse.urldefrag(workflowobj["id"])
+        if include_primary:
+            # make sure it's included
+            sc.append({"class": "File", "location": defrg})
+        else:
+            # make sure it's excluded
+            sc = [d for d in sc if d.get("location") != defrg]
 
     def visit_default(obj):
         def defaults_are_optional(f):
@@ -412,12 +475,13 @@ def upload_dependencies(arvrunner, name, document_loader,
         else:
             del discovered[d]
 
-    mapper = ArvPathMapper(arvrunner, sc, "",
-                           "keep:%s",
-                           "keep:%s/%s",
-                           name=name,
-                           single_collection=True,
-                           optional_deps=optional_deps)
+    with Perf(metrics, "mapper"):
+        mapper = ArvPathMapper(arvrunner, sc, "",
+                               "keep:%s",
+                               "keep:%s/%s",
+                               name=name,
+                               single_collection=True,
+                               optional_deps=optional_deps)
 
     keeprefs = set()
     def addkeepref(k):
@@ -461,8 +525,9 @@ def upload_dependencies(arvrunner, name, document_loader,
         p["location"] = "keep:%s%s" % (uuid_map[uuid], gp.groups()[1] if gp.groups()[1] else "")
         p[collectionUUID] = uuid
 
-    visit_class(workflowobj, ("File", "Directory"), setloc)
-    visit_class(discovered, ("File", "Directory"), setloc)
+    with Perf(metrics, "setloc"):
+        visit_class(workflowobj, ("File", "Directory"), setloc)
+        visit_class(discovered, ("File", "Directory"), setloc)
 
     if discovered_secondaryfiles is not None:
         for d in discovered:
@@ -647,24 +712,27 @@ FileUpdates = namedtuple("FileUpdates", ["resolved", "secondaryFiles"])
 def upload_workflow_deps(arvrunner, tool, runtimeContext):
     # Ensure that Docker images needed by this workflow are available
 
-    upload_docker(arvrunner, tool, runtimeContext)
+    with Perf(metrics, "upload_docker"):
+        upload_docker(arvrunner, tool, runtimeContext)
 
     document_loader = tool.doc_loader
 
     merged_map = {}
-
+    tool_dep_cache = {}
     def upload_tool_deps(deptool):
         if "id" in deptool:
             discovered_secondaryfiles = {}
-            pm = upload_dependencies(arvrunner,
-                                     "%s dependencies" % (shortname(deptool["id"])),
-                                     document_loader,
-                                     deptool,
-                                     deptool["id"],
-                                     False,
-                                     runtimeContext,
-                                     include_primary=False,
-                                     discovered_secondaryfiles=discovered_secondaryfiles)
+            with Perf(metrics, "upload_dependencies %s" % shortname(deptool["id"])):
+                pm = upload_dependencies(arvrunner,
+                                         "%s dependencies" % (shortname(deptool["id"])),
+                                         document_loader,
+                                         deptool,
+                                         deptool["id"],
+                                         False,
+                                         runtimeContext,
+                                         include_primary=False,
+                                         discovered_secondaryfiles=discovered_secondaryfiles,
+                                         cache=tool_dep_cache)
             document_loader.idx[deptool["id"]] = deptool
             toolmap = {}
             for k,v in pm.items():
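
The hunk above threads a single tool_dep_cache dict through every upload_dependencies call, so each referenced document is fetched and parsed at most once per workflow upload. The memoization pattern, reduced to a sketch (fetch_and_parse is a hypothetical stand-in for the real loader):

<pre>
tool_dep_cache = {}

def fetch_and_parse(url):
    ...  # hypothetical stand-in for document_loader.fetch_text + YAML load

def loadref(url, cache=None):
    # Return the memoized parse when the caller supplied a cache dict.
    if cache is not None and url in cache:
        return cache[url]
    result = fetch_and_parse(url)
    if cache is not None:
        cache[url] = result
    return result
</pre>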
index c885ebd4b1303a1fb9cb02d6b5918719d2c01b16..66cda19f4012fc7754a4286d81dfe746c901cad6 100644 (file)
@@ -36,12 +36,13 @@ setup(name='arvados-cwl-runner',
       # file to determine what version of cwltool and schema-salad to
       # build.
       install_requires=[
-          'cwltool==3.1.20220224085855',
-          'schema-salad==8.2.20211116214159',
+          'cwltool==3.1.20220623174452',
+          'schema-salad==8.3.20220801194920',
           'arvados-python-client{}'.format(pysdk_dep),
           'setuptools',
           'ciso8601 >= 2.0.0',
-          'networkx < 2.6'
+          'networkx < 2.6',
+          'msgpack==1.0.3'
       ],
       data_files=[
           ('share/doc/arvados-cwl-runner', ['LICENSE-2.0.txt', 'README.rst']),
index 1f1f509860bb9950d95e5d9c566e9e57f9d4df36..3a1d9acde7e7d33208958e467931aef2b1474853 100644 (file)
@@ -909,17 +909,18 @@ func (h *handler) logUploadOrDownload(
                collection, filepath = h.determineCollection(fs, filepath)
        }
        if collection != nil {
-               log = log.WithField("collection_uuid", collection.UUID).
-                       WithField("collection_file_path", filepath)
-               props["collection_uuid"] = collection.UUID
+               log = log.WithField("collection_file_path", filepath)
                props["collection_file_path"] = filepath
-               // h.determineCollection populates the collection_uuid prop with the PDH, if
-               // this collection is being accessed via PDH. In that case, blank the
-               // collection_uuid field so that consumers of the log entries can rely on it
-               // being a UUID, or blank. The PDH remains available via the
-               // portable_data_hash property.
-               if props["collection_uuid"] == collection.PortableDataHash {
-                       props["collection_uuid"] = ""
+               // h.determineCollection populates the collection_uuid
+               // prop with the PDH, if this collection is being
+               // accessed via PDH. For logging, we use a different
+               // field depending on whether it's a UUID or PDH.
+               if len(collection.UUID) > 32 {
+                       log = log.WithField("portable_data_hash", collection.UUID)
+                       props["portable_data_hash"] = collection.UUID
+               } else {
+                       log = log.WithField("collection_uuid", collection.UUID)
+                       props["collection_uuid"] = collection.UUID
                }
        }
        if r.Method == "PUT" || r.Method == "POST" {
@@ -958,29 +959,27 @@ func (h *handler) logUploadOrDownload(
 }
 
 func (h *handler) determineCollection(fs arvados.CustomFileSystem, path string) (*arvados.Collection, string) {
-       segments := strings.Split(path, "/")
-       var i int
-       for i = 0; i < len(segments); i++ {
-               dir := append([]string{}, segments[0:i]...)
-               dir = append(dir, ".arvados#collection")
-               f, err := fs.OpenFile(strings.Join(dir, "/"), os.O_RDONLY, 0)
-               if f != nil {
-                       defer f.Close()
-               }
+       target := strings.TrimSuffix(path, "/")
+       for {
+               fi, err := fs.Stat(target)
                if err != nil {
-                       if !os.IsNotExist(err) {
+                       return nil, ""
+               }
+               switch src := fi.Sys().(type) {
+               case *arvados.Collection:
+                       return src, strings.TrimPrefix(path[len(target):], "/")
+               case *arvados.Group:
+                       return nil, ""
+               default:
+                       if _, ok := src.(error); ok {
                                return nil, ""
                        }
-                       continue
                }
-               // err is nil so we found it.
-               decoder := json.NewDecoder(f)
-               var collection arvados.Collection
-               err = decoder.Decode(&collection)
-               if err != nil {
+               // Try parent
+               cut := strings.LastIndexByte(target, '/')
+               if cut < 0 {
                        return nil, ""
                }
-               return &collection, strings.Join(segments[i:], "/")
+               target = target[:cut]
        }
-       return nil, ""
 }
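
logUploadOrDownload now distinguishes UUIDs from portable data hashes by length alone, and determineCollection walks up the path with fs.Stat, type-switching on FileInfo.Sys(). A sketch of why len(...) > 32 suffices — Arvados object UUIDs are 27 characters, while a PDH is a 32-character hex MD5 digest plus "+<size>" (the identifiers below are hypothetical):

<pre>
package main

import "fmt"

func main() {
	uuid := "zzzzz-4zz18-0123456789abcde"       // 27 chars: collection UUID
	pdh := "d41d8cd98f00b204e9800998ecf8427e+0" // 34 chars: 32-hex MD5 + "+0"
	for _, id := range []string{uuid, pdh} {
		if len(id) > 32 {
			fmt.Println(id, "-> portable_data_hash")
		} else {
			fmt.Println(id, "-> collection_uuid")
		}
	}
}
</pre>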
index 20f334166e419ee806b608ac37fd3a27b10dca82..f072fedb40757f20af87ae1e21da86503960aad9 100644 (file)
@@ -19,7 +19,7 @@ DEPLOY_USER=root
 # installer.sh will log in to each of these nodes and then provision
 # it for the specified roles.
 NODES=(
-  [localhost]=api,controller,websocket,dispatcher,keepbalance,keepstore,keepproxy,keepweb,workbench,workbench2,webshell
+  [localhost]=api,controller,websocket,dispatcher,keepbalance,keepstore,keepproxy,keepweb,workbench,workbench2,webshell,shell
 )
 
 # External ports used by the Arvados services
index a68450094161accb43ef472def621e15b20b2d79..fdb10cf63632fd0ec2a6df84e55552e831a66c5d 100644 (file)
@@ -19,7 +19,7 @@ DEPLOY_USER=root
 # installer.sh will log in to each of these nodes and then provision
 # it for the specified roles.
 NODES=(
-  [localhost]=api,controller,websocket,dispatcher,keepbalance,keepstore,keepproxy,keepweb,workbench,workbench2,webshell
+  [localhost]=api,controller,websocket,dispatcher,keepbalance,keepstore,keepproxy,keepweb,workbench,workbench2,webshell,shell
 )
 
 # Set this value when installing a cluster in a single host with a single
index f4660be370990302cadbb9b3e11d8f2a44f5de10..638e5de800169bf9c8f3ea5641d61cbdff2e68f1 100755 (executable)
@@ -625,7 +625,7 @@ if [ -z "${ROLES}" ]; then
     echo "extra_custom_certs_dir: /srv/salt/certs" > ${P_DIR}/extra_custom_certs.sls
     echo "extra_custom_certs:" >> ${P_DIR}/extra_custom_certs.sls
 
-    for c in controller websocket workbench workbench2 webshell keepweb keepproxy; do
+    for c in controller websocket workbench workbench2 webshell keepweb keepproxy shell; do
       # Are we in a single-host-single-hostname env?
       if [ "${USE_SINGLE_HOSTNAME}" = "yes" ]; then
         # Are we in a single-host-single-hostname env?