flash = {}
# set owner_uuid to current project, provided it is writable
- action_data = Oj.load(params['action_data'] || "{}")
+ action_data = Oj.safe_load(params['action_data'] || "{}")
if action_data['current_project_uuid'] and
current_project = Group.find?(action_data['current_project_uuid']) and
current_project.writable_by.andand.include?(current_user.uuid)
if params[:filters]
filters = params[:filters]
if filters.is_a? String
- filters = Oj.load filters
+ filters = Oj.safe_load filters
elsif filters.is_a? Array
filters = filters.collect do |filter|
if filter.is_a? String
# Accept filters[]=["foo","=","bar"]
- Oj.load filter
+ Oj.safe_load filter
else
# Accept filters=[["foo","=","bar"]]
filter
@updates.keys.each do |attr|
if @object.send(attr).is_a? Hash
if @updates[attr].is_a? String
- @updates[attr] = Oj.load @updates[attr]
+ @updates[attr] = Oj.safe_load @updates[attr]
end
if params[:merge] || params["merge_#{attr}".to_sym]
# Merge provided Hash with current Hash, instead of
"data-emptytext" => "none",
"data-placement" => "bottom",
"data-type" => "select",
- "data-source" => (opt_empty_selection + primary_type[:symbols].map {|i| {:value => i, :text => i} }).to_json,
+ "data-source" => (opt_empty_selection + primary_type[:symbols].map {|i| {:value => cwl_shortname(i), :text => cwl_shortname(i)} }).to_json,
"data-url" => url_for(action: "update", id: object.uuid, controller: object.class.to_s.pluralize.underscore, merge: true),
"data-title" => "Set value for #{cwl_shortname(input_schema[:id])}",
"data-name" => dn,
def initialize(request_url, api_response)
@api_status = api_response.status_code
@api_response_s = api_response.content
- @api_response = Oj.load(@api_response_s, :symbol_keys => true)
+ @api_response = Oj.strict_load(@api_response_s, :symbol_keys => true)
errors = @api_response[:errors]
if errors.respond_to?(:join)
errors = errors.join("\n\n")
end
begin
- resp = Oj.load(msg.content, :symbol_keys => true)
+ resp = Oj.strict_load(msg.content, :symbol_keys => true)
rescue Oj::ParseError
resp = nil
end
}
end
def json_response
- Oj.load(@response.body)
+ Oj.safe_load(@response.body)
end
end
<div class="releasenotes">
</notextile>
-h2(#main). development main (as of 2022-06-02)
+h2(#main). development main (as of 2022-08-09)
+
+"previous: Upgrading to 2.4.2":#v2_4_2
+
+h2(#v2_4_2). v2.4.2 (2022-08-09)
"previous: Upgrading to 2.4.1":#v2_4_1
+h3. GHSL-2022-063
+
+GitHub Security Lab (GHSL) reported a remote code execution (RCE) vulnerability in the Arvados Workbench that allows authenticated attackers to execute arbitrary code via specially crafted JSON payloads.
+
+This vulnerability is fixed in 2.4.2 ("#19316":https://dev.arvados.org/issues/19316).
+
+It is likely that this vulnerability exists in all versions of Arvados up to 2.4.1.
+
+This vulnerability is specific to the Ruby on Rails Workbench application ("Workbench 1"). We do not believe any other Arvados components, including the TypeScript browser-based Workbench application ("Workbench 2") or the API Server, are vulnerable to this attack.
+
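+The fix replaces Workbench's calls to @Oj.load@ on request parameters and API responses with @Oj.safe_load@ and @Oj.strict_load@, which parse JSON in strict mode and only ever return plain JSON types (Hash, Array, String, Numeric, true/false, nil). A minimal illustration of the safer pattern (for reference only; the values below are placeholders, not Workbench code):
+
+<pre>
+require 'oj'
+
+untrusted = '{"current_project_uuid": "zzzzz-j7d0g-0123456789abcde"}'
+
+# Strict-mode parsing returns only basic JSON types, so type hints
+# embedded in a crafted payload cannot instantiate arbitrary Ruby objects.
+action_data = Oj.safe_load(untrusted)
+
+# Oj.safe_load parses in strict mode; Oj.strict_load additionally
+# accepts options such as :symbol_keys.
+resp = Oj.strict_load(untrusted, :symbol_keys => true)
+</pre>
+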
+h3. CVE-2022-31163 and CVE-2022-32224
+
+As a precaution, Arvados 2.4.2 includes security updates for Ruby on Rails and the TZInfo Ruby gem. However, there are no known exploits in Arvados based on these CVEs.
+
+h3. Disable Sharing URLs UI
+
+There is now a configuration option @Workbench.DisableSharingURLsUI@ that admins can use to disable the user interface for the "sharing link" feature (URLs that can be sent to people without an Arvados account to give them access to the data in a specific collection), for organizations where sharing links would violate their data sharing policy.
+
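+For example, an admin who wants to hide the sharing-link UI could set the option in the cluster configuration (the cluster ID @zzzzz@ below is a placeholder):
+
+<pre>
+Clusters:
+  zzzzz:
+    Workbench:
+      DisableSharingURLsUI: true
+</pre>
+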
h2(#v2_4_1). v2.4.1 (2022-06-02)
"previous: Upgrading to 2.4.0":#v2_4_0
<pre>
$ curl -O https://git.arvados.org/arvados.git/blob_plain/refs/heads/main:/tools/arvbox/bin/arvbox
$ chmod +x arvbox
-$ ./arvbox start localdemo latest
+$ ./arvbox start localdemo
+$ ./arvbox root-cert
$ ./arvbox adduser demouser demo@example.com
</pre>
-You can now log in as @demouser@ using the password you selected.
+You will then need to "install the arvbox root certificate":#root-cert . After that, you can log in to Workbench as @demouser@ with the password you selected.
h2. Requirements
-* Linux 3.x+ and Docker 1.9+
+* Linux 3.x+ and Docker 1.10+
* Minimum of 3 GiB of RAM + additional memory to run jobs
* Minimum of 3 GiB of disk + storage for actual data
listusers list user logins
</pre>
-h2. Install root certificate
+h2(#root-cert). Install root certificate
Arvbox creates a root certificate to authorize Arvbox services. Installing this root certificate into your web browser will prevent security errors when you access Arvbox services. Every Arvbox instance generates a new root signing key.
return
}
}
+ tarfilename := "sha256:" + imageSHA2 + ".tar"
diag.dotest(100, "uploading file via webdav", func() error {
ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(diag.timeout))
if collection.UUID == "" {
return fmt.Errorf("skipping, no test collection")
}
- req, err := http.NewRequestWithContext(ctx, "PUT", cluster.Services.WebDAVDownload.ExternalURL.String()+"c="+collection.UUID+"/sha256:"+imageSHA2+".tar", bytes.NewReader(HelloWorldDockerImage))
+ req, err := http.NewRequestWithContext(ctx, "PUT", cluster.Services.WebDAVDownload.ExternalURL.String()+"c="+collection.UUID+"/"+tarfilename, bytes.NewReader(HelloWorldDockerImage))
if err != nil {
return fmt.Errorf("BUG? http.NewRequest: %s", err)
}
fileurl string
}{
{false, false, http.StatusNotFound, strings.Replace(davurl.String(), "*", "d41d8cd98f00b204e9800998ecf8427e-0", 1) + "foo"},
- {false, false, http.StatusNotFound, strings.Replace(davurl.String(), "*", "d41d8cd98f00b204e9800998ecf8427e-0", 1) + "testfile"},
+ {false, false, http.StatusNotFound, strings.Replace(davurl.String(), "*", "d41d8cd98f00b204e9800998ecf8427e-0", 1) + tarfilename},
{false, false, http.StatusNotFound, cluster.Services.WebDAVDownload.ExternalURL.String() + "c=d41d8cd98f00b204e9800998ecf8427e+0/_/foo"},
- {false, false, http.StatusNotFound, cluster.Services.WebDAVDownload.ExternalURL.String() + "c=d41d8cd98f00b204e9800998ecf8427e+0/_/testfile"},
- {true, true, http.StatusOK, strings.Replace(davurl.String(), "*", strings.Replace(collection.PortableDataHash, "+", "-", -1), 1) + "testfile"},
- {true, false, http.StatusOK, cluster.Services.WebDAVDownload.ExternalURL.String() + "c=" + collection.UUID + "/_/sha256:" + imageSHA2 + ".tar"},
+ {false, false, http.StatusNotFound, cluster.Services.WebDAVDownload.ExternalURL.String() + "c=d41d8cd98f00b204e9800998ecf8427e+0/_/" + tarfilename},
+ {true, true, http.StatusOK, strings.Replace(davurl.String(), "*", strings.Replace(collection.PortableDataHash, "+", "-", -1), 1) + tarfilename},
+ {true, false, http.StatusOK, cluster.Services.WebDAVDownload.ExternalURL.String() + "c=" + collection.UUID + "/_/" + tarfilename},
} {
diag.dotest(120+i, fmt.Sprintf("downloading from webdav (%s)", trial.fileurl), func() error {
if trial.needWildcard && !davWildcard {
# Load the new object
newobj = case global_opts[:format]
when 'json'
- Oj.load(newcontent)
+ Oj.safe_load(newcontent)
when 'yaml'
YAML.load(newcontent)
else
self.project_uuid = runtimeContext.project_uuid
# Upload local file references in the job order.
- job_order = upload_job_order(self, "%s input" % runtimeContext.name,
- updated_tool, job_order, runtimeContext)
+ with Perf(metrics, "upload_job_order"):
+ job_order = upload_job_order(self, "%s input" % runtimeContext.name,
+ updated_tool, job_order, runtimeContext)
# the last clause means: if it is a command line tool, and we
# are going to wait for the result, and always_submit_runner
loadingContext = self.loadingContext.copy()
loadingContext.do_validate = False
+ loadingContext.disable_js_validation = True
if submitting:
loadingContext.do_update = False
# Document may have been auto-updated. Reload the original
# document with updating disabled because we want to
# submit the document with its original CWL version, not
# the auto-updated one.
- tool = load_tool(updated_tool.tool["id"], loadingContext)
+ with Perf(metrics, "load_tool original"):
+ tool = load_tool(updated_tool.tool["id"], loadingContext)
else:
tool = updated_tool
# Upload direct dependencies of workflow steps, get back mapping of files to keep references.
# Also uploads docker images.
- merged_map = upload_workflow_deps(self, tool, runtimeContext)
+ logger.info("Uploading workflow dependencies")
+ with Perf(metrics, "upload_workflow_deps"):
+ merged_map = upload_workflow_deps(self, tool, runtimeContext)
# Recreate process object (ArvadosWorkflow or
# ArvadosCommandTool) because tool document may have been
loadingContext.loader = tool.doc_loader
loadingContext.avsc_names = tool.doc_schema
loadingContext.metadata = tool.metadata
- tool = load_tool(tool.tool, loadingContext)
+ with Perf(metrics, "load_tool"):
+ tool = load_tool(tool.tool, loadingContext)
if runtimeContext.update_workflow or runtimeContext.create_workflow:
# Create a pipeline template or workflow record and exit.
import copy
from collections import namedtuple
from io import StringIO
-from typing import Mapping, Sequence
+from typing import (
+ Any,
+ Callable,
+ Dict,
+ Iterable,
+ Iterator,
+ List,
+ Mapping,
+ MutableMapping,
+ Sequence,
+ MutableSequence,
+ Optional,
+ Set,
+ Sized,
+ Tuple,
+ Type,
+ Union,
+ cast,
+)
+from cwltool.utils import (
+ CWLObjectType,
+ CWLOutputAtomType,
+ CWLOutputType,
+)
if os.name == "posix" and sys.version_info[0] < 3:
import subprocess32 as subprocess
from ._version import __version__
from . import done
from . context import ArvRuntimeContext
+from .perf import Perf
logger = logging.getLogger('arvados.cwl-runner')
+metrics = logging.getLogger('arvados.cwl-runner.metrics')
def trim_anonymous_location(obj):
"""Remove 'location' field from File and Directory literals.
if sfname is None:
continue
- p_location = primary["location"]
- if "/" in p_location:
- sfpath = (
- p_location[0 : p_location.rindex("/") + 1]
- + sfname
- )
+ if isinstance(sfname, str):
+ p_location = primary["location"]
+ if "/" in p_location:
+ sfpath = (
+ p_location[0 : p_location.rindex("/") + 1]
+ + sfname
+ )
required = builder.do_eval(required, context=primary)
- if fsaccess.exists(sfpath):
- if pattern is not None:
- found.append({"location": sfpath, "class": "File"})
- else:
- found.append(sf)
- elif required:
- raise SourceLine(primary["secondaryFiles"], i, validate.ValidationException).makeError(
- "Required secondary file '%s' does not exist" % sfpath)
+ if isinstance(sfname, list) or isinstance(sfname, dict):
+ each = aslist(sfname)
+ for e in each:
+ if required and not fsaccess.exists(e.get("location")):
+ raise SourceLine(primary["secondaryFiles"], i, validate.ValidationException).makeError(
+ "Required secondary file '%s' does not exist" % e.get("location"))
+ found.extend(each)
+
+ if isinstance(sfname, str):
+ if fsaccess.exists(sfpath):
+ if pattern is not None:
+ found.append({"location": sfpath, "class": "File"})
+ else:
+ found.append(sf)
+ elif required:
+ raise SourceLine(primary["secondaryFiles"], i, validate.ValidationException).makeError(
+ "Required secondary file '%s' does not exist" % sfpath)
primary["secondaryFiles"] = cmap(found)
if discovered is not None:
def upload_dependencies(arvrunner, name, document_loader,
workflowobj, uri, loadref_run, runtimeContext,
- include_primary=True, discovered_secondaryfiles=None):
+ include_primary=True, discovered_secondaryfiles=None,
+ cache=None):
"""Upload the dependencies of the workflowobj document to Keep.
Returns a pathmapper object mapping local paths to keep references. Also
defrg, _ = urllib.parse.urldefrag(joined)
if defrg not in loaded:
loaded.add(defrg)
+ if cache is not None and defrg in cache:
+ return cache[defrg]
# Use fetch_text to get raw file (before preprocessing).
text = document_loader.fetch_text(defrg)
if isinstance(text, bytes):
else:
textIO = StringIO(text)
yamlloader = YAML(typ='safe', pure=True)
- return yamlloader.load(textIO)
+ result = yamlloader.load(textIO)
+ if cache is not None:
+ cache[defrg] = result
+ return result
else:
return {}
scanobj = workflowobj
if "id" in workflowobj and not workflowobj["id"].startswith("_:"):
- # Need raw file content (before preprocessing) to ensure
- # that external references in $include and $mixin are captured.
- scanobj = loadref("", workflowobj["id"])
+ defrg, _ = urllib.parse.urldefrag(workflowobj["id"])
+ if cache is not None and defrg not in cache:
+ # if we haven't seen this file before, we want the raw file
+ # content (before preprocessing) to ensure that external
+ # references like $include haven't already been inlined.
+ scanobj = loadref("", workflowobj["id"])
metadata = scanobj
- sc_result = scandeps(uri, scanobj,
- loadref_fields,
- set(("$include", "location")),
- loadref, urljoin=document_loader.fetcher.urljoin,
- nestdirs=False)
+ with Perf(metrics, "scandeps include, location"):
+ sc_result = scandeps(uri, scanobj,
+ loadref_fields,
+ set(("$include", "location")),
+ loadref, urljoin=document_loader.fetcher.urljoin,
+ nestdirs=False)
- optional_deps = scandeps(uri, scanobj,
- loadref_fields,
- set(("$schemas",)),
- loadref, urljoin=document_loader.fetcher.urljoin,
- nestdirs=False)
+ with Perf(metrics, "scandeps $schemas"):
+ optional_deps = scandeps(uri, scanobj,
+ loadref_fields,
+ set(("$schemas",)),
+ loadref, urljoin=document_loader.fetcher.urljoin,
+ nestdirs=False)
- sc_result.extend(optional_deps)
+ if sc_result is None:
+ sc_result = []
+
+ if optional_deps is None:
+ optional_deps = []
+
+ if optional_deps:
+ sc_result.extend(optional_deps)
sc = []
uuids = {}
sc.append(obj)
collect_uuids(obj)
- visit_class(workflowobj, ("File", "Directory"), collect_uuids)
- visit_class(sc_result, ("File", "Directory"), collect_uploads)
+ with Perf(metrics, "collect uuids"):
+ visit_class(workflowobj, ("File", "Directory"), collect_uuids)
+
+ with Perf(metrics, "collect uploads"):
+ visit_class(sc_result, ("File", "Directory"), collect_uploads)
# Resolve any collection uuids we found to portable data hashes
# and assign them to uuid_map
uuid_map = {}
fetch_uuids = list(uuids.keys())
- while fetch_uuids:
- # For a large number of fetch_uuids, API server may limit
- # response size, so keep fetching from API server has nothing
- # more to give us.
- lookups = arvrunner.api.collections().list(
- filters=[["uuid", "in", fetch_uuids]],
- count="none",
- select=["uuid", "portable_data_hash"]).execute(
- num_retries=arvrunner.num_retries)
+ with Perf(metrics, "fetch_uuids"):
+ while fetch_uuids:
+ # For a large number of fetch_uuids, API server may limit
+ # response size, so keep fetching until the API server has nothing
+ # more to give us.
+ lookups = arvrunner.api.collections().list(
+ filters=[["uuid", "in", fetch_uuids]],
+ count="none",
+ select=["uuid", "portable_data_hash"]).execute(
+ num_retries=arvrunner.num_retries)
- if not lookups["items"]:
- break
+ if not lookups["items"]:
+ break
- for l in lookups["items"]:
- uuid_map[l["uuid"]] = l["portable_data_hash"]
+ for l in lookups["items"]:
+ uuid_map[l["uuid"]] = l["portable_data_hash"]
- fetch_uuids = [u for u in fetch_uuids if u not in uuid_map]
+ fetch_uuids = [u for u in fetch_uuids if u not in uuid_map]
normalizeFilesDirs(sc)
- if include_primary and "id" in workflowobj:
- sc.append({"class": "File", "location": workflowobj["id"]})
+ if "id" in workflowobj:
+ defrg, _ = urllib.parse.urldefrag(workflowobj["id"])
+ if include_primary:
+ # make sure it's included
+ sc.append({"class": "File", "location": defrg})
+ else:
+ # make sure it's excluded
+ sc = [d for d in sc if d.get("location") != defrg]
def visit_default(obj):
def defaults_are_optional(f):
else:
del discovered[d]
- mapper = ArvPathMapper(arvrunner, sc, "",
- "keep:%s",
- "keep:%s/%s",
- name=name,
- single_collection=True,
- optional_deps=optional_deps)
+ with Perf(metrics, "mapper"):
+ mapper = ArvPathMapper(arvrunner, sc, "",
+ "keep:%s",
+ "keep:%s/%s",
+ name=name,
+ single_collection=True,
+ optional_deps=optional_deps)
keeprefs = set()
def addkeepref(k):
p["location"] = "keep:%s%s" % (uuid_map[uuid], gp.groups()[1] if gp.groups()[1] else "")
p[collectionUUID] = uuid
- visit_class(workflowobj, ("File", "Directory"), setloc)
- visit_class(discovered, ("File", "Directory"), setloc)
+ with Perf(metrics, "setloc"):
+ visit_class(workflowobj, ("File", "Directory"), setloc)
+ visit_class(discovered, ("File", "Directory"), setloc)
if discovered_secondaryfiles is not None:
for d in discovered:
def upload_workflow_deps(arvrunner, tool, runtimeContext):
# Ensure that Docker images needed by this workflow are available
- upload_docker(arvrunner, tool, runtimeContext)
+ with Perf(metrics, "upload_docker"):
+ upload_docker(arvrunner, tool, runtimeContext)
document_loader = tool.doc_loader
merged_map = {}
-
+ tool_dep_cache = {}
def upload_tool_deps(deptool):
if "id" in deptool:
discovered_secondaryfiles = {}
- pm = upload_dependencies(arvrunner,
- "%s dependencies" % (shortname(deptool["id"])),
- document_loader,
- deptool,
- deptool["id"],
- False,
- runtimeContext,
- include_primary=False,
- discovered_secondaryfiles=discovered_secondaryfiles)
+ with Perf(metrics, "upload_dependencies %s" % shortname(deptool["id"])):
+ pm = upload_dependencies(arvrunner,
+ "%s dependencies" % (shortname(deptool["id"])),
+ document_loader,
+ deptool,
+ deptool["id"],
+ False,
+ runtimeContext,
+ include_primary=False,
+ discovered_secondaryfiles=discovered_secondaryfiles,
+ cache=tool_dep_cache)
document_loader.idx[deptool["id"]] = deptool
toolmap = {}
for k,v in pm.items():
# file to determine what version of cwltool and schema-salad to
# build.
install_requires=[
- 'cwltool==3.1.20220224085855',
- 'schema-salad==8.2.20211116214159',
+ 'cwltool==3.1.20220623174452',
+ 'schema-salad==8.3.20220801194920',
'arvados-python-client{}'.format(pysdk_dep),
'setuptools',
'ciso8601 >= 2.0.0',
- 'networkx < 2.6'
+ 'networkx < 2.6',
+ 'msgpack==1.0.3'
],
data_files=[
('share/doc/arvados-cwl-runner', ['LICENSE-2.0.txt', 'README.rst']),
collection, filepath = h.determineCollection(fs, filepath)
}
if collection != nil {
- log = log.WithField("collection_uuid", collection.UUID).
- WithField("collection_file_path", filepath)
- props["collection_uuid"] = collection.UUID
+ log = log.WithField("collection_file_path", filepath)
props["collection_file_path"] = filepath
- // h.determineCollection populates the collection_uuid prop with the PDH, if
- // this collection is being accessed via PDH. In that case, blank the
- // collection_uuid field so that consumers of the log entries can rely on it
- // being a UUID, or blank. The PDH remains available via the
- // portable_data_hash property.
- if props["collection_uuid"] == collection.PortableDataHash {
- props["collection_uuid"] = ""
+ // h.determineCollection returns a collection whose UUID field
+ // may actually contain a portable data hash (PDH) when the
+ // collection is being accessed via PDH. For logging, we use a
+ // different field depending on whether it's a UUID or a PDH.
+ if len(collection.UUID) > 32 {
+ log = log.WithField("portable_data_hash", collection.UUID)
+ props["portable_data_hash"] = collection.UUID
+ } else {
+ log = log.WithField("collection_uuid", collection.UUID)
+ props["collection_uuid"] = collection.UUID
}
}
if r.Method == "PUT" || r.Method == "POST" {
}
func (h *handler) determineCollection(fs arvados.CustomFileSystem, path string) (*arvados.Collection, string) {
- segments := strings.Split(path, "/")
- var i int
- for i = 0; i < len(segments); i++ {
- dir := append([]string{}, segments[0:i]...)
- dir = append(dir, ".arvados#collection")
- f, err := fs.OpenFile(strings.Join(dir, "/"), os.O_RDONLY, 0)
- if f != nil {
- defer f.Close()
- }
+ target := strings.TrimSuffix(path, "/")
+ for {
+ fi, err := fs.Stat(target)
if err != nil {
- if !os.IsNotExist(err) {
+ return nil, ""
+ }
+ switch src := fi.Sys().(type) {
+ case *arvados.Collection:
+ return src, strings.TrimPrefix(path[len(target):], "/")
+ case *arvados.Group:
+ return nil, ""
+ default:
+ if _, ok := src.(error); ok {
return nil, ""
}
- continue
}
- // err is nil so we found it.
- decoder := json.NewDecoder(f)
- var collection arvados.Collection
- err = decoder.Decode(&collection)
- if err != nil {
+ // Try parent
+ cut := strings.LastIndexByte(target, '/')
+ if cut < 0 {
return nil, ""
}
- return &collection, strings.Join(segments[i:], "/")
+ target = target[:cut]
}
- return nil, ""
}
# installer.sh will log in to each of these nodes and then provision
# it for the specified roles.
NODES=(
- [localhost]=api,controller,websocket,dispatcher,keepbalance,keepstore,keepproxy,keepweb,workbench,workbench2,webshell
+ [localhost]=api,controller,websocket,dispatcher,keepbalance,keepstore,keepproxy,keepweb,workbench,workbench2,webshell,shell
)
# External ports used by the Arvados services
# installer.sh will log in to each of these nodes and then provision
# it for the specified roles.
NODES=(
- [localhost]=api,controller,websocket,dispatcher,keepbalance,keepstore,keepproxy,keepweb,workbench,workbench2,webshell
+ [localhost]=api,controller,websocket,dispatcher,keepbalance,keepstore,keepproxy,keepweb,workbench,workbench2,webshell,shell
)
# Set this value when installing a cluster in a single host with a single
echo "extra_custom_certs_dir: /srv/salt/certs" > ${P_DIR}/extra_custom_certs.sls
echo "extra_custom_certs:" >> ${P_DIR}/extra_custom_certs.sls
- for c in controller websocket workbench workbench2 webshell keepweb keepproxy; do
+ for c in controller websocket workbench workbench2 webshell keepweb keepproxy shell; do
# Are we in a single-host-single-hostname env?
if [ "${USE_SINGLE_HOSTNAME}" = "yes" ]; then
# Are we in a single-host-single-hostname env?