From 509084f45ffd17d740f7a26285210f8cde51f84c Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Mon, 10 Aug 2020 13:17:57 -0400 Subject: [PATCH] 16353: Update cwltool for stable 1.2 Arvados-DCO-1.1-Signed-off-by: Peter Amstutz --- sdk/cwl/arvados_cwl/__init__.py | 15 +- sdk/cwl/arvados_cwl/arv-cwl-schema-v1.2.yml | 206 ++++++++++++++++++++ sdk/cwl/setup.py | 4 +- 3 files changed, 214 insertions(+), 11 deletions(-) create mode 100644 sdk/cwl/arvados_cwl/arv-cwl-schema-v1.2.yml diff --git a/sdk/cwl/arvados_cwl/__init__.py b/sdk/cwl/arvados_cwl/__init__.py index 341929454a..f3629b6897 100644 --- a/sdk/cwl/arvados_cwl/__init__.py +++ b/sdk/cwl/arvados_cwl/__init__.py @@ -226,15 +226,12 @@ def arg_parser(): # type: () -> argparse.ArgumentParser def add_arv_hints(): cwltool.command_line_tool.ACCEPTLIST_EN_RELAXED_RE = re.compile(r".*") cwltool.command_line_tool.ACCEPTLIST_RE = cwltool.command_line_tool.ACCEPTLIST_EN_RELAXED_RE - res10 = pkg_resources.resource_stream(__name__, 'arv-cwl-schema-v1.0.yml') - res11 = pkg_resources.resource_stream(__name__, 'arv-cwl-schema-v1.1.yml') - customschema10 = res10.read().decode('utf-8') - customschema11 = res11.read().decode('utf-8') - use_custom_schema("v1.0", "http://arvados.org/cwl", customschema10) - use_custom_schema("v1.1.0-dev1", "http://arvados.org/cwl", customschema11) - use_custom_schema("v1.1", "http://arvados.org/cwl", customschema11) - res10.close() - res11.close() + supported_versions = ["v1.0", "v1.1", "v1.2"] + for s in supported_versions: + res = pkg_resources.resource_stream(__name__, 'arv-cwl-schema-%s.yml' % s) + customschema = res.read().decode('utf-8') + use_custom_schema(s, "http://arvados.org/cwl", customschema) + res.close() cwltool.process.supportedProcessRequirements.extend([ "http://arvados.org/cwl#RunInSingleContainer", "http://arvados.org/cwl#OutputDirType", diff --git a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.2.yml b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.2.yml new file mode 100644 index 
0000000000..b9b9e61651 --- /dev/null +++ b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.2.yml @@ -0,0 +1,206 @@ +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +$base: "http://arvados.org/cwl#" +$namespaces: + cwl: "https://w3id.org/cwl/cwl#" + cwltool: "http://commonwl.org/cwltool#" +$graph: +- $import: https://w3id.org/cwl/CommonWorkflowLanguage.yml + +- name: cwltool:Secrets + type: record + inVocab: false + extends: cwl:ProcessRequirement + fields: + class: + type: string + doc: "Always 'Secrets'" + jsonldPredicate: + "_id": "@type" + "_type": "@vocab" + secrets: + type: string[] + doc: | + List one or more input parameters that are sensitive (such as passwords) + which will be deliberately obscured from logging. + jsonldPredicate: + "_type": "@id" + refScope: 0 + +- name: RunInSingleContainer + type: record + extends: cwl:ProcessRequirement + inVocab: false + doc: | + Indicates that a subworkflow should run in a single container + and not be scheduled as separate steps. + fields: + - name: class + type: string + doc: "Always 'arv:RunInSingleContainer'" + jsonldPredicate: + _id: "@type" + _type: "@vocab" + +- name: OutputDirType + type: enum + symbols: + - local_output_dir + - keep_output_dir + doc: + - | + local_output_dir: Use regular file system local to the compute node. + There must be sufficient local scratch space to store entire output; + specify this with `outdirMin` of `ResourceRequirement`. Files are + batch uploaded to Keep when the process completes. Most compatible, but + upload step can be time consuming for very large files. + - | + keep_output_dir: Use writable Keep mount. Files are streamed to Keep as + they are written. Does not consume local scratch space, but does consume + RAM for output buffers (up to 192 MiB per file simultaneously open for + writing.) Best suited to processes which produce sequential output of + large files (non-sequential writes may produce fragmented file + manifests).
Supports regular files and directories, does not support + special files such as symlinks, hard links, named pipes, named sockets, + or device nodes. + + +- name: RuntimeConstraints + type: record + extends: cwl:ProcessRequirement + inVocab: false + doc: | + Set Arvados-specific runtime hints. + fields: + - name: class + type: string + doc: "Always 'arv:RuntimeConstraints'" + jsonldPredicate: + _id: "@type" + _type: "@vocab" + - name: keep_cache + type: int? + doc: | + Size of file data buffer for Keep mount in MiB. Default is 256 + MiB. Increase this to reduce cache thrashing in situations such as + accessing multiple large (64+ MiB) files at the same time, or + performing random access on a large file. + - name: outputDirType + type: OutputDirType? + doc: | + Preferred backing store for output staging. If not specified, the + system may choose which one to use. + +- name: PartitionRequirement + type: record + extends: cwl:ProcessRequirement + inVocab: false + doc: | + Select preferred compute partitions on which to run jobs. + fields: + - name: class + type: string + doc: "Always 'arv:PartitionRequirement'" + jsonldPredicate: + _id: "@type" + _type: "@vocab" + - name: partition + type: + - string + - string[] + +- name: APIRequirement + type: record + extends: cwl:ProcessRequirement + inVocab: false + doc: | + Indicates that the process wants access to the Arvados API. Will be granted + limited network access and have ARVADOS_API_HOST and ARVADOS_API_TOKEN set + in the environment. + fields: + - name: class + type: string + doc: "Always 'arv:APIRequirement'" + jsonldPredicate: + _id: "@type" + _type: "@vocab" + +- name: IntermediateOutput + type: record + extends: cwl:ProcessRequirement + inVocab: false + doc: | + Specify desired handling of intermediate output collections.
+ fields: + class: + type: string + doc: "Always 'arv:IntermediateOutput'" + jsonldPredicate: + _id: "@type" + _type: "@vocab" + outputTTL: + type: int + doc: | + If the value is greater than zero, consider intermediate output + collections to be temporary; they will be automatically + trashed. Temporary collections will be trashed `outputTTL` seconds + after creation. A value of zero means intermediate output should be + retained indefinitely (this is the default behavior). + + Note: arvados-cwl-runner currently does not take workflow dependencies + into account when setting the TTL on an intermediate output + collection. If the TTL is too short, it is possible for a collection to + be trashed before downstream steps that consume it are started. The + recommended minimum value for TTL is the expected duration of the + entire workflow. + +- name: WorkflowRunnerResources + type: record + extends: cwl:ProcessRequirement + inVocab: false + doc: | + Specify memory or cores resource request for the CWL runner process itself. + fields: + class: + type: string + doc: "Always 'arv:WorkflowRunnerResources'" + jsonldPredicate: + _id: "@type" + _type: "@vocab" + ramMin: + type: int? + doc: Minimum RAM, in mebibytes (2**20) + jsonldPredicate: "https://w3id.org/cwl/cwl#ResourceRequirement/ramMin" + coresMin: + type: int? + doc: Minimum cores allocated to cwl-runner + jsonldPredicate: "https://w3id.org/cwl/cwl#ResourceRequirement/coresMin" + keep_cache: + type: int? + doc: | + Size of collection metadata cache for the workflow runner, in + MiB. Default 256 MiB. Will be added on to the RAM request + when determining node size to request.
+ jsonldPredicate: "http://arvados.org/cwl#RuntimeConstraints/keep_cache" + +- name: ClusterTarget + type: record + extends: cwl:ProcessRequirement + inVocab: false + doc: | + Specify where a workflow step should run + fields: + class: + type: string + doc: "Always 'arv:ClusterTarget'" + jsonldPredicate: + _id: "@type" + _type: "@vocab" + cluster_id: + type: string? + doc: The cluster to run the container + project_uuid: + type: string? + doc: The project that will own the container requests and intermediate collections diff --git a/sdk/cwl/setup.py b/sdk/cwl/setup.py index d703fcbc55..c8ab71e50b 100644 --- a/sdk/cwl/setup.py +++ b/sdk/cwl/setup.py @@ -30,7 +30,7 @@ setup(name='arvados-cwl-runner', download_url="https://github.com/arvados/arvados.git", license='Apache 2.0', packages=find_packages(), - package_data={'arvados_cwl': ['arv-cwl-schema-v1.0.yml', 'arv-cwl-schema-v1.1.yml']}, + package_data={'arvados_cwl': ['arv-cwl-schema-v1.0.yml', 'arv-cwl-schema-v1.1.yml', 'arv-cwl-schema-v1.2.yml']}, scripts=[ 'bin/cwl-runner', 'bin/arvados-cwl-runner', @@ -39,7 +39,7 @@ setup(name='arvados-cwl-runner', # file to determine what version of cwltool and schema-salad to # build. install_requires=[ - 'cwltool==3.0.20200720165847', + 'cwltool==3.0.20200807132242', 'schema-salad==7.0.20200612160654', 'arvados-python-client{}'.format(pysdk_dep), 'setuptools', -- 2.39.5