sdk/cwl/arvados_cwl/arv-cwl-schema-v1.0.yml

   1 # Copyright (C) The Arvados Authors. All rights reserved.
   2 #
   3 # SPDX-License-Identifier: Apache-2.0
   4
   5 $base: "http://arvados.org/cwl#"  # NOTE(review): presumably the base URI for the unprefixed names below — confirm
   6 $namespaces:  # prefixes (cwl:, cwltool:) used by the definitions below
   7   cwl: "https://w3id.org/cwl/cwl#"
   8   cwltool: "http://commonwl.org/cwltool#"
   9 $graph:  # the list of schema definitions
  10 - $import: https://w3id.org/cwl/CommonWorkflowLanguage.yml  # NOTE(review): presumably supplies cwl:ProcessRequirement etc. — confirm
  11
  12 - name: cwltool:LoadListingRequirement
  13   extends: cwl:ProcessRequirement
  14   type: record
  15   inVocab: false
  16   fields:
  17     - name: class
  18       doc: "Always 'LoadListingRequirement'"
  19       type: string
  20       jsonldPredicate:
  21         _type: "@vocab"
  22         _id: "@type"
  23     - name: loadListing
  24       type:  # optional: a union of "null" and the LoadListingEnum enum
  25         - "null"
  26         - name: LoadListingEnum
  27           type: enum
  28           symbols: [no_listing, shallow_listing, deep_listing]
  29
  30 - name: cwltool:Secrets
  31   extends: cwl:ProcessRequirement
  32   type: record
  33   inVocab: false
  34   fields:
  35     - name: class
  36       doc: "Always 'Secrets'"
  37       type: string
  38       jsonldPredicate:
  39         _type: "@vocab"
  40         _id: "@type"
  41     - name: secrets
  42       type: string[]  # names of the sensitive input parameters
  43       doc: |
  44         List one or more input parameters that are sensitive (such as passwords)
  45         which will be deliberately obscured from logging.
  46       jsonldPredicate:
  47         refScope: 0
  48         _type: "@id"
  49
  50 - name: cwltool:TimeLimit
  51   extends: cwl:ProcessRequirement
  52   type: record
  53   inVocab: false
  54   doc: |
  55     Set an upper limit on the execution time of a CommandLineTool or
  56     ExpressionTool.  A tool execution which exceeds the time limit may
  57     be preemptively terminated and considered failed.  May also be
  58     used by batch systems to make scheduling decisions.
  59   fields:
  60     class:
  61       doc: "Always 'TimeLimit'"
  62       type: string
  63       jsonldPredicate:
  64         _type: "@vocab"
  65         _id: "@type"
  66     timelimit:
  67       type: [long, string]  # accepts either a long or a string
  68       doc: |
  69         The time limit, in seconds.  A time limit of zero means no
  70         time limit.  Negative time limits are an error.
  71
  72 - name: RunInSingleContainer
  73   extends: cwl:ProcessRequirement
  74   type: record
  75   inVocab: false
  76   doc: |
  77     Indicates that a subworkflow should run in a single container
  78     and not be scheduled as separate steps.
  79   fields:
  80     class:  # the only field this requirement declares
  81       doc: "Always 'arv:RunInSingleContainer'"
  82       type: string
  83       jsonldPredicate:
  84         "_type": "@vocab"
  85         "_id": "@type"
  86
  87 - name: OutputDirType
  88   type: enum
  89   symbols:
  90     - local_output_dir
  91     - keep_output_dir
  92   doc:  # one entry per symbol, in the same order as `symbols`
  93     - |
  94       local_output_dir: Use regular file system local to the compute node.
  95       There must be sufficient local scratch space to store entire output;
  96       specify this with `outdirMin` of `ResourceRequirement`.  Files are
  97       batch uploaded to Keep when the process completes.  Most compatible, but
  98       upload step can be time consuming for very large files.
  99     - |
 100       keep_output_dir: Use writable Keep mount.  Files are streamed to Keep as
 101       they are written.  Does not consume local scratch space, but does consume
 102       RAM for output buffers (up to 192 MiB per file simultaneously open for
 103       writing.)  Best suited to processes which produce sequential output of
 104       large files (non-sequential writes may produce fragmented file
 105       manifests).  Supports regular files and directories, does not support
 106       special files such as symlinks, hard links, named pipes, named sockets,
 107       or device nodes.
 108
 109
 110 - name: RuntimeConstraints
 111   extends: cwl:ProcessRequirement
 112   type: record
 113   inVocab: false
 114   doc: |
 115     Set Arvados-specific runtime hints.
 116   fields:
 117     class:
 118       doc: "Always 'arv:RuntimeConstraints'"
 119       type: string
 120       jsonldPredicate:
 121         "_type": "@vocab"
 122         "_id": "@type"
 123     keep_cache:
 124       type: int?  # optional; value is in MiB
 125       doc: |
 126         Size of file data buffer for Keep mount in MiB. Default is 256
 127         MiB. Increase this to reduce cache thrashing in situations such as
 128         accessing multiple large (64+ MiB) files at the same time, or
 129         performing random access on a large file.
 130     outputDirType:
 131       type: OutputDirType?  # optional; refers to the OutputDirType enum
 132       doc: |
 133         Preferred backing store for output staging.  If not specified, the
 134         system may choose which one to use.
 135
 136 - name: PartitionRequirement
 137   extends: cwl:ProcessRequirement
 138   type: record
 139   inVocab: false
 140   doc: |
 141     Select preferred compute partitions on which to run jobs.
 142   fields:
 143     class:
 144       doc: "Always 'arv:PartitionRequirement'"
 145       type: string
 146       jsonldPredicate:
 147         "_type": "@vocab"
 148         "_id": "@type"
 149     partition:
 150       type:  # a single partition name or a list of them
 151         - string
 152         - string[]
 153
 154 - name: APIRequirement
 155   type: record
 156   extends: cwl:ProcessRequirement
 157   inVocab: false
 158   doc: |
 159     Indicates that the process wants access to the Arvados API.  Will be granted
 160     limited network access and have ARVADOS_API_HOST and ARVADOS_API_TOKEN set
 161     in the environment.
 162   fields:
 163     - name: class  # the only field this requirement declares
 164       type: string
 165       doc: "Always 'arv:APIRequirement'"
 166       jsonldPredicate:
 167         _id: "@type"
 168         _type: "@vocab"
 169
 170 - name: IntermediateOutput
 171   type: record
 172   extends: cwl:ProcessRequirement
 173   inVocab: false
 174   doc: |
 175     Specify desired handling of intermediate output collections.
 176   fields:
 177     class:
 178       type: string
 179       doc: "Always 'arv:IntermediateOutput'"
 180       jsonldPredicate:
 181         _id: "@type"
 182         _type: "@vocab"
 183     outputTTL:
 184       type: int  # required; value is in seconds
 185       doc: |
 186         If the value is greater than zero, consider intermediate output
 187         collections to be temporary and should be automatically
 188         trashed. Temporary collections will be trashed `outputTTL` seconds
 189         after creation.  A value of zero means intermediate output should be
 190         retained indefinitely (this is the default behavior).
 191
 192         Note: arvados-cwl-runner currently does not take workflow dependencies
 193         into account when setting the TTL on an intermediate output
 194         collection. If the TTL is too short, it is possible for a collection to
 195         be trashed before downstream steps that consume it are started.  The
 196         recommended minimum value for TTL is the expected duration of the
 197         entire workflow.
 198
 199 - name: ReuseRequirement
 200   extends: cwl:ProcessRequirement
 201   type: record
 202   inVocab: false
 203   doc: |
 204     Enable/disable work reuse for current process.  Default true (work reuse enabled).
 205   fields:
 206     class:
 207       doc: "Always 'arv:ReuseRequirement'"
 208       type: string
 209       jsonldPredicate:
 210         "_type": "@vocab"
 211         "_id": "@type"
 212     enableReuse:  # required boolean switch
 213       type: boolean
 214
 215 - name: WorkflowRunnerResources
 216   extends: cwl:ProcessRequirement
 217   type: record
 218   inVocab: false
 219   doc: |
 220     Specify memory or cores resource request for the CWL runner process itself.
 221   fields:
 222     - name: class
 223       doc: "Always 'arv:WorkflowRunnerResources'"
 224       type: string
 225       jsonldPredicate:
 226         "_type": "@vocab"
 227         "_id": "@type"
 228     - name: ramMin  # predicate URI points at ResourceRequirement/ramMin
 229       doc: Minimum RAM, in mebibytes (2**20)
 230       type: int?
 231       jsonldPredicate: "https://w3id.org/cwl/cwl#ResourceRequirement/ramMin"
 232     - name: coresMin  # predicate URI points at ResourceRequirement/coresMin
 233       doc: Minimum cores allocated to cwl-runner
 234       type: int?
 235       jsonldPredicate: "https://w3id.org/cwl/cwl#ResourceRequirement/coresMin"
 236     - name: keep_cache  # predicate URI points at RuntimeConstraints/keep_cache
 237       type: int?
 238       doc: |
 239         Size of collection metadata cache for the workflow runner, in
 240         MiB.  Default 256 MiB.  Will be added on to the RAM request
 241         when determining node size to request.
 242       jsonldPredicate: "http://arvados.org/cwl#RuntimeConstraints/keep_cache"
 243     - name: acrContainerImage
 244       type: string?
 245       doc: |
 246         The container image containing the correct version of
 247         arvados-cwl-runner to use when invoking the workflow on
 248         Arvados.
 249
 250 - name: ClusterTarget
 251   extends: cwl:ProcessRequirement
 252   type: record
 253   inVocab: false
 254   doc: |
 255     Specify where a workflow step should run
 256   fields:
 257     - name: class
 258       doc: "Always 'arv:ClusterTarget'"
 259       type: string
 260       jsonldPredicate:
 261         "_type": "@vocab"
 262         "_id": "@type"
 263     - name: cluster_id  # optional
 264       doc: The cluster to run the container
 265       type: string?
 266     - name: project_uuid  # optional
 267       doc: The project that will own the container requests and intermediate collections
 268       type: string?
 269
 270
 271 - name: OutputStorageClass
 272   type: record
 273   extends: cwl:ProcessRequirement
 274   inVocab: false
 275   doc: |
 276     Specify the storage class to be used for intermediate and final output
 277   fields:
 278     class:
 279       type: string
 280       doc: "Always 'arv:OutputStorageClass'"
 281       jsonldPredicate:
 282         _id: "@type"
 283         _type: "@vocab"
 284     intermediateStorageClass:
 285       type:  # optional: null, a single string, or an array of strings
 286         - "null"
 287         - string
 288         - type: array
 289           items: string
 290       doc: One or more storage classes
 291     finalStorageClass:
 292       type:  # optional: null, a single string, or an array of strings
 293         - "null"
 294         - string
 295         - type: array
 296           items: string
 297       doc: One or more storage classes
 298
 299 - name: PropertyDef
 300   type: record
 301   doc: |
 302     Define an arvados metadata property that will be set on a
 303     container request or output collection.
 304   fields:
 305     propertyName:
 306       doc: The property key
 307       type: string
 308     propertyValue:
 309       doc: The property value
 310       type: [Any]  # single-member union: any value type is accepted
 311
 312
 313 - name: ProcessProperties
 314   type: record
 315   extends: cwl:ProcessRequirement
 316   inVocab: false
 317   doc: |
 318     Specify metadata properties that will be set on the submitted
 319     container request associated with this workflow or step.
 320   fields:
 321     class:
 322       type: string
 323       doc: "Always 'arv:ProcessProperties'"
 324       jsonldPredicate:
 325         _id: "@type"
 326         _type: "@vocab"
 327     processProperties:
 328       type: PropertyDef[]  # array of PropertyDef records
 329       jsonldPredicate:
 330         mapSubject: propertyName
 331         mapPredicate: propertyValue
 332
 333
 334 - name: cwltool:CUDARequirement
 335   type: record
 336   extends: cwl:ProcessRequirement
 337   inVocab: false
 338   doc: |
 339     Require support for NVIDIA CUDA (GPU hardware acceleration).
 340   fields:
 341     class:
 342       type: string
 343       doc: 'cwltool:CUDARequirement'
 344       jsonldPredicate:
 345         _id: "@type"
 346         _type: "@vocab"
 347     cudaVersionMin:
 348       type: string
 349       doc: |
 350         Minimum CUDA version to run the software, in X.Y format.  This
 351         corresponds to a CUDA SDK release.  When running directly on
 352         the host (not in a container) the host must have a compatible
 353         CUDA SDK (matching the exact version, or, starting with CUDA
 354         11.3, matching major version).  When run in a container, the
 355         container image should provide the CUDA runtime, and the host
 356         driver is injected into the container.  In this case, because
 357         CUDA drivers are backwards compatible, it is possible to
 358         use an older SDK with a newer driver across major versions.
 359
 360         See https://docs.nvidia.com/deploy/cuda-compatibility/ for
 361         details.
 362     cudaComputeCapability:
 363       type:  # a single version string or an array of them
 364         - 'string'
 365         - 'string[]'
 366       doc: |
 367         CUDA hardware capability required to run the software, in X.Y
 368         format.
 369
 370         * If this is a single value, it defines only the minimum
 371           compute capability.  GPUs with higher capability are also
 372           accepted.
 373
 374         * If it is an array value, then only select GPUs with compute
 375           capabilities that explicitly appear in the array.
 376     cudaDeviceCountMin:
 377       type: ['null', int, cwl:Expression]
 378       default: 1  # NOTE(review): doc says unset falls back to cudaDeviceCountMax — confirm this default does not mask that
 379       doc: |
 380         Minimum number of GPU devices to request.  If not specified,
 381         same as `cudaDeviceCountMax`.  If neither are specified,
 382         default 1.
 383     cudaDeviceCountMax:
 384       type: ['null', int, cwl:Expression]
 385       doc: |
 386         Maximum number of GPU devices to request.  If not specified,
 387         same as `cudaDeviceCountMin`.
 388
 389 - name: UsePreemptible
 390   type: record
 391   extends: cwl:ProcessRequirement
 392   inVocab: false
 393   doc: |
 394     Specify whether a workflow step should opt in to or opt out of using preemptible (spot) instances.
 395   fields:
 396     class:
 397       type: string
 398       doc: "Always 'arv:UsePreemptible'"
 399       jsonldPredicate:
 400         _id: "@type"
 401         _type: "@vocab"
 402     usePreemptible: boolean
 403
 404 - name: OutputCollectionProperties
 405   type: record
 406   extends: cwl:ProcessRequirement
 407   inVocab: false
 408   doc: |
 409     Specify metadata properties that will be set on the output
 410     collection associated with this workflow or step.
 411   fields:
 412     class:
 413       type: string
 414       doc: "Always 'arv:OutputCollectionProperties'"
 415       jsonldPredicate:
 416         _id: "@type"
 417         _type: "@vocab"
 418     outputProperties:
 419       type: PropertyDef[]  # array of PropertyDef records
 420       jsonldPredicate:
 421         mapSubject: propertyName
 422         mapPredicate: propertyValue
 423
 424
 425 - name: KeepCacheType
 426   type: enum
 427   symbols:
 428     - "ram_cache"
 429     - "disk_cache"
 430   doc:  # one entry per symbol, in the same order as `symbols`
 431     - |
 432       ram_cache: Keep blocks will be cached in RAM only.
 433     - |
 434       disk_cache: Keep blocks will be cached to disk and
 435       memory-mapped.  The disk cache leverages the kernel's virtual
 436       memory system so "hot" data will generally still be kept in
 437       RAM.
 438
 439 - name: KeepCacheTypeRequirement
 440   extends: cwl:ProcessRequirement
 441   type: record
 442   inVocab: false
 443   doc: |
 444     Choose keep cache strategy.
 445   fields:
 446     class:
 447       doc: "'arv:KeepCacheTypeRequirement'"
 448       type: string
 449       jsonldPredicate:
 450         "_type": "@vocab"
 451         "_id": "@type"
 452     keepCacheType:
 453       type: KeepCacheType?  # optional; refers to the KeepCacheType enum
 454       doc: |
 455         Whether Keep blocks loaded by arv-mount should be kept in RAM
 456         only or written to disk and memory-mapped.  The disk cache
 457         leverages the kernel's virtual memory system so "hot" data will
 458         generally still be kept in RAM.
 459
 460 - name: OutOfMemoryRetry
 461   type: record
 462   extends: cwl:ProcessRequirement
 463   inVocab: false
 464   doc: |
 465     Detect when a failed tool run may have run out of memory, and
 466     re-submit the container with more RAM.
 467   fields:
 468     - name: class
 469       type: string
 470       doc: "'arv:OutOfMemoryRetry'"
 471       jsonldPredicate:
 472         _id: "@type"
 473         _type: "@vocab"
 474     - name: memoryErrorRegex
 475       type: string?
 476       doc: |
 477         A regular expression that will be used on the text of stdout
 478         and stderr produced by the tool to determine if a failed job
 479         should be retried with more RAM.  By default, searches for the
 480         substrings 'bad_alloc' and 'OutOfMemory'.
 481     - name: memoryRetryMultiplier
 482       type: float?
 483       doc: |
 484         If the container failed on its first run, re-submit the
 485         container with the RAM request multiplied by this factor.
 486     - name: memoryRetryMultipler  # misspelled name is intentional; see doc
 487       type: float?
 488       doc: |
 489         Deprecated misspelling of "memoryRetryMultiplier".  Kept only
 490         for backwards compatibility, don't use this.