sdk/cwl/arvados_cwl/arv-cwl-schema-v1.1.yml

   1 # Copyright (C) The Arvados Authors. All rights reserved.
   2 #
   3 # SPDX-License-Identifier: Apache-2.0
   4
   5 $base: "http://arvados.org/cwl#"
   6 $namespaces:
   7   cwl: "https://w3id.org/cwl/cwl#"
   8   cwltool: "http://commonwl.org/cwltool#"
   9 $graph:
  10 - $import: https://w3id.org/cwl/CommonWorkflowLanguage.yml
  11
  12 - name: cwltool:Secrets
       # Process requirement that names the input parameters to be treated as secret.
  13   type: record
  14   extends: cwl:ProcessRequirement
  15   inVocab: false
  16   fields:
  17     class:
           # Bound to the JSON-LD "@type" keyword and resolved as a vocabulary term.
  18       doc: "Always 'Secrets'"
  19       type: string
  20       jsonldPredicate:
  21         _type: "@vocab"
  22         _id: "@type"
  23     secrets:
  24       type: string[]
  25       jsonldPredicate:
  26         refScope: 0
  27         _type: "@id"
           # Entries are identifier references ("@id"), i.e. they point at input parameters.
  28       doc: |
  29         List one or more input parameters that are sensitive (such as passwords)
  30         which will be deliberately obscured from logging.
  31
  32 - name: RunInSingleContainer
       # Marker requirement: it declares no fields other than `class`.
  33   type: record
  34   inVocab: false
  35   extends: cwl:ProcessRequirement
  36   doc: |
  37     Indicates that a subworkflow should run in a single container
  38     and not be scheduled as separate steps.
  39   fields:
  40     - name: class
  41       doc: "Always 'arv:RunInSingleContainer'"
  42       type: string
  43       jsonldPredicate:
  44         _type: '@vocab'
  45         _id: '@type'
  46
  47 - name: OutputDirType
       # Enum of output staging backends; `doc` holds one entry per symbol.
  48   type: enum
  49   symbols:
  50     - local_output_dir
  51     - keep_output_dir
  52   doc:
  53     - |
  54       local_output_dir: Use regular file system local to the compute node.
  55       There must be sufficient local scratch space to store entire output;
  56       specify this with `outdirMin` of `ResourceRequirement`.  Files are
  57       batch uploaded to Keep when the process completes.  Most compatible, but
  58       upload step can be time consuming for very large files.
  59     - |
  60       keep_output_dir: Use writable Keep mount.  Files are streamed to Keep as
  61       they are written.  Does not consume local scratch space, but does consume
  62       RAM for output buffers (up to 192 MiB per file simultaneously open for
  63       writing.)  Best suited to processes which produce sequential output of
  64       large files (non-sequential writes may produce fragmented file
  65       manifests).  Supports regular files and directories, does not support
  66       special files such as symlinks, hard links, named pipes, named sockets,
  67       or device nodes.
  68
  69
  70 - name: RuntimeConstraints
       # Requirement carrying two optional hints: Keep cache size and output staging backend.
  71   type: record
  72   inVocab: false
  73   extends: cwl:ProcessRequirement
  74   doc: |
  75     Set Arvados-specific runtime hints.
  76   fields:
  77     - name: class
  78       doc: "Always 'arv:RuntimeConstraints'"
  79       type: string
  80       jsonldPredicate:
  81         _type: '@vocab'
  82         _id: '@type'
  83     - name: keep_cache
  84       doc: |
  85         Size of file data buffer for Keep mount in MiB. Default is 256
  86         MiB. Increase this to reduce cache thrashing in situations such as
  87         accessing multiple large (64+ MiB) files at the same time, or
  88         performing random access on a large file.
  89       type: int?
  90     - name: outputDirType
  91       doc: |
  92         Preferred backing store for output staging.  If not specified, the
  93         system may choose which one to use.
  94       type: OutputDirType?
  95
  96 - name: PartitionRequirement
       # Requirement naming the compute partition(s) a job should be scheduled on.
  97   type: record
  98   inVocab: false
  99   extends: cwl:ProcessRequirement
 100   doc: |
 101     Select preferred compute partitions on which to run jobs.
 102   fields:
 103     - name: class
 104       doc: "Always 'arv:PartitionRequirement'"
 105       type: string
 106       jsonldPredicate:
 107         _type: '@vocab'
 108         _id: '@type'
 109     - name: partition
           # Union type: either a single string or an array of strings.
 110       type:
 111         - string
 112         - string[]
 113
 114 - name: APIRequirement
       # Marker requirement: it declares no fields other than `class`.
 115   type: record
 116   extends: cwl:ProcessRequirement
 117   inVocab: false
 118   doc: |
 119     Indicates that the process wants access to the Arvados API.  Will be granted
 120     limited network access and have ARVADOS_API_HOST and ARVADOS_API_TOKEN set
 121     in the environment.
 122   fields:
 123     - name: class
 124       type: string
 125       doc: "Always 'arv:APIRequirement'"
 126       jsonldPredicate:
 127         _id: "@type"
 128         _type: "@vocab"
 129
 130 - name: IntermediateOutput
       # Requirement controlling how long intermediate output collections are kept.
 131   type: record
 132   extends: cwl:ProcessRequirement
 133   inVocab: false
 134   doc: |
 135     Specify desired handling of intermediate output collections.
 136   fields:
 137     class:
 138       type: string
 139       doc: "Always 'arv:IntermediateOutput'"
 140       jsonldPredicate:
 141         _id: "@type"
 142         _type: "@vocab"
 143     outputTTL:
           # Required integer, expressed in seconds; zero means "retain indefinitely".
 144       type: int
 145       doc: |
 146         If the value is greater than zero, consider intermediate output
 147         collections to be temporary and should be automatically
 148         trashed. Temporary collections will be trashed `outputTTL` seconds
 149         after creation.  A value of zero means intermediate output should be
 150         retained indefinitely (this is the default behavior).
 151
 152         Note: arvados-cwl-runner currently does not take workflow dependencies
 153         into account when setting the TTL on an intermediate output
 154         collection. If the TTL is too short, it is possible for a collection to
 155         be trashed before downstream steps that consume it are started.  The
 156         recommended minimum value for TTL is the expected duration of the
 157         entire workflow.
 158
 159 - name: WorkflowRunnerResources
       # Resource request for the workflow runner process itself.  ramMin, coresMin and
       # keep_cache are bound to predicate IRIs under ResourceRequirement / RuntimeConstraints.
 160   type: record
 161   inVocab: false
 162   extends: cwl:ProcessRequirement
 163   doc: |
 164     Specify memory or cores resource request for the CWL runner process itself.
 165   fields:
 166     class:
 167       doc: "Always 'arv:WorkflowRunnerResources'"
 168       type: string
 169       jsonldPredicate:
 170         _type: '@vocab'
 171         _id: '@type'
 172     ramMin:
 173       jsonldPredicate: 'https://w3id.org/cwl/cwl#ResourceRequirement/ramMin'
 174       type: int?
 175       doc: "Minimum RAM, in mebibytes (2**20)"
 176     coresMin:
 177       jsonldPredicate: 'https://w3id.org/cwl/cwl#ResourceRequirement/coresMin'
 178       type: int?
 179       doc: "Minimum cores allocated to cwl-runner"
 180     keep_cache:
 181       jsonldPredicate: 'http://arvados.org/cwl#RuntimeConstraints/keep_cache'
 182       type: int?
 183       doc: |
 184         Size of collection metadata cache for the workflow runner, in
 185         MiB.  Default 256 MiB.  Will be added on to the RAM request
 186         when determining node size to request.
 187     acrContainerImage:
 188       doc: |
 189         The container image containing the correct version of
 190         arvados-cwl-runner to use when invoking the workflow on
 191         Arvados.
 192       type: string?
 193
 194 - name: ClusterTarget
       # Requirement selecting the cluster and owning project for a step; both are optional.
 195   type: record
 196   inVocab: false
 197   extends: cwl:ProcessRequirement
 198   doc: |
 199     Specify where a workflow step should run
 200   fields:
 201     class:
 202       doc: "Always 'arv:ClusterTarget'"
 203       type: string
 204       jsonldPredicate:
 205         _type: '@vocab'
 206         _id: '@type'
 207     cluster_id:
 208       doc: "The cluster to run the container"
 209       type: string?
 210     project_uuid:
 211       doc: "The project that will own the container requests and intermediate collections"
 212       type: string?
 213
 214 - name: OutputStorageClass
       # Requirement selecting storage classes for outputs.  Both storage-class fields
       # accept null, a single string, or an array of strings.
 215   type: record
 216   extends: cwl:ProcessRequirement
 217   inVocab: false
 218   doc: |
 219     Specify the storage class to be used for intermediate and final output
 220   fields:
 221     class:
 222       type: string
 223       doc: "Always 'arv:OutputStorageClass'"
 224       jsonldPredicate:
 225         _id: "@type"
 226         _type: "@vocab"
 227     intermediateStorageClass:
 228       type:
 229         - "null"
 230         - string
 231         - type: array
 232           items: string
 233       doc: One or more storage classes
 234     finalStorageClass:
 235       type:
 236         - "null"
 237         - string
 238         - type: array
 239           items: string
 240       doc: One or more storage classes
 241
 242 - name: PropertyDef
       # Plain key/value record; it does not extend cwl:ProcessRequirement.
 243   type: record
 244   doc: |
 245     Define a property that will be set on the submitted container
 246     request associated with this workflow or step.
 247   fields:
 248     - name: propertyName
 249       doc: "The property key"
 250       type: string
 251     - name: propertyValue
 252       doc: "The property value"
 253       type: [Any]
 254
 255
 256 - name: ProcessProperties
       # Requirement attaching metadata properties to the submitted container request.
 257   type: record
 258   extends: cwl:ProcessRequirement
 259   inVocab: false
 260   doc: |
 261     Specify metadata properties that will be set on the submitted
 262     container request associated with this workflow or step.
 263   fields:
 264     class:
 265       type: string
 266       doc: "Always 'arv:ProcessProperties'"
 267       jsonldPredicate:
 268         _id: "@type"
 269         _type: "@vocab"
 270     processProperties:
           # mapSubject/mapPredicate name the PropertyDef fields used as key and value
           # when this list is written in map form.
 271       type: PropertyDef[]
 272       jsonldPredicate:
 273         mapSubject: propertyName
 274         mapPredicate: propertyValue
 275
 276
 277 - name: cwltool:CUDARequirement
       # Requirement (cwltool namespace) describing the CUDA GPU resources a process needs.
 278   type: record
 279   extends: cwl:ProcessRequirement
 280   inVocab: false
 281   doc: |
 282     Require support for NVIDIA CUDA (GPU hardware acceleration).
 283   fields:
 284     class:
 285       type: string
 286       doc: 'cwltool:CUDARequirement'
 287       jsonldPredicate:
 288         _id: "@type"
 289         _type: "@vocab"
 290     cudaVersionMin:
 291       type: string
 292       doc: |
 293         Minimum CUDA version to run the software, in X.Y format.  This
 294         corresponds to a CUDA SDK release.  When running directly on
 295         the host (not in a container) the host must have a compatible
 296         CUDA SDK (matching the exact version, or, starting with CUDA
 297         11.3, matching major version).  When run in a container, the
 298         container image should provide the CUDA runtime, and the host
 299         driver is injected into the container.  In this case, because
 300         CUDA drivers are backwards compatible, it is possible to
 301         use an older SDK with a newer driver across major versions.
 302
 303         See https://docs.nvidia.com/deploy/cuda-compatibility/ for
 304         details.
 305     cudaComputeCapability:
 306       type:
 307         - 'string'
 308         - 'string[]'
 309       doc: |
 310         CUDA hardware capability required to run the software, in X.Y
 311         format.
 312
 313         * If this is a single value, it defines only the minimum
 314           compute capability.  GPUs with higher capability are also
 315           accepted.
 316
 317         * If it is an array value, then only select GPUs with compute
 318           capabilities that explicitly appear in the array.
 319     cudaDeviceCountMin:
           # NOTE(review): a schema-level `default: 1` is declared here while the doc text
           # describes falling back to `cudaDeviceCountMax` -- confirm which one wins.
 320       type: ['null', int, cwl:Expression]
 321       default: 1
 322       doc: |
 323         Minimum number of GPU devices to request.  If not specified,
 324         same as `cudaDeviceCountMax`.  If neither are specified,
 325         default 1.
 326     cudaDeviceCountMax:
 327       type: ['null', int, cwl:Expression]
 328       doc: |
 329         Maximum number of GPU devices to request.  If not specified,
 330         same as `cudaDeviceCountMin`.
 331     cudaVram:
 332       type: ['null', long, cwl:Expression]
 333       default: 1024
 334       doc: |
 335         Amount of VRAM to request, in mebibytes (2**20)
 336
 337
 338 - name: ROCmRequirement
       # Requirement describing the AMD ROCm GPU resources a process needs.
 339   type: record
 340   inVocab: false
 341   extends: cwl:ProcessRequirement
 342   doc: |
 343     Require support for AMD ROCm (GPU hardware acceleration).
 344   fields:
 345     class:
 346       doc: 'arv:ROCmRequirement'
 347       type: string
 348       jsonldPredicate:
 349         _type: '@vocab'
 350         _id: '@type'
 351     rocmDriverVersion:
 352       type: string
 353       doc: |
 354         Compatible ROCm driver version, in X.Y format, e.g. "6.2".
 355     rocmTarget:
 356       type:
 357         - string
 358         - string[]
 359       doc: |
 360         Compatible GPU architecture/ROCm LLVM targets, e.g. "gfx1100".
 361     rocmDeviceCountMin:
 362       default: 1
 363       type: ["null", int, cwl:Expression]
 364       doc: |
 365         Minimum number of GPU devices to request.  If not specified,
 366         same as `rocmDeviceCountMax`.  If neither are specified,
 367         default 1.
 368     rocmDeviceCountMax:
 369       type: ["null", int, cwl:Expression]
 370       doc: |
 371         Maximum number of GPU devices to request.  If not specified,
 372         same as `rocmDeviceCountMin`.
 373     rocmVram:
           # Unlike the device-count fields, this union has no "null" member.
 374       default: 1024
 375       type: [long, cwl:Expression]
 376       doc: |
 377         Amount of VRAM to request, in mebibytes (2**20).
 378
 379 - name: UsePreemptible
       # Requirement carrying one required boolean that opts a step in or out of
       # preemptible (spot) instances.
 380   type: record
 381   extends: cwl:ProcessRequirement
 382   inVocab: false
 383   doc: |
 384     Specify a workflow step should opt-in or opt-out of using preemptible (spot) instances.
 385   fields:
 386     class:
 387       type: string
 388       doc: "Always 'arv:UsePreemptible'"
 389       jsonldPredicate:
 390         _id: "@type"
 391         _type: "@vocab"
 392     usePreemptible: boolean
 393
 394 - name: OutputCollectionProperties
       # Requirement attaching metadata properties to the output collection.
 395   type: record
 396   extends: cwl:ProcessRequirement
 397   inVocab: false
 398   doc: |
 399     Specify metadata properties that will be set on the output
 400     collection associated with this workflow or step.
 401   fields:
 402     class:
 403       type: string
 404       doc: "Always 'arv:OutputCollectionProperties'"
 405       jsonldPredicate:
 406         _id: "@type"
 407         _type: "@vocab"
 408     outputProperties:
           # mapSubject/mapPredicate name the PropertyDef fields used as key and value
           # when this list is written in map form.
 409       type: PropertyDef[]
 410       jsonldPredicate:
 411         mapSubject: propertyName
 412         mapPredicate: propertyValue
 413
 414
 415 - name: KeepCacheType
       # Enum of Keep block caching strategies; `doc` holds one entry per symbol.
 416   type: enum
 417   doc:
 418     - |
 419       ram_cache: Keep blocks will be cached in RAM only.
 420     - |
 421       disk_cache: Keep blocks will be cached to disk and
 422       memory-mapped.  The disk cache leverages the kernel's virtual
 423       memory system so "hot" data will generally still be kept in
 424       RAM.
 425   symbols:
 426     - ram_cache
 427     - disk_cache
 428
 429 - name: KeepCacheTypeRequirement
       # Requirement wrapping a single optional KeepCacheType choice.
 430   type: record
 431   inVocab: false
 432   extends: cwl:ProcessRequirement
 433   doc: |
 434     Choose keep cache strategy.
 435   fields:
 436     - name: class
 437       doc: "'arv:KeepCacheTypeRequirement'"
 438       type: string
 439       jsonldPredicate:
 440         _type: '@vocab'
 441         _id: '@type'
 442     - name: keepCacheType
 443       doc: |
 444         Whether Keep blocks loaded by arv-mount should be kept in RAM
 445         only or written to disk and memory-mapped.  The disk cache
 446         leverages the kernel's virtual memory system so "hot" data will
 447         generally still be kept in RAM.
 448       type: KeepCacheType?
 449
 450 - name: OutOfMemoryRetry
       # Requirement configuring the retry-with-more-RAM behaviour; every field except
       # `class` is optional.
 451   type: record
 452   extends: cwl:ProcessRequirement
 453   inVocab: false
 454   doc: |
 455     Detect when a failed tool run may have run out of memory, and
 456     re-submit the container with more RAM.
 457   fields:
 458     - name: class
 459       type: string
 460       doc: "'arv:OutOfMemoryRetry'"
 461       jsonldPredicate:
 462         _id: "@type"
 463         _type: "@vocab"
 464     - name: memoryErrorRegex
 465       type: string?
 466       doc: |
 467         A regular expression that will be used on the text of stdout
 468         and stderr produced by the tool to determine if a failed job
 469         should be retried with more RAM.  By default, searches for the
 470         substrings 'bad_alloc' and 'OutOfMemory'.
 471     - name: memoryRetryMultiplier
 472       type: float?
 473       doc: |
 474         If the container failed on its first run, re-submit the
 475         container with the RAM request multiplied by this factor.
 476     - name: memoryRetryMultipler
           # The misspelled field name is intentional: it is the deprecated alias and
           # must keep this exact spelling.
 477       type: float?
 478       doc: |
 479         Deprecated misspelling of "memoryRetryMultiplier".  Kept only
 480         for backwards compatibility, don't use this.