From 267477f569cd2632ea14d6fdd467760c669fc087 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Tue, 20 Nov 2018 17:20:52 -0500 Subject: [PATCH] 14440: Working on docs for federated workflows. Arvados-DCO-1.1-Signed-off-by: Peter Amstutz --- doc/_config.yml | 1 + .../federated-workflows.html.textile.liquid | 20 +++++++++ doc/user/cwl/federated/cat.cwl | 14 +++++++ doc/user/cwl/federated/federated.cwl | 41 +++++++++++++++++++ doc/user/cwl/federated/md5sum.cwl | 21 ++++++++++ doc/user/cwl/federated/shards.yml | 13 ++++++ 6 files changed, 110 insertions(+) create mode 100644 doc/user/cwl/federated-workflows.html.textile.liquid create mode 100644 doc/user/cwl/federated/cat.cwl create mode 100644 doc/user/cwl/federated/federated.cwl create mode 100644 doc/user/cwl/federated/md5sum.cwl create mode 100644 doc/user/cwl/federated/shards.yml diff --git a/doc/_config.yml b/doc/_config.yml index 21260f7612..f51e379718 100644 --- a/doc/_config.yml +++ b/doc/_config.yml @@ -51,6 +51,7 @@ navbar: - user/tutorials/writing-cwl-workflow.html.textile.liquid - user/cwl/cwl-style.html.textile.liquid - user/cwl/cwl-extensions.html.textile.liquid + - user/cwl/federated-workflows.html.textile.liquid - user/topics/arv-docker.html.textile.liquid - Reference: - user/topics/link-accounts.html.textile.liquid diff --git a/doc/user/cwl/federated-workflows.html.textile.liquid b/doc/user/cwl/federated-workflows.html.textile.liquid new file mode 100644 index 0000000000..a692bf4fff --- /dev/null +++ b/doc/user/cwl/federated-workflows.html.textile.liquid @@ -0,0 +1,20 @@ +--- +layout: default +navsection: userguide +title: Federated Multi-Cluster Workflows +... +{% comment %} +Copyright (C) The Arvados Authors. All rights reserved. + +SPDX-License-Identifier: CC-BY-SA-3.0 +{% endcomment %} + +Arvados supports federated workflows, where different steps of a running workflow execute on different clusters. 
This supports running analysis on geographically dispersed data (avoiding expensive data transfers by sending the computation to the data) or in "hybrid cloud" configurations where an on-premises cluster can expand its capabilities by sending work to a cloud-based cluster. + +h2. Federated scatter/gather example + +
+
+ +
+
diff --git a/doc/user/cwl/federated/cat.cwl b/doc/user/cwl/federated/cat.cwl new file mode 100644 index 0000000000..17132fe61c --- /dev/null +++ b/doc/user/cwl/federated/cat.cwl @@ -0,0 +1,14 @@ +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +cwlVersion: v1.0 +class: CommandLineTool +inputs: + inp: + type: File[] + inputBinding: {} +outputs: + joined: stdout +stdout: joined.txt +baseCommand: cat diff --git a/doc/user/cwl/federated/federated.cwl b/doc/user/cwl/federated/federated.cwl new file mode 100644 index 0000000000..6c58997a0b --- /dev/null +++ b/doc/user/cwl/federated/federated.cwl @@ -0,0 +1,41 @@ +cwlVersion: v1.0 +class: Workflow +$namespaces: + arv: "http://arvados.org/cwl#" +requirements: + InlineJavascriptRequirement: {} + DockerRequirement: + dockerPull: arvados/fed-test:scatter-gather + ScatterFeatureRequirement: {} + SchemaDefRequirement: + types: + - name: FileOnCluster + type: record + fields: + file: File + cluster: string +inputs: + shards: + type: + type: array + items: FileOnCluster +outputs: + joined: + type: File + outputSource: gather-results/joined +steps: + distributed-analysis: + in: + shards: shards + inp: {valueFrom: $(inputs.shards.file)} + scatter: shards + hints: + arv:ClusterTarget: + cluster_id: $(inputs.shards.cluster) + out: [out] + run: md5sum.cwl + gather-results: + in: + inp: distributed-analysis/out + out: [joined] + run: cat.cwl diff --git a/doc/user/cwl/federated/md5sum.cwl b/doc/user/cwl/federated/md5sum.cwl new file mode 100644 index 0000000000..9c78dc2685 --- /dev/null +++ b/doc/user/cwl/federated/md5sum.cwl @@ -0,0 +1,21 @@ +# Copyright (C) The Arvados Authors. All rights reserved. 
+# +# SPDX-License-Identifier: Apache-2.0 + +cwlVersion: v1.0 +class: CommandLineTool +$namespaces: + arv: "http://arvados.org/cwl#" +requirements: + InlineJavascriptRequirement: {} +inputs: + inp: + type: File +outputs: + out: + type: File + outputBinding: + glob: out.txt +stdin: $(inputs.inp.path) +stdout: out.txt +arguments: ["md5sum", "-"] diff --git a/doc/user/cwl/federated/shards.yml b/doc/user/cwl/federated/shards.yml new file mode 100644 index 0000000000..1ca4f2e3f6 --- /dev/null +++ b/doc/user/cwl/federated/shards.yml @@ -0,0 +1,13 @@ +shards: + - cluster: clsr1 + file: + class: File + location: file-on-clsr1.dat + - cluster: clsr2 + file: + class: File + location: file-on-clsr2.dat + - cluster: clsr3 + file: + class: File + location: file-on-clsr3.dat -- 2.30.2