14440: Working on docs for federated workflows.
authorPeter Amstutz <pamstutz@veritasgenetics.com>
Tue, 20 Nov 2018 22:20:52 +0000 (17:20 -0500)
committerPeter Amstutz <pamstutz@veritasgenetics.com>
Mon, 26 Nov 2018 15:44:52 +0000 (10:44 -0500)
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz@veritasgenetics.com>

doc/_config.yml
doc/user/cwl/federated-workflows.html.textile.liquid [new file with mode: 0644]
doc/user/cwl/federated/cat.cwl [new file with mode: 0644]
doc/user/cwl/federated/federated.cwl [new file with mode: 0644]
doc/user/cwl/federated/md5sum.cwl [new file with mode: 0644]
doc/user/cwl/federated/shards.yml [new file with mode: 0644]

index 21260f761282adb57d3a90cf15635d1f16bc0d4e..f51e3797180da868942f8e4bda175098017cdcd9 100644 (file)
@@ -51,6 +51,7 @@ navbar:
       - user/tutorials/writing-cwl-workflow.html.textile.liquid
       - user/cwl/cwl-style.html.textile.liquid
       - user/cwl/cwl-extensions.html.textile.liquid
+      - user/cwl/federated-workflows.html.textile.liquid
       - user/topics/arv-docker.html.textile.liquid
     - Reference:
       - user/topics/link-accounts.html.textile.liquid
diff --git a/doc/user/cwl/federated-workflows.html.textile.liquid b/doc/user/cwl/federated-workflows.html.textile.liquid
new file mode 100644 (file)
index 0000000..a692bf4
--- /dev/null
@@ -0,0 +1,20 @@
+---
+layout: default
+navsection: userguide
+title: Federated Multi-Cluster Workflows
+...
+{% comment %}
+Copyright (C) The Arvados Authors. All rights reserved.
+
+SPDX-License-Identifier: CC-BY-SA-3.0
+{% endcomment %}
+
+Arvados supports federated workflows, where different steps of a running workflow execute on different clusters.  This supports running analysis on geographically dispersed data (avoiding expensive data transfers by sending the computation to the data) or in "hybrid cloud" configurations where an on-premise cluster can expand its capabilities by sending work to a cloud-based cluster.
+
+h2. Federated scatter/gather example
+
+<pre>
+</pre>
+
+<pre>
+</pre>
diff --git a/doc/user/cwl/federated/cat.cwl b/doc/user/cwl/federated/cat.cwl
new file mode 100644 (file)
index 0000000..17132fe
--- /dev/null
@@ -0,0 +1,22 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+# Concatenates the input files, in order, into a single file named
+# joined.txt by capturing the standard output of `cat`.
+cwlVersion: v1.0
+class: CommandLineTool
+baseCommand: cat
+# Standard output of the command is written to this file.
+stdout: joined.txt
+inputs:
+  inp:
+    # Every file in the array becomes a positional argument to `cat`.
+    type:
+      type: array
+      items: File
+    inputBinding: {}
+outputs:
+  joined:
+    # The file that captured standard output (joined.txt).
+    type: stdout
diff --git a/doc/user/cwl/federated/federated.cwl b/doc/user/cwl/federated/federated.cwl
new file mode 100644 (file)
index 0000000..6c58997
--- /dev/null
@@ -0,0 +1,58 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+# Federated scatter/gather example: run md5sum.cwl once per input
+# shard, each on the cluster named in that shard's record, then join
+# the per-shard results into one file with cat.cwl.
+cwlVersion: v1.0
+class: Workflow
+$namespaces:
+  # Namespace for the Arvados CWL extensions (arv:ClusterTarget below).
+  arv: "http://arvados.org/cwl#"
+requirements:
+  InlineJavascriptRequirement: {}
+  DockerRequirement:
+    dockerPull: arvados/fed-test:scatter-gather
+  ScatterFeatureRequirement: {}
+  # CWL v1.0 requires this for "valueFrom" on a workflow step input
+  # (see the "inp" input of distributed-analysis).
+  StepInputExpressionRequirement: {}
+  SchemaDefRequirement:
+    types:
+      # Pairs an input file with the cluster its step should run on.
+      - name: FileOnCluster
+        type: record
+        fields:
+          file: File
+          cluster: string
+inputs:
+  # One FileOnCluster record per shard.
+  shards:
+    type:
+      type: array
+      items: FileOnCluster
+outputs:
+  # The per-shard results concatenated into a single file.
+  joined:
+    type: File
+    outputSource: gather-results/joined
+steps:
+  distributed-analysis:
+    in:
+      # Scattered below, so each job sees a single record here.
+      shards: shards
+      # Extract the record's file to feed the tool's "inp" parameter.
+      inp: {valueFrom: $(inputs.shards.file)}
+    scatter: shards
+    hints:
+      # Run each scattered job on the cluster named in its record.
+      arv:ClusterTarget:
+        cluster_id: $(inputs.shards.cluster)
+    out: [out]
+    run: md5sum.cwl
+  gather-results:
+    in:
+      inp: distributed-analysis/out
+    out: [joined]
+    run: cat.cwl
diff --git a/doc/user/cwl/federated/md5sum.cwl b/doc/user/cwl/federated/md5sum.cwl
new file mode 100644 (file)
index 0000000..9c78dc2
--- /dev/null
@@ -0,0 +1,27 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+# Runs "md5sum -" with the input file on standard input and captures
+# the command's standard output in out.txt.
+cwlVersion: v1.0
+class: CommandLineTool
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+requirements:
+  InlineJavascriptRequirement: {}
+# No baseCommand: the whole command line comes from "arguments".
+arguments:
+  - md5sum
+  - "-"
+# The input file is fed to the command on standard input.
+stdin: $(inputs.inp.path)
+stdout: out.txt
+inputs:
+  inp: File
+outputs:
+  out:
+    type: File
+    outputBinding:
+      # Matches the file that "stdout" above writes.
+      glob: out.txt
diff --git a/doc/user/cwl/federated/shards.yml b/doc/user/cwl/federated/shards.yml
new file mode 100644 (file)
index 0000000..1ca4f2e
--- /dev/null
@@ -0,0 +1,15 @@
+# Example input object: each entry of "shards" names a file and the
+# cluster it is associated with.
+shards:
+  - file:
+      class: File
+      location: file-on-clsr1.dat
+    cluster: clsr1
+  - file:
+      class: File
+      location: file-on-clsr2.dat
+    cluster: clsr2
+  - file:
+      class: File
+      location: file-on-clsr3.dat
+    cluster: clsr3