From 78baf73e784e01fb44d1b16aefaf6e295bb4d5b4 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Thu, 30 Jun 2016 13:31:46 -0400 Subject: [PATCH] 9369: arvados-cwl-runner documentation and examples, add deprecation notices to pipeline documentation --- doc/_config.yml | 9 +- .../_pipeline_deprecation_notice.liquid | 3 + doc/sdk/python/sdk-python.html.textile.liquid | 16 +- doc/user/cwl/bwa-mem/bwa-mem-input-local.yml | 14 + doc/user/cwl/bwa-mem/bwa-mem-input.yml | 14 + doc/user/cwl/bwa-mem/bwa-mem-template.yml | 4 + doc/user/cwl/bwa-mem/bwa-mem.cwl | 52 ++++ doc/user/cwl/cwl-runner.html.textile.liquid | 267 ++++++++++++++++++ ...vm-login-with-webshell.html.textile.liquid | 2 +- doc/user/topics/arv-copy.html.textile.liquid | 10 +- .../crunch-tools-overview.html.textile.liquid | 2 + ...-pipeline-command-line.html.textile.liquid | 2 +- .../tutorial-parallel.html.textile.liquid | 2 + ...nning-external-program.html.textile.liquid | 4 +- .../tutorial-firstscript.html.textile.liquid | 2 + .../tutorial-submit-job.html.textile.liquid | 2 + 16 files changed, 386 insertions(+), 19 deletions(-) create mode 100644 doc/_includes/_pipeline_deprecation_notice.liquid create mode 100755 doc/user/cwl/bwa-mem/bwa-mem-input-local.yml create mode 100755 doc/user/cwl/bwa-mem/bwa-mem-input.yml create mode 100755 doc/user/cwl/bwa-mem/bwa-mem-template.yml create mode 100755 doc/user/cwl/bwa-mem/bwa-mem.cwl create mode 100644 doc/user/cwl/cwl-runner.html.textile.liquid diff --git a/doc/_config.yml b/doc/_config.yml index 4115bc9616..b3b213bc45 100644 --- a/doc/_config.yml +++ b/doc/_config.yml @@ -43,13 +43,16 @@ navbar: - user/tutorials/tutorial-keep-mount.html.textile.liquid - user/topics/keep.html.textile.liquid - user/topics/arv-copy.html.textile.liquid - - Run a pipeline on the command line: + - Using Common Workflow Language: + - user/cwl/intro-cwl.html.textile.liquid + - user/cwl/cwl-runner.html.textile.liquid + - Working on the command line: - 
user/topics/running-pipeline-command-line.html.textile.liquid - user/topics/arv-run.html.textile.liquid - - Working with Arvados Repositories: + - Working with git repositories: - user/tutorials/add-new-repository.html.textile.liquid - user/tutorials/git-arvados-guide.html.textile.liquid - - Develop a new pipeline: + - Develop an Arvados pipeline: - user/tutorials/intro-crunch.html.textile.liquid - user/tutorials/running-external-program.html.textile.liquid - user/topics/crunch-tools-overview.html.textile.liquid diff --git a/doc/_includes/_pipeline_deprecation_notice.liquid b/doc/_includes/_pipeline_deprecation_notice.liquid new file mode 100644 index 0000000000..2fcb2e7068 --- /dev/null +++ b/doc/_includes/_pipeline_deprecation_notice.liquid @@ -0,0 +1,3 @@ +{% include 'notebox_begin' %} +Arvados pipeline templates are deprecated. The recommended way to develop new workflows for Arvados is using the "Common Workflow Language":{{site.baseurl}}/user/cwl/intro-cwl.html +{% include 'notebox_end' %} diff --git a/doc/sdk/python/sdk-python.html.textile.liquid b/doc/sdk/python/sdk-python.html.textile.liquid index a6a0b565c9..adbb316c3f 100644 --- a/doc/sdk/python/sdk-python.html.textile.liquid +++ b/doc/sdk/python/sdk-python.html.textile.liquid @@ -6,9 +6,7 @@ title: "Python SDK" ... -The Python SDK provides a generic set of wrappers so you can make API calls easily. It performs some validation before connecting to the API server: for example, it refuses to do an API call if a required parameter is missing. - -The library also includes some conveniences for use in Crunch scripts; see "Crunch utility libraries":crunch-utility-libraries.html for details. +The Python SDK provides access from Python to the Arvados API and Keep. It also includes a number of command line tools for using and administering Arvados and Keep, and some conveniences for use in Crunch scripts; see "Crunch utility libraries":crunch-utility-libraries.html for details. h3. 
Installation @@ -30,27 +28,27 @@ First, "add the appropriate package repository for your distribution":{{ site.ba On CentOS 6 and RHEL 6: -
~$ sudo yum install python27-python-arvados-python-client
+
~$ sudo yum install python27-python-arvados-python-client python27-python-arvados-cwl-runner
 
On other Red Hat-based systems: -
~$ sudo yum install python-arvados-python-client
+
~$ sudo yum install python-arvados-python-client python-arvados-cwl-runner
 
On Debian-based systems: -
~$ sudo apt-get install python-arvados-python-client
+
~$ sudo apt-get install python-arvados-python-client python-arvados-cwl-runner
 
h4. Option 2: Install with pip -Run @pip-2.7 install arvados-python-client@ in an appropriate installation environment, such as a virtualenv. +Run @pip-2.7 install arvados-python-client arvados-cwl-runner@ in an appropriate installation environment, such as a virtualenv. If your version of @pip@ is 1.4 or newer, the @pip install@ command might give an error: "Could not find a version that satisfies the requirement arvados-python-client". If this happens, try @pip-2.7 install --pre arvados-python-client@. @@ -60,7 +58,9 @@ Install the @python-setuptools@ package from your distribution. Then run the fo
~$ git clone https://github.com/curoverse/arvados.git
-~$ cd arvados/sdk/python
+~$ cd ~/arvados/sdk/python
+~$ python2.7 setup.py install
+~$ cd ~/arvados/sdk/cwl
 ~$ python2.7 setup.py install
 
diff --git a/doc/user/cwl/bwa-mem/bwa-mem-input-local.yml b/doc/user/cwl/bwa-mem/bwa-mem-input-local.yml new file mode 100755 index 0000000000..7f8c7bdb72 --- /dev/null +++ b/doc/user/cwl/bwa-mem/bwa-mem-input-local.yml @@ -0,0 +1,14 @@ +#!/usr/bin/env cwl-runner +cwl:tool: bwa-mem.cwl +reference: + class: File + path: 19.fasta.bwt +read_p1: + class: File + path: HWI-ST1027_129_D0THKACXX.1_1.fastq +read_p2: + class: File + path: HWI-ST1027_129_D0THKACXX.1_2.fastq +group_id: arvados_tutorial +sample_id: HWI-ST1027_129 +PL: illumina diff --git a/doc/user/cwl/bwa-mem/bwa-mem-input.yml b/doc/user/cwl/bwa-mem/bwa-mem-input.yml new file mode 100755 index 0000000000..8e29b60ecd --- /dev/null +++ b/doc/user/cwl/bwa-mem/bwa-mem-input.yml @@ -0,0 +1,14 @@ +#!/usr/bin/env cwl-runner +cwl:tool: bwa-mem.cwl +reference: + class: File + path: keep:2463fa9efeb75e099685528b3b9071e0+438/19.fasta.bwt +read_p1: + class: File + path: keep:ae480c5099b81e17267b7445e35b4bc7+180/HWI-ST1027_129_D0THKACXX.1_1.fastq +read_p2: + class: File + path: keep:ae480c5099b81e17267b7445e35b4bc7+180/HWI-ST1027_129_D0THKACXX.1_2.fastq +group_id: arvados_tutorial +sample_id: HWI-ST1027_129 +PL: illumina diff --git a/doc/user/cwl/bwa-mem/bwa-mem-template.yml b/doc/user/cwl/bwa-mem/bwa-mem-template.yml new file mode 100755 index 0000000000..bc59427cd1 --- /dev/null +++ b/doc/user/cwl/bwa-mem/bwa-mem-template.yml @@ -0,0 +1,4 @@ +reference: + class: File + path: keep:2463fa9efeb75e099685528b3b9071e0+438/19.fasta.bwt +PL: illumina diff --git a/doc/user/cwl/bwa-mem/bwa-mem.cwl b/doc/user/cwl/bwa-mem/bwa-mem.cwl new file mode 100755 index 0000000000..1a16a3dcd3 --- /dev/null +++ b/doc/user/cwl/bwa-mem/bwa-mem.cwl @@ -0,0 +1,52 @@ +#!/usr/bin/env cwl-runner +cwlVersion: draft-3 +class: CommandLineTool + +hints: + - class: DockerRequirement + dockerPull: biodckr/bwa +requirements: + - class: InlineJavascriptRequirement + +baseCommand: [bwa, mem] + +arguments: + - {prefix: "-t", valueFrom: $(runtime.cores)} + - 
{prefix: "-R", valueFrom: "@RG\tID:$(inputs.group_id)\tPL:$(inputs.PL)\tSM:$(inputs.sample_id)"} + +inputs: + - id: reference + type: File + inputBinding: + position: 1 + valueFrom: $(self.path.match(/(.*)\.[^.]+$/)[1]) + secondaryFiles: + - ^.ann + - ^.amb + - ^.pac + - ^.sa + description: The index files produced by `bwa index` + - id: read_p1 + type: File + inputBinding: + position: 2 + description: The reads, in fastq format. + - id: read_p2 + type: ["null", File] + inputBinding: + position: 3 + description: For mate paired reads, the second file (optional). + - id: group_id + type: string + - id: sample_id + type: string + - id: PL + type: string + +stdout: $(inputs.read_p1.path.match(/\/([^/]+)\.[^/.]+$/)[1] + ".sam") + +outputs: + - id: aligned_sam + type: File + outputBinding: + glob: $(inputs.read_p1.path.match(/\/([^/]+)\.[^/.]+$/)[1] + ".sam") diff --git a/doc/user/cwl/cwl-runner.html.textile.liquid b/doc/user/cwl/cwl-runner.html.textile.liquid new file mode 100644 index 0000000000..b6773c9f32 --- /dev/null +++ b/doc/user/cwl/cwl-runner.html.textile.liquid @@ -0,0 +1,267 @@ +--- +layout: default +navsection: userguide +title: Using Common Workflow Language +... + +The "Common Workflow Language (CWL)":http://commonwl.org is a multi-vendor open standard for describing analysis tools and workflows that are portable across a variety of platforms. CWL is the recommended way to develop and run Workflows for Arvados. Arvados fully supports the "CWL draft-3":http://commonwl.org/draft-3 specification. + +h2. Getting the example files + +{% include 'tutorial_expectations' %} + +The tutorial files are located in the documentation section of the Arvados source repository: + + 
~$ git clone https://github.com/curoverse/arvados
+~$ cd arvados/doc/user/cwl/bwa-mem
+
+
+ +The tutorial data is hosted on "http://cloud.curoverse.com":http://cloud.curoverse.com (also known as *qr1hi*). If you are using a different Arvados instance, you may need to copy the data to your own instance. The easiest way to do this is with "arv-copy":{{site.baseurl}}/user/topics/arv-copy.html (this requires signing up for a free cloud.curoverse.com account). + + +

+~$ arv-copy --src cloud --dst settings 2463fa9efeb75e099685528b3b9071e0+438
+~$ arv-copy --src cloud --dst settings ae480c5099b81e17267b7445e35b4bc7+180
+
+
+ +If you do not wish to create an account on "http://cloud.curoverse.com":http://cloud.curoverse.com, you may download the files anonymously and upload them to your local Arvados instance: + +https://cloud.curoverse.com/collections/2463fa9efeb75e099685528b3b9071e0+438 + +https://cloud.curoverse.com/collections/ae480c5099b81e17267b7445e35b4bc7+180 + +h2. Submitting a workflow to an Arvados cluster + +Use @arvados-cwl-runner@ to submit CWL workflows to Arvados. After submitting the job, @arvados-cwl-runner@ will wait for the workflow to complete and print out the final result to standard output. Note that once submitted, the workflow runs entirely on Arvados, so even if you interrupt @arvados-cwl-runner@ or log out, the workflow will continue to run. + + 
~/arvados/doc/user/cwl/bwa-mem$ arvados-cwl-runner bwa-mem.cwl bwa-mem-input.yml
+arvados-cwl-runner 1.0.20160628195002, arvados-python-client 0.1.20160616015107, cwltool 1.0.20160629140624
+2016-06-30 14:56:36 arvados.arv-run[27002] INFO: Upload local files: "bwa-mem.cwl"
+2016-06-30 14:56:36 arvados.arv-run[27002] INFO: Uploaded to 3d0ga-4zz18-h7ljh5u76760ww2
+2016-06-30 14:56:40 arvados.cwl-runner[27002] INFO: Submitted job 3d0ga-8i9sb-fm2n3b1w0l6bskg
+2016-06-30 14:56:41 arvados.cwl-runner[27002] INFO: Job bwa-mem.cwl (3d0ga-8i9sb-fm2n3b1w0l6bskg) is Running
+2016-06-30 14:57:12 arvados.cwl-runner[27002] INFO: Job bwa-mem.cwl (3d0ga-8i9sb-fm2n3b1w0l6bskg) is Complete
+2016-06-30 14:57:12 arvados.cwl-runner[27002] INFO: Overall process status is success
+{
+    "aligned_sam": {
+        "path": "keep:54325254b226664960de07b3b9482349+154/HWI-ST1027_129_D0THKACXX.1_1.sam",
+        "checksum": "sha1$0dc46a3126d0b5d4ce213b5f0e86e2d05a54755a",
+        "class": "File",
+        "size": 30738986
+    }
+}
+
+
+ +To submit a workflow and exit immediately, use the @--no-wait@ option. This will print out the uuid of the job that was submitted to standard output. + + +
~/arvados/doc/user/cwl/bwa-mem$ arvados-cwl-runner --no-wait bwa-mem.cwl bwa-mem-input.yml
+arvados-cwl-runner 1.0.20160628195002, arvados-python-client 0.1.20160616015107, cwltool 1.0.20160629140624
+2016-06-30 15:07:52 arvados.arv-run[12480] INFO: Upload local files: "bwa-mem.cwl"
+2016-06-30 15:07:52 arvados.arv-run[12480] INFO: Uploaded to 3d0ga-4zz18-eqnfwrow8aysa9q
+2016-06-30 15:07:52 arvados.cwl-runner[12480] INFO: Submitted job 3d0ga-8i9sb-fm2n3b1w0l6bskg
+3d0ga-8i9sb-fm2n3b1w0l6bskg
+
+
+ +To run a workflow with local control, use @--local@. This means that the host where you run @arvados-cwl-runner@ will be responsible for submitting jobs. With @--local@, if you interrupt @arvados-cwl-runner@ or log out, the workflow will be terminated. + + +

+~/arvados/doc/user/cwl/bwa-mem$ arvados-cwl-runner --local bwa-mem.cwl bwa-mem-input.yml
+arvados-cwl-runner 1.0.20160628195002, arvados-python-client 0.1.20160616015107, cwltool 1.0.20160629140624
+2016-07-01 10:05:19 arvados.cwl-runner[16290] INFO: Pipeline instance 3d0ga-d1hrv-92wcu6ldtio74r4
+2016-07-01 10:05:28 arvados.cwl-runner[16290] INFO: Job bwa-mem.cwl (3d0ga-8i9sb-2nzzfbuf9zjrj4g) is Queued
+2016-07-01 10:05:29 arvados.cwl-runner[16290] INFO: Job bwa-mem.cwl (3d0ga-8i9sb-2nzzfbuf9zjrj4g) is Running
+2016-07-01 10:05:45 arvados.cwl-runner[16290] INFO: Job bwa-mem.cwl (3d0ga-8i9sb-2nzzfbuf9zjrj4g) is Complete
+2016-07-01 10:05:46 arvados.cwl-runner[16290] INFO: Overall process status is success
+{
+    "aligned_sam": {
+        "size": 30738986,
+        "path": "keep:15f56bad0aaa7364819bf14ca2a27c63+88/HWI-ST1027_129_D0THKACXX.1_1.sam",
+        "checksum": "sha1$0dc46a3126d0b5d4ce213b5f0e86e2d05a54755a",
+        "class": "File"
+    }
+}
+
+
+ +h2. Work reuse + +Workflows submitted with @arvados-cwl-runner@ will take advantage of Arvados job reuse. If you submit a workflow which is identical to one that has run before, it will short cut the execution and return the result of the previous run. This also applies to individual workflow steps. For example, a two-step workflow where the first step has run before will reuse results for the first step and only execute the new second step. You can disable this behavior with @--disable-reuse@. + +h2. Referencing files + +When running a workflow on an Arvados cluster, the input files must be stored in Keep. There are several ways this can happen. + +A URI reference to Keep uses the @keep:@ scheme followed by the portable data hash, collection size, and path to the file inside the collection. For example, @keep:2463fa9efeb75e099685528b3b9071e0+438/19.fasta.bwt@. + +If you reference a file in "arv-mount":{{site.baseurl}}/user/tutorials/tutorial-keep-mount.html , such as @/home/example/keep/by_id/2463fa9efeb75e099685528b3b9071e0+438/19.fasta.bwt@, then @arvados-cwl-runner@ will automatically determine the appropriate Keep URI reference. + +If you reference a local file which is not in @arv-mount@, then @arvados-cwl-runner@ will upload the file to Keep and use the Keep URI reference from the upload. + +h2. Registering a workflow with Workbench + +Use @--create-template@ to register a CWL workflow with Arvados Workbench. This enables you to run Workflows by clicking on the "Run a pipeline..." button on the Workbench Dashboard. + + 

+~/arvados/doc/user/cwl/bwa-mem$ arvados-cwl-runner --create-template bwa-mem.cwl
+arvados-cwl-runner 1.0.20160628195002, arvados-python-client 0.1.20160616015107, cwltool 1.0.20160629140624
+2016-07-01 12:21:01 arvados.arv-run[15796] INFO: Upload local files: "bwa-mem.cwl"
+2016-07-01 12:21:01 arvados.arv-run[15796] INFO: Uploaded to 3d0ga-4zz18-7e0hedrmkuyoei3
+2016-07-01 12:21:01 arvados.cwl-runner[15796] INFO: Created template 3d0ga-p5p6p-rjleou1dwr167v5
+3d0ga-p5p6p-rjleou1dwr167v5
+
+
+ +You can provide a partial input file to set default values for the Workflow input parameters: + + +

+~/arvados/doc/user/cwl/bwa-mem$ arvados-cwl-runner --create-template bwa-mem.cwl bwa-mem-template.yml
+arvados-cwl-runner 1.0.20160628195002, arvados-python-client 0.1.20160616015107, cwltool 1.0.20160629140624
+2016-07-01 14:09:50 arvados.arv-run[3730] INFO: Upload local files: "bwa-mem.cwl"
+2016-07-01 14:09:50 arvados.arv-run[3730] INFO: Uploaded to 3d0ga-4zz18-0f91qkovk4ml18o
+2016-07-01 14:09:50 arvados.cwl-runner[3730] INFO: Created template 3d0ga-p5p6p-0deqe6nuuyqns2i
+3d0ga-p5p6p-0deqe6nuuyqns2i
+
+
+ +h2. Making workflows directly executable + +You can make a workflow file directly executable (@cwl-runner@ should be an alias to @arvados-cwl-runner@) by adding the following line to the top of the file: + +*bwa-mem.cwl* + + +
#!/usr/bin/env cwl-runner
+
+
+ + +
~/arvados/doc/user/cwl/bwa-mem$ ./bwa-mem.cwl bwa-mem-input.yml
+arvados-cwl-runner 1.0.20160628195002, arvados-python-client 0.1.20160616015107, cwltool 1.0.20160629140624
+2016-06-30 14:56:36 arvados.arv-run[27002] INFO: Upload local files: "bwa-mem.cwl"
+2016-06-30 14:56:36 arvados.arv-run[27002] INFO: Uploaded to 3d0ga-4zz18-h7ljh5u76760ww2
+2016-06-30 14:56:40 arvados.cwl-runner[27002] INFO: Submitted job 3d0ga-8i9sb-fm2n3b1w0l6bskg
+2016-06-30 14:56:41 arvados.cwl-runner[27002] INFO: Job bwa-mem.cwl (3d0ga-8i9sb-fm2n3b1w0l6bskg) is Running
+2016-06-30 14:57:12 arvados.cwl-runner[27002] INFO: Job bwa-mem.cwl (3d0ga-8i9sb-fm2n3b1w0l6bskg) is Complete
+2016-06-30 14:57:12 arvados.cwl-runner[27002] INFO: Overall process status is success
+{
+    "aligned_sam": {
+        "path": "keep:54325254b226664960de07b3b9482349+154/HWI-ST1027_129_D0THKACXX.1_1.sam",
+        "checksum": "sha1$0dc46a3126d0b5d4ce213b5f0e86e2d05a54755a",
+        "class": "File",
+        "size": 30738986
+    }
+}
+
+
+ +You can even make an input file directly executable the same way with the following two lines at the top: + +*bwa-mem-input.yml* + + +
#!/usr/bin/env cwl-runner
+cwl:tool: bwa-mem.cwl
+
+
+ + +
~/arvados/doc/user/cwl/bwa-mem$ ./bwa-mem-input.yml
+arvados-cwl-runner 1.0.20160628195002, arvados-python-client 0.1.20160616015107, cwltool 1.0.20160629140624
+2016-06-30 14:56:36 arvados.arv-run[27002] INFO: Upload local files: "bwa-mem.cwl"
+2016-06-30 14:56:36 arvados.arv-run[27002] INFO: Uploaded to 3d0ga-4zz18-h7ljh5u76760ww2
+2016-06-30 14:56:40 arvados.cwl-runner[27002] INFO: Submitted job 3d0ga-8i9sb-fm2n3b1w0l6bskg
+2016-06-30 14:56:41 arvados.cwl-runner[27002] INFO: Job bwa-mem.cwl (3d0ga-8i9sb-fm2n3b1w0l6bskg) is Running
+2016-06-30 14:57:12 arvados.cwl-runner[27002] INFO: Job bwa-mem.cwl (3d0ga-8i9sb-fm2n3b1w0l6bskg) is Complete
+2016-06-30 14:57:12 arvados.cwl-runner[27002] INFO: Overall process status is success
+{
+    "aligned_sam": {
+        "path": "keep:54325254b226664960de07b3b9482349+154/HWI-ST1027_129_D0THKACXX.1_1.sam",
+        "checksum": "sha1$0dc46a3126d0b5d4ce213b5f0e86e2d05a54755a",
+        "class": "File",
+        "size": 30738986
+    }
+}
+
+
+ +h2. Developing workflows + +For an introduction and detailed documentation about writing CWL, see the "User Guide":http://commonwl.org/draft-3/UserGuide.html and the "Specification":http://commonwl.org/draft-3 . + +To run on Arvados, a workflow should provide a @DockerRequirement@ in the @hints@ section. + +When developing a workflow, it is often helpful to run it on the local host to avoid the overhead of submitting to the cluster. To execute a workflow only on the local host (without submitting jobs to an Arvados cluster) you can use the @cwltool@ command. Note that you must also have the input data accessible on the local host. You can use @arv-get@ to fetch the data from Keep. + + 

+~/arvados/doc/user/cwl/bwa-mem$ arv-get 2463fa9efeb75e099685528b3b9071e0+438/ .
+156 MiB / 156 MiB 100.0%
+~/arvados/doc/user/cwl/bwa-mem$ arv-get ae480c5099b81e17267b7445e35b4bc7+180/ .
+23 MiB / 23 MiB 100.0%
+~/arvados/doc/user/cwl/bwa-mem$ cwltool bwa-mem.cwl bwa-mem-input-local.yml
+cwltool 1.0.20160629140624
+[job bwa-mem.cwl] /home/example/arvados/doc/user/cwl/bwa-mem$ docker \
+    run \
+    -i \
+    --volume=/home/example/arvados/doc/user/cwl/bwa-mem/19.fasta.ann:/var/lib/cwl/job979368791_bwa-mem/19.fasta.ann:ro \
+    --volume=/home/example/arvados/doc/user/cwl/bwa-mem/HWI-ST1027_129_D0THKACXX.1_1.fastq:/var/lib/cwl/job979368791_bwa-mem/HWI-ST1027_129_D0THKACXX.1_1.fastq:ro \
+    --volume=/home/example/arvados/doc/user/cwl/bwa-mem/19.fasta.sa:/var/lib/cwl/job979368791_bwa-mem/19.fasta.sa:ro \
+    --volume=/home/example/arvados/doc/user/cwl/bwa-mem/19.fasta.amb:/var/lib/cwl/job979368791_bwa-mem/19.fasta.amb:ro \
+    --volume=/home/example/arvados/doc/user/cwl/bwa-mem/19.fasta.pac:/var/lib/cwl/job979368791_bwa-mem/19.fasta.pac:ro \
+    --volume=/home/example/arvados/doc/user/cwl/bwa-mem/HWI-ST1027_129_D0THKACXX.1_2.fastq:/var/lib/cwl/job979368791_bwa-mem/HWI-ST1027_129_D0THKACXX.1_2.fastq:ro \
+    --volume=/home/example/arvados/doc/user/cwl/bwa-mem/19.fasta.bwt:/var/lib/cwl/job979368791_bwa-mem/19.fasta.bwt:ro \
+    --volume=/home/example/arvados/doc/user/cwl/bwa-mem:/var/spool/cwl:rw \
+    --volume=/tmp/tmpgzyou9:/tmp:rw \
+    --workdir=/var/spool/cwl \
+    --read-only=true \
+    --log-driver=none \
+    --user=1001 \
+    --rm \
+    --env=TMPDIR=/tmp \
+    --env=HOME=/var/spool/cwl \
+    biodckr/bwa \
+    bwa \
+    mem \
+    -t \
+    1 \
+    -R \
+    '@RG	ID:arvados_tutorial	PL:illumina	SM:HWI-ST1027_129' \
+    /var/lib/cwl/job979368791_bwa-mem/19.fasta \
+    /var/lib/cwl/job979368791_bwa-mem/HWI-ST1027_129_D0THKACXX.1_1.fastq \
+    /var/lib/cwl/job979368791_bwa-mem/HWI-ST1027_129_D0THKACXX.1_2.fastq > /home/example/arvados/doc/user/cwl/bwa-mem/HWI-ST1027_129_D0THKACXX.1_1.sam
+[M::bwa_idx_load_from_disk] read 0 ALT contigs
+[M::process] read 100000 sequences (10000000 bp)...
+[M::mem_pestat] # candidate unique pairs for (FF, FR, RF, RR): (0, 4745, 1, 0)
+[M::mem_pestat] skip orientation FF as there are not enough pairs
+[M::mem_pestat] analyzing insert size distribution for orientation FR...
+[M::mem_pestat] (25, 50, 75) percentile: (154, 181, 214)
+[M::mem_pestat] low and high boundaries for computing mean and std.dev: (34, 334)
+[M::mem_pestat] mean and std.dev: (185.63, 44.88)
+[M::mem_pestat] low and high boundaries for proper pairs: (1, 394)
+[M::mem_pestat] skip orientation RF as there are not enough pairs
+[M::mem_pestat] skip orientation RR as there are not enough pairs
+[M::mem_process_seqs] Processed 100000 reads in 9.848 CPU sec, 9.864 real sec
+[main] Version: 0.7.12-r1039
+[main] CMD: bwa mem -t 1 -R @RG	ID:arvados_tutorial	PL:illumina	SM:HWI-ST1027_129 /var/lib/cwl/job979368791_bwa-mem/19.fasta /var/lib/cwl/job979368791_bwa-mem/HWI-ST1027_129_D0THKACXX.1_1.fastq /var/lib/cwl/job979368791_bwa-mem/HWI-ST1027_129_D0THKACXX.1_2.fastq
+[main] Real time: 10.061 sec; CPU: 10.032 sec
+Final process status is success
+{
+    "aligned_sam": {
+        "size": 30738959,
+        "path": "/home/example/arvados/doc/user/cwl/bwa-mem/HWI-ST1027_129_D0THKACXX.1_1.sam",
+        "checksum": "sha1$0c668cca45fef02397bb5302880526d300ee4dac",
+        "class": "File"
+    }
+}
+
+
diff --git a/doc/user/getting_started/vm-login-with-webshell.html.textile.liquid b/doc/user/getting_started/vm-login-with-webshell.html.textile.liquid index 58ad868e5e..4ede3b97e3 100644 --- a/doc/user/getting_started/vm-login-with-webshell.html.textile.liquid +++ b/doc/user/getting_started/vm-login-with-webshell.html.textile.liquid @@ -12,7 +12,7 @@ Webshell gives you access to an arvados virtual machine from your browser with n In the Arvados Workbench, click on the dropdown menu icon in the upper right corner of the top navigation menu to access the user settings menu, and click on the menu item *Virtual machines* to see the list of virtual machines you can access. If you do not have access to any virtual machines, please click on Send request for shell access or send an email to "support@curoverse.com":mailto:support@curoverse.com. -Each row in the Virtual Machines panel lists the hostname of the VM, along with a Log in as *you* button under the column "Web shell beta". Clicking on this button will open up a webshell terminal for you in a new browser tab and log you in. +Each row in the Virtual Machines panel lists the hostname of the VM, along with a Log in as *you* button under the column "Web shell". Clicking on this button will open up a webshell terminal for you in a new browser tab and log you in. !{display: block;margin-left: 25px;margin-right: auto;border:1px solid lightgray;}{{ site.baseurl }}/images/vm-access-with-webshell.png! diff --git a/doc/user/topics/arv-copy.html.textile.liquid b/doc/user/topics/arv-copy.html.textile.liquid index 1ec80a619b..ed0a126a41 100644 --- a/doc/user/topics/arv-copy.html.textile.liquid +++ b/doc/user/topics/arv-copy.html.textile.liquid @@ -13,9 +13,9 @@ h2. arv-copy @arv-copy@ allows users to copy collections and pipeline templates from one cluster to another. By default, @arv-copy@ will recursively go through a template and copy all dependencies associated with the object. 
-For example, let's copy from our beta cloud instance *qr1hi* to *dst_cluster*. The names *qr1hi* and *dst_cluster* are interchangable with any cluster name. You can find the cluster name from the prefix of the uuid of the object you want to copy. For example, in *qr1hi*-4zz18-tci4vn4fa95w0zx, the cluster name is qr1hi. +For example, let's copy from the cloud instance *qr1hi* to *dst_cluster*. The names *qr1hi* and *dst_cluster* are interchangeable with any cluster name. You can find the cluster name from the prefix of the uuid of the object you want to copy. For example, in *qr1hi*-4zz18-tci4vn4fa95w0zx, the cluster name is qr1hi. -In order for the clusters to be able to communicate with each other, you must create custom configuration files for both clusters. In the Arvados Workbench, click on the dropdown menu icon in the upper right corner of the top navigation menu to access the user settings menu, and click on the menu item *Current token*. Copy the @ARVADOS_API_HOST@ and @ARVADOS_API_TOKEN@ in both of your clusters. Then, create two configuration files, one for each cluster. The names of the files must have the format of *uuid_prefix.conf*. In our example, let's make two files, one for *qr1hi* and one for *dst_cluster*. From your *Current token* page in *qr1hi* and *dst_cluster*, copy the @ARVADOS_API_HOST@ and @ARVADOS_API_TOKEN@. +In order to communicate with both clusters, you must create custom configuration files for each cluster. In the Arvados Workbench, click on the dropdown menu icon in the upper right corner of the top navigation menu to access the user settings menu, and click on the menu item *Current token*. Copy the @ARVADOS_API_HOST@ and @ARVADOS_API_TOKEN@ in both of your clusters. Then, create two configuration files, one for each cluster. The names of the files must have the format of *uuid_prefix.conf*. In our example, let's make two files, one for *qr1hi* and one for *dst_cluster*. 
From your *Current token* page in *qr1hi* and *dst_cluster*, copy the @ARVADOS_API_HOST@ and @ARVADOS_API_TOKEN@. !{display: block;margin-left: 25px;margin-right: auto;}{{ site.baseurl }}/images/api-token-host.png! @@ -39,7 +39,7 @@ First, select the uuid of the collection you want to copy from the source cluste Now copy the collection from *qr1hi* to *dst_cluster*. We will use the uuid @qr1hi-4zz18-tci4vn4fa95w0zx@ as an example. You can find this collection in the lobSTR v.3 project on cloud.curoverse.com.
~$ arv-copy --src qr1hi --dst dst_cluster qr1hi-4zz18-tci4vn4fa95w0zx
-qr1hi-4zz18-tci4vn4fa95w0zx: 6.1M / 6.1M 100.0% 
+qr1hi-4zz18-tci4vn4fa95w0zx: 6.1M / 6.1M 100.0%
 arvados.arv-copy[1234] INFO: Success: created copy with uuid dst_cluster-4zz18-8765943210cdbae
 
@@ -48,7 +48,7 @@ The output of arv-copy displays the uuid of the collection generated in the dest For example, this will copy the collection to project dst_cluster-j7d0g-a894213ukjhal12 in the destination cluster. -
~$ arv-copy --src qr1hi --dst dst_cluster --project-uuid dst_cluster-j7d0g-a894213ukjhal12 qr1hi-4zz18-tci4vn4fa95w0zx 
+ 
~$ arv-copy --src qr1hi --dst dst_cluster --project-uuid dst_cluster-j7d0g-a894213ukjhal12 qr1hi-4zz18-tci4vn4fa95w0zx
 
@@ -66,7 +66,7 @@ arvados.arv-copy[19694] INFO: Success: created copy with uuid dst_cluster-p5p6p-
-New branches in the destination git repo will be created for each branch used in the pipeline template. For example, if your source branch was named ac21f0d45a76294aaca0c0c0fdf06eb72d03368d, your new branch will be named @git_git_qr1hi_arvadosapi_com_reponame_git_ac21f0d45a76294aaca0c0c0fdf06eb72d03368d@. +New branches in the destination git repo will be created for each branch used in the pipeline template. For example, if your source branch was named ac21f0d45a76294aaca0c0c0fdf06eb72d03368d, your new branch will be named @git_git_qr1hi_arvadosapi_com_reponame_git_ac21f0d45a76294aaca0c0c0fdf06eb72d03368d@. By default, if you copy a pipeline template recursively, you will find that the template as well as all the dependencies are in your home project. diff --git a/doc/user/topics/crunch-tools-overview.html.textile.liquid b/doc/user/topics/crunch-tools-overview.html.textile.liquid index 994f437b5b..0e19be12b6 100644 --- a/doc/user/topics/crunch-tools-overview.html.textile.liquid +++ b/doc/user/topics/crunch-tools-overview.html.textile.liquid @@ -4,6 +4,8 @@ navsection: userguide title: "Tools for writing Crunch pipelines" ... +{% include 'pipeline_deprecation_notice' %} + Arvados includes a number of tools to help you develop pipelines and jobs for Crunch. This overview explains each tool's intended use to help you choose the right one. h2. Use the "arv-run command-line utility":arv-run.html diff --git a/doc/user/topics/running-pipeline-command-line.html.textile.liquid b/doc/user/topics/running-pipeline-command-line.html.textile.liquid index 9f10fe43df..9a2e12c096 100644 --- a/doc/user/topics/running-pipeline-command-line.html.textile.liquid +++ b/doc/user/topics/running-pipeline-command-line.html.textile.liquid @@ -1,7 +1,7 @@ --- layout: default navsection: userguide -title: "Running a pipeline on the command line" +title: "Running an Arvados pipeline" ... 
This tutorial demonstrates how to use the command line to run the same pipeline as described in "running a pipeline using Workbench.":{{site.baseurl}}/user/tutorials/tutorial-pipeline-workbench.html diff --git a/doc/user/topics/tutorial-parallel.html.textile.liquid b/doc/user/topics/tutorial-parallel.html.textile.liquid index 6d0058b5e9..3a7f85cc9a 100644 --- a/doc/user/topics/tutorial-parallel.html.textile.liquid +++ b/doc/user/topics/tutorial-parallel.html.textile.liquid @@ -4,6 +4,8 @@ navsection: userguide title: "Concurrent Crunch tasks" ... +{% include 'pipeline_deprecation_notice' %} + In the previous tutorials, we used @arvados.job_setup.one_task_per_input_file()@ to automatically create concurrent jobs by creating a separate task per file. For some types of jobs, you may need to split the work up differently, for example creating tasks to process different segments of a single large file. This tutorial will demonstrate how to create Crunch tasks directly. Start by entering the @crunch_scripts@ directory of your Git repository: diff --git a/doc/user/tutorials/running-external-program.html.textile.liquid b/doc/user/tutorials/running-external-program.html.textile.liquid index 90dc1970a7..ef4634ee74 100644 --- a/doc/user/tutorials/running-external-program.html.textile.liquid +++ b/doc/user/tutorials/running-external-program.html.textile.liquid @@ -4,6 +4,8 @@ navsection: userguide title: "Writing a pipeline template" ... +{% include 'pipeline_deprecation_notice' %} + This tutorial demonstrates how to construct a two stage pipeline template that uses the "bwa mem":http://bio-bwa.sourceforge.net/ tool to produce a "Sequence Alignment/Map (SAM)":https://samtools.github.io/ file, then uses the "Picard SortSam tool":http://picard.sourceforge.net/command-line-overview.shtml#SortSam to produce a BAM (Binary Alignment/Map) file. {% include 'tutorial_expectations' %} @@ -64,7 +66,7 @@ For more information and examples for writing pipelines, see the "pipeline templ h2. 
Re-using your pipeline run -Arvados allows users to re-use jobs that have the same inputs in order to save computing time and resources. Users are able to change a job downstream without re-computing earlier jobs. This section shows which version control parameters should be tuned to make sure Arvados will not re-compute your jobs. +Arvados allows users to re-use jobs that have the same inputs in order to save computing time and resources. Users are able to change a job downstream without re-computing earlier jobs. This section shows which version control parameters should be tuned to make sure Arvados will not re-compute your jobs. Note: Job reuse can only happen if all input collections do not change. diff --git a/doc/user/tutorials/tutorial-firstscript.html.textile.liquid b/doc/user/tutorials/tutorial-firstscript.html.textile.liquid index bf73c8cc19..d4caafef5c 100644 --- a/doc/user/tutorials/tutorial-firstscript.html.textile.liquid +++ b/doc/user/tutorials/tutorial-firstscript.html.textile.liquid @@ -5,6 +5,8 @@ navmenu: Tutorials title: "Writing a Crunch script" ... +{% include 'pipeline_deprecation_notice' %} + This tutorial demonstrates how to write a script using Arvados Python SDK. The Arvados SDK supports access to advanced features not available using the @run-command@ wrapper, such as scheduling concurrent tasks across nodes. {% include 'tutorial_expectations' %} diff --git a/doc/user/tutorials/tutorial-submit-job.html.textile.liquid b/doc/user/tutorials/tutorial-submit-job.html.textile.liquid index b17f951e74..47e8dc750c 100644 --- a/doc/user/tutorials/tutorial-submit-job.html.textile.liquid +++ b/doc/user/tutorials/tutorial-submit-job.html.textile.liquid @@ -5,6 +5,8 @@ navmenu: Tutorials title: "Running on an Arvados cluster" ... +{% include 'pipeline_deprecation_notice' %} + This tutorial demonstrates how to create a pipeline to run your crunch script on an Arvados cluster. 
Cluster jobs can scale out to multiple nodes, and use @git@ and @docker@ to store the complete system snapshot required to achieve reproducibilty. {% include 'tutorial_expectations' %} -- 2.30.2