From b7fe2ea36c87fa648f019c20679b50ab462aec5a Mon Sep 17 00:00:00 2001 From: Peter Amstutz <peter.amstutz@curoverse.com> Date: Wed, 15 Oct 2014 15:16:41 -0400 Subject: [PATCH] 3609: Added documentation page. Added to "arv" frontend command. Bug fix to print help when there are no command line arguments. --- doc/_config.yml | 1 + doc/user/topics/arv-run.html.textile.liquid | 67 +++++++++++++++++++++ sdk/cli/bin/arv | 4 +- sdk/python/arvados/commands/run.py | 4 ++ 4 files changed, 75 insertions(+), 1 deletion(-) create mode 100644 doc/user/topics/arv-run.html.textile.liquid diff --git a/doc/_config.yml b/doc/_config.yml index 61bfb6f85c..b03a18dff5 100644 --- a/doc/_config.yml +++ b/doc/_config.yml @@ -32,6 +32,7 @@ navbar: - user/topics/keep.html.textile.liquid - Run a pipeline on the command line: - user/topics/running-pipeline-command-line.html.textile.liquid + - user/topics/arv-run.html.textile.liquid - user/reference/sdk-cli.html.textile.liquid - Develop a new pipeline: - user/tutorials/intro-crunch.html.textile.liquid diff --git a/doc/user/topics/arv-run.html.textile.liquid b/doc/user/topics/arv-run.html.textile.liquid new file mode 100644 index 0000000000..b406e6b159 --- /dev/null +++ b/doc/user/topics/arv-run.html.textile.liquid @@ -0,0 +1,67 @@ +--- +layout: default +navsection: userguide +title: "Using arv-run" +... + +The @arv-run@ command enables you to create Arvados pipelines at the command line that fan out to multiple concurrent tasks across Arvados compute nodes. + +{% include 'tutorial_expectations' %} + +h1. Quick introduction + +Run one @grep@ task per file, and redirect the output to output.txt + +<notextile> +<pre> +$ <span class="userinput">cd ~/keep/by_id/3229739b505d2b878b62aed09895a55a+142</span> +$ <span class="userinput">arv-run grep -H -n ATTGGAGGAAAGATGAGTGAC -- *.fastq \> output.txt</span> +Running pipeline qr1hi-d1hrv-mg3bju0u7r6w241 +</pre> +</notextile> + +h1. 
Usage + +@arv-run@ takes a command or command pipeline, along with stdin and stdout redirection, and creates an Arvados pipeline to run the command. The syntax is designed to mimic standard shell syntax, so it is usually necessary to quote the metacharacters < > and | as either \< \> and \| or '<' '>' and '|'. + +@arv-run@ introspects the command line to determine which arguments are file inputs. If you specify a file that is only available on the local filesystem, it will be first uploaded to Arvados, and then the command line will be rewritten to refer to the newly uploaded file. @arv-run@ also works together with @arv-mount@ to identify if a file specified on the command line is part of an Arvados collection. If so, the command line will be rewritten to refer to the file within the collection without any upload necessary. + +@arv-run@ will parallelize on the files listed on the command line after @--@. You may specify @--batch-size N@ after the @--@ but before listing any files to specify how many files to put on the command line for each task (see below for an example). + +You may use stdin @<@ redirection on multiple files. This will create a separate task for each input file. + +You are only permitted to supply a single file name for stdout @>@ redirection. If there are multiple tasks, their output will be collated at the end of the pipeline. Alternately, you may use "run-command":run-command.html parameter substitution in the file name to generate different filenames for each task. + +Multiple commands connected by pipes all execute in the same container. If you need to capture intermediate results of a pipe, use the @tee@ command. + +@arv-run@ commands always run inside a Docker image. By default, this is "arvados/jobs". Use @arv-run --docker-image IMG@ to specify the image to use. Note: the Docker image must be uploaded to Arvados using @arv keep docker@. 
+ +Use @arv-run --dry-run@ to print out the final Arvados pipeline generated by @arv-run@ without submitting it. + +By default, the pipeline will be submitted to your configured Arvados instance. Use @arv-run --local@ to run the command locally using "arv-crunch-job". + +h1. Examples + +Run one @grep@ task per file, with each input file piped from stdin. Redirect the output to output.txt. + +<notextile> +<pre> +$ <span class="userinput">arv-run grep -H -n ATTGGAGGAAAGATGAGTGAC \< *.fastq \> output.txt</span> +</pre> +</notextile> + +Run @cat | grep@ once per file. Redirect the output to output.txt. + +<notextile> +<pre> +$ <span class="userinput">arv-run cat -- *.fastq \| grep -H -n ATTGGAGGAAAGATGAGTGAC \> output.txt</span> +</pre> +</notextile> + +Run @bwa@ for pairs of fastq files in "inputs" using the reference human_g1k_v37.fasta. + +<notextile> +<pre> +<span class="userinput">arv-run --docker-image arvados/jobs-java-bwa-samtools bwa mem reference/human_g1k_v37.fasta -- --batch-size 2 inputs/*.fastq \> '$(task.uuid).sam'</span> +</pre> +</notextile> diff --git a/sdk/cli/bin/arv b/sdk/cli/bin/arv index 9b486d2d79..59bdfae81a 100755 --- a/sdk/cli/bin/arv +++ b/sdk/cli/bin/arv @@ -112,7 +112,7 @@ def init_config end end -subcommands = %w(keep pipeline tag ws edit) +subcommands = %w(keep pipeline run tag ws edit) def check_subcommands client, arvados, subcommand, global_opts, remaining_opts case subcommand @@ -142,6 +142,8 @@ def check_subcommands client, arvados, subcommand, global_opts, remaining_opts puts "Available methods: run" end abort + when 'run' + exec `which arv-run`.strip, *remaining_opts when 'tag' exec `which arv-tag`.strip, *remaining_opts when 'ws' diff --git a/sdk/python/arvados/commands/run.py b/sdk/python/arvados/commands/run.py index 551f955d3b..a15a457c5e 100644 --- a/sdk/python/arvados/commands/run.py +++ b/sdk/python/arvados/commands/run.py @@ -60,6 +60,10 @@ def statfile(prefix, fn): def main(arguments=None): args = 
arvrun_parser.parse_args(arguments) + if len(args.args) == 0: + arvrun_parser.print_help() + return + reading_into = 2 slots = [[], [], []] -- 2.30.2