From 82a471c92036198aaf02ca0467ea48d49dbe822d Mon Sep 17 00:00:00 2001 From: Tom Clegg Date: Tue, 10 Dec 2013 14:39:44 -0800 Subject: [PATCH] Add docstrings to arvados_samtools module. closes #1639 --- crunch_scripts/arvados_samtools.py | 33 ++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/crunch_scripts/arvados_samtools.py b/crunch_scripts/arvados_samtools.py index dd7a42874f..6f4d966458 100644 --- a/crunch_scripts/arvados_samtools.py +++ b/crunch_scripts/arvados_samtools.py @@ -8,6 +8,10 @@ import subprocess samtools_path = None def samtools_install_path(): + """ + Extract the samtools source tree, build the samtools binary, and + return the path to the source tree. + """ global samtools_path if samtools_path: return samtools_path @@ -25,9 +29,24 @@ def samtools_install_path(): return samtools_path def samtools_binary(): + """ + Return the path to the samtools executable. + """ return os.path.join(samtools_install_path(), 'samtools') def run(command, command_args, **kwargs): + """ + Build and run the samtools binary. + + command is the samtools subcommand, e.g., "view" or "sort". + + command_args is a list of additional command line arguments, e.g., + ['-bt', 'ref_list.txt', '-o', 'aln.bam', 'aln.sam.gz'] + + It is assumed that we are running in a Crunch job environment, and + the job's "samtools_tgz" parameter is a collection containing the + samtools source tree in a .tgz file. + """ execargs = [samtools_binary(), command] execargs += command_args @@ -40,6 +59,20 @@ def run(command, command_args, **kwargs): stdout=kwargs.get('stdout', sys.stderr)) def one_task_per_bam_file(if_sequence=0, and_end_task=True): + """ + Queue one task for each bam file in this job's input collection. + + Each new task will have an "input" parameter: a manifest + containing one .bam file and (if available) the corresponding .bai + index file. + + Files in the input collection that are not named *.bam or *.bai + (as well as *.bai files that do not match any .bam file present) + are silently ignored. + + if_sequence and and_end_task arguments have the same significance + as in arvados.job_setup.one_task_per_input_file(). + """ if if_sequence != arvados.current_task()['sequence']: return job_input = arvados.current_job()['script_parameters']['input'] -- 2.30.2