samtools_path = None


def samtools_install_path():
    """
    Return the cached path to the samtools source tree.

    The value comes from the module-level ``samtools_path`` variable,
    which starts out as None; while it is unset, None is returned.

    NOTE(review): this function is meant to extract the samtools source
    tree and build the samtools binary before returning, but nothing in
    this body assigns ``samtools_path``. Confirm whether the
    extract/build step belongs between the cache check and the final
    return.
    """
    global samtools_path
    if samtools_path:
        # Already resolved earlier in this process: reuse it.
        return samtools_path
    return samtools_path
def samtools_binary():
    """
    Return the path to the samtools executable.

    The result is the entry named "samtools" directly inside the
    directory returned by samtools_install_path().
    """
    return os.path.join(samtools_install_path(), 'samtools')
def run(command, command_args, **kwargs):
# NOTE(review): the "+ " prefixes on the docstring lines below look like
# leftover unified-diff markers, and the body has lost its indentation;
# both must be repaired before this function can be imported.
+ """
+ Build and run the samtools binary.
+
+ command is the samtools subcommand, e.g., "view" or "sort".
+
+ command_args is a list of additional command line arguments, e.g.,
+ ['-bt', 'ref_list.txt', '-o', 'aln.bam', 'aln.sam.gz']
+
+ It is assumed that we are running in a Crunch job environment, and
+ the job's "samtools_tgz" parameter is a collection containing the
+ samtools source tree in a .tgz file.
+ """
# Assemble the argument vector: samtools executable first, then the
# subcommand, then the caller-supplied extra arguments.
execargs = [samtools_binary(),
command]
execargs += command_args
# NOTE(review): the line below is only the tail of a call whose opening
# (and any other arguments) is not visible here, so the full statement
# cannot be reconstructed from this view. What it does show: callers may
# pass a "stdout" keyword argument, and sys.stderr is used when they do not.
stdout=kwargs.get('stdout', sys.stderr))
def one_task_per_bam_file(if_sequence=0, and_end_task=True):
+ """
+ Queue one task for each bam file in this job's input collection.
+
+ Each new task will have an "input" parameter: a manifest
+ containing one .bam file and (if available) the corresponding .bai
+ index file.
+
+ Files in the input collection that are not named *.bam or *.bai
+ (as well as *.bai files that do not match any .bam file present)
+ are silently ignored.
+
+ if_sequence and and_end_task arguments have the same significance
+ as in arvados.job_setup.one_task_per_input_file().
+ """
if if_sequence != arvados.current_task()['sequence']:
return
job_input = arvados.current_job()['script_parameters']['input']