X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/d77cf59c6dc0c70f8806d17e03576af272dc425d..d084ca24b06c598271844d2ba4c8c40f251c0999:/crunch_scripts/arvados_bwa.py diff --git a/crunch_scripts/arvados_bwa.py b/crunch_scripts/arvados_bwa.py index b34d72fd92..aefc1f064b 100644 --- a/crunch_scripts/arvados_bwa.py +++ b/crunch_scripts/arvados_bwa.py @@ -1,3 +1,7 @@ +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + import arvados import re import os @@ -7,30 +11,47 @@ import subprocess bwa_install_path = None -def setup(): +def install_path(): + """ + Extract the bwa source tree, build the bwa binary, and return the + path to the source tree. + """ global bwa_install_path if bwa_install_path: return bwa_install_path - bwa_path = arvados.util.tarball_extract( + + bwa_install_path = arvados.util.tarball_extract( tarball = arvados.current_job()['script_parameters']['bwa_tbz'], path = 'bwa') # build "bwa" binary - lockfile = open(os.path.split(bwa_path)[0] + '.bwa-make.lock', + lockfile = open(os.path.split(bwa_install_path)[0] + '.bwa-make.lock', 'w') fcntl.flock(lockfile, fcntl.LOCK_EX) - arvados.util.run_command(['make', '-j16'], cwd=bwa_path) + arvados.util.run_command(['make', '-j16'], cwd=bwa_install_path) lockfile.close() - bwa_install_path = bwa_path - return bwa_path + return bwa_install_path def bwa_binary(): - global bwa_install_path - return os.path.join(bwa_install_path, 'bwa') + """ + Return the path to the bwa executable. + """ + return os.path.join(install_path(), 'bwa') def run(command, command_args, **kwargs): - global bwa_install_path + """ + Build and run the bwa binary. + + command is the bwa module, e.g., "index" or "aln". + + command_args is a list of additional command line arguments, e.g., + ['-a', 'bwtsw', 'ref.fasta'] + + It is assumed that we are running in a Crunch job environment, and + the job's "bwa_tbz" parameter is a collection containing the bwa + source tree in a .tbz file. + """ execargs = [bwa_binary(), command] execargs += command_args @@ -43,6 +64,22 @@ def run(command, command_args, **kwargs): stdout=kwargs.get('stdout', sys.stderr)) def one_task_per_pair_input_file(if_sequence=0, and_end_task=True): + """ + Queue one task for each pair of fastq files in this job's input + collection. + + Each new task will have two parameters, named "input_1" and + "input_2", each being a manifest containing a single fastq file. + + A matching pair of files in the input collection is assumed to + have names "x_1.y" and "x_2.y". + + Files in the input collection that are not part of a matched pair + are silently ignored. + + if_sequence and and_end_task arguments have the same significance + as in arvados.job_setup.one_task_per_input_file(). + """ if if_sequence != arvados.current_task()['sequence']: return job_input = arvados.current_job()['script_parameters']['input'] @@ -76,5 +113,3 @@ def one_task_per_pair_input_file(if_sequence=0, and_end_task=True): body={'success':True} ).execute() exit(0) - -setup()