# Path to the extracted and built bwa source tree.  Stays None until
# install_path() has completed successfully once in this process.
bwa_install_path = None

def install_path():
    """
    Extract the bwa source tree, build the bwa binary, and return the
    path to the source tree.

    The tarball is named by the current job's "bwa_tbz" script
    parameter.  The resulting path is cached in the module-level
    bwa_install_path, so extraction and build happen at most once per
    process; later calls return the cached path immediately.
    """
    global bwa_install_path
    if bwa_install_path:
        return bwa_install_path

    # NOTE(review): the "path" argument and the lock file's open mode
    # were not visible in the reviewed extract; 'bwa' and 'w' are
    # reconstructed -- confirm against the full file.
    source_tree = arvados.util.tarball_extract(
        tarball=arvados.current_job()['script_parameters']['bwa_tbz'],
        path='bwa')

    # The lock file name is the parent directory's path with
    # '.bwa-make.lock' appended directly (no path separator), so the
    # lock lives beside the parent directory rather than inside it.
    lock_path = os.path.split(source_tree)[0] + '.bwa-make.lock'

    # Hold an exclusive flock while "make" runs (presumably so that
    # concurrent tasks sharing the tree do not build at the same time).
    # The with-block closes the file, and thereby drops the lock, even
    # when the build raises.
    with open(lock_path, 'w') as lockfile:
        fcntl.flock(lockfile, fcntl.LOCK_EX)
        arvados.util.run_command(['make', '-j16'], cwd=source_tree)

    # Publish the cached path only after a successful build, so a failed
    # build is retried on the next call instead of being reported as
    # installed.
    bwa_install_path = source_tree
    return bwa_install_path
def bwa_binary():
    """
    Return the path to the bwa executable.

    The executable is the file named 'bwa' directly inside the source
    tree returned by install_path().
    """
    source_tree = install_path()
    return os.path.join(source_tree, 'bwa')
def run(command, command_args, **kwargs):
    """
    Build and run the bwa binary.

    command is the bwa module, e.g., "index" or "aln".

    command_args is a list of additional command line arguments, e.g.,
    ['-a', 'bwtsw', 'ref.fasta']

    Optional keyword arguments "stdin" and "stdout" are handed to the
    child process; they default to subprocess.PIPE and sys.stderr
    respectively.

    It is assumed that we are running in a Crunch job environment, and
    the job's "bwa_tbz" parameter is a collection containing the bwa
    source tree in a .tbz file.
    """
    # NOTE(review): the "command" list element, the positional argv
    # argument and the stderr=sys.stderr keyword were not visible in the
    # reviewed extract; they are reconstructed -- confirm against the
    # full file.
    argv = [bwa_binary(), command]
    argv.extend(command_args)

    # Log the exact command line before running it.
    sys.stderr.write("%s.run: exec %s\n" % (__name__, argv))

    child_stdin = kwargs.get('stdin', subprocess.PIPE)
    child_stdout = kwargs.get('stdout', sys.stderr)
    arvados.util.run_command(
        argv,
        cwd=arvados.current_task().tmpdir,
        stderr=sys.stderr,
        stdin=child_stdin,
        stdout=child_stdout)
def one_task_per_pair_input_file(if_sequence=0, and_end_task=True):
    """
    Queue one task for each pair of fastq files in this job's input
    collection.

    Each new task will have two parameters, named "input_1" and
    "input_2", each being a manifest containing a single fastq file.

    A matching pair of files in the input collection is assumed to
    have names "x_1.y" and "x_2.y".  Both files must be in the same
    stream; the mate's name is derived by replacing the last "_1." in
    the file name with "_2.".

    Files in the input collection that are not part of a matched pair
    are skipped without queueing a task.

    if_sequence and and_end_task arguments have the same significance
    as in arvados.job_setup.one_task_per_input_file().
    """
    # NOTE(review): several short lines of this function were not
    # visible in the reviewed extract (the early return, the 'parameters'
    # wrapper around input_1/input_2, and the "if and_end_task:" tail
    # ending with .execute() and an exit).  They are reconstructed here
    # -- confirm against the full file.
    current_task = arvados.current_task()
    if if_sequence != current_task['sequence']:
        return

    current_job = arvados.current_job()
    job_input = current_job['script_parameters']['input']
    cr = arvados.CollectionReader(job_input)

    # Raw strings keep "\." and "\g<1>" from being read as (invalid)
    # string escapes; the values handed to re are unchanged.
    left_pattern = re.compile(r'(.*_)1\.')
    right_template = r'\g<1>2.'

    for s in cr.all_streams():
        stream_files = list(s.all_files())
        # Index the stream once by file name instead of rescanning it
        # for every candidate.  On duplicate names the last file wins,
        # as it did with the original linear scan.
        by_name = {}
        for f in stream_files:
            by_name[f.name()] = f

        for left_file in stream_files:
            left_name = left_file.name()
            right_name = left_pattern.sub(right_template, left_name)
            if right_name == left_name:
                # Not an "x_1.y" style name, so it cannot start a pair.
                continue
            right_file = by_name.get(right_name)
            if right_file is not None:
                new_task_attrs = {
                    'job_uuid': current_job['uuid'],
                    'created_by_job_task_uuid': current_task['uuid'],
                    'sequence': if_sequence + 1,
                    'parameters': {
                        'input_1': left_file.as_manifest(),
                        'input_2': right_file.as_manifest()
                        }
                    }
                arvados.api().job_tasks().create(body=new_task_attrs).execute()

    if and_end_task:
        # Mark the current (queueing) task as successful and stop here.
        arvados.api().job_tasks().update(uuid=current_task['uuid'],
                                         body={'success': True}
                                         ).execute()
        sys.exit(0)