# Job setup: look up the current Arvados job and task, extract the input
# collection to a local directory, and prepare scratch directories under the
# task's tmpdir.
9 this_job = arvados.current_job()
10 this_task = arvados.current_task()
# Extract the collection named by the job's 'input' script parameter; the
# result is used below as a local directory path (it is passed to os.listdir).
# NOTE(review): the remainder of this call (any further arguments and the
# closing parenthesis) is not visible in this view of the file.
11 fastq_path = arvados.util.collection_extract(
12 collection = this_job['script_parameters']['input'],
# List the extracted directory, leaving out the '.locator' entry.
# NOTE(review): fastq_files is iterated and also used for `in` membership
# tests further down, which requires filter() to return a list (Python 2
# behaviour). On Python 3 filter() returns a one-shot iterator -- confirm the
# target interpreter.
14 fastq_files = filter(lambda f: f != '.locator', os.listdir(fastq_path))
# Scratch locations under the task tmpdir: 'tmp' is the parent of the per-pair
# directories created by the conversion loop, 'reads' is the directory handed
# to the sdfsplit step and later stored as output.
# NOTE(review): arvados.current_task() is called again here instead of reusing
# this_task -- presumably it yields the same task; confirm.
15 tmp_dir_base = os.path.join(arvados.current_task().tmpdir, 'tmp')
16 out_dir = os.path.join(arvados.current_task().tmpdir, 'reads')
# Remove both directories (rm -rf, so a missing directory is not an error),
# sending the commands' stderr to this script's stderr, then create only
# tmp_dir_base.
# NOTE(review): out_dir is not created here -- presumably the sdfsplit step
# creates it; confirm.
18 arvados.util.run_command(['rm', '-rf', tmp_dir_base], stderr=sys.stderr)
19 arvados.util.run_command(['rm', '-rf', out_dir], stderr=sys.stderr)
20 os.mkdir(tmp_dir_base)
22 # convert fastq to sdf
# For every listed file whose name matches the "_1" fastq pattern (fq or
# fastq, optionally followed by a gz suffix), derive the mate's name by
# replacing "_1" with "_2". When the mate is also in the listing, append a new
# directory name to tmp_dirs -- tmp_dir_base plus an 8-digit zero-padded index
# equal to the number of pairs recorded so far -- and run the rtg 'format'
# command on it with the left (-l) and right (-r) file paths. Files whose mate
# is absent are skipped without any message.
# NOTE(review): the '.' characters in both regular expressions are unescaped,
# so they match any character rather than only a literal dot (for example a
# name ending in '_1xfq' also matches).
# NOTE(review): tmp_dirs is not initialised in the lines visible here, and the
# start of the option list passed to run_rtg is not visible either. The
# embedded line numbers skip from 22 to 24 and from 29 to 32, so those lines
# are presumably missing from this view -- confirm against the full file.
24 for leftarm in fastq_files:
25 if re.search(r'_1.f(ast)?q(.gz)?$', leftarm):
26 rightarm = re.sub(r'_1(.f(ast)?q(.gz)?)$', '_2\\1', leftarm)
27 if rightarm in fastq_files:
28 tmp_dirs += ['%s/%08d' % (tmp_dir_base, len(tmp_dirs))]
29 pyrtg.run_rtg('format', tmp_dirs[-1],
32 '-l', os.path.join(fastq_path, leftarm),
33 '-r', os.path.join(fastq_path, rightarm)])
# Run the rtg 'sdfsplit' command, passing out_dir and an option list made of
# '-n 1500000' followed by every per-pair directory recorded in tmp_dirs.
# NOTE(review): presumably -n limits the number of sequences per split piece
# and out_dir receives the pieces -- confirm against the RTG documentation.
36 pyrtg.run_rtg('sdfsplit', out_dir,
37 ['-n', '1500000'] + tmp_dirs)
# Store the result: write the directory tree rooted at out_dir into a new
# CollectionWriter (with max_manifest_depth=1) and record the value returned
# by finish() as this task's output.
40 out = arvados.CollectionWriter()
41 out.write_directory_tree(out_dir, max_manifest_depth=1)
42 this_task.set_output(out.finish())