#!/usr/bin/env python
# Copyright (C) The Arvados Authors. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0

"""Build a bwa index for the reference FASTA files of the current job.

The script extracts the ``reference`` path of the collection named by the
job's ``input`` script parameter, runs ``bwa index -a bwtsw`` on every
``*.fasta`` / ``*.fasta.gz`` file found there, moves the generated index
files into a fresh ``out`` directory under the task's temporary directory,
and records that directory as the task output.
"""

import arvados
import arvados_bwa
import os
import re
import sys

# File names accepted as reference input (optionally gzip-compressed).
FASTA_RE = re.compile(r'\.fasta(\.gz)?$')
# File name suffixes collected as index output after the run.
INDEX_RE = re.compile(r'\.(amb|ann|bwt|pac|rbwt|rpac|rsa|sa)$')

this_job = arvados.current_job()
this_task = arvados.current_task()

# Extract the 'reference' part of the input collection to a local directory.
# NOTE(review): decompress=False presumably leaves *.gz files compressed,
# which is why FASTA_RE also accepts '.fasta.gz' -- confirm against the SDK.
ref_dir = arvados.util.collection_extract(
    collection=this_job['script_parameters']['input'],
    path='reference',
    decompress=False)

# Sorted so that the argument order passed to bwa does not depend on the
# arbitrary ordering of os.listdir().
ref_fasta_files = sorted(
    os.path.join(ref_dir, name)
    for name in os.listdir(ref_dir)
    if FASTA_RE.search(name))

# Fail with an explicit message instead of invoking 'bwa index' with no
# input file at all.
if not ref_fasta_files:
    raise RuntimeError(
        "no reference FASTA files (*.fasta or *.fasta.gz) found in %s"
        % ref_dir)

# build reference index
arvados_bwa.run('index', ['-a', 'bwtsw'] + ref_fasta_files)

# move output files to new empty directory
out_dir = os.path.join(this_task.tmpdir, 'out')
arvados.util.run_command(['rm', '-rf', out_dir], stderr=sys.stderr)
os.mkdir(out_dir)
# The index files are looked for in ref_dir, alongside the FASTA inputs.
# os.listdir() returns a list, so renaming entries while looping is safe.
for name in os.listdir(ref_dir):
    if not INDEX_RE.search(name):
        continue
    src = os.path.join(ref_dir, name)
    sys.stderr.write("bwa output: %s (%d)\n" % (name, os.stat(src).st_size))
    os.rename(src, os.path.join(out_dir, name))

# store output
out = arvados.CollectionWriter()
out.write_directory_tree(out_dir, max_manifest_depth=0)
this_task.set_output(out.finish())