X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/1875ddb761e4ae2909d2afe0718f3d0ad5f3ce0f..0561bd0c3c07257fd58ded6c7cfa5feeae97af57:/crunch_scripts/arvados-bcbio-nextgen.py diff --git a/crunch_scripts/arvados-bcbio-nextgen.py b/crunch_scripts/arvados-bcbio-nextgen.py index cb5eafe121..b7e19ecddb 100755 --- a/crunch_scripts/arvados-bcbio-nextgen.py +++ b/crunch_scripts/arvados-bcbio-nextgen.py @@ -1,10 +1,15 @@ #!/usr/bin/python +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 import arvados import subprocess -import subst +import crunchutil.subst as subst import shutil import os +import sys +import time if len(arvados.current_task()['parameters']) > 0: p = arvados.current_task()['parameters'] @@ -69,9 +74,11 @@ with open("/usr/local/share/bcbio-nextgen/galaxy/tool-data/picard_index.loc", "w with open("/usr/local/share/bcbio-nextgen/galaxy/tool-data/sam_fa_indices.loc", "w") as f: f.write(subst.do_substitution(p, "index\tGRCh37\t$(file $(sam_fa_indices))\n")) -with open("/tmp/crunch-job/gatk-variant.yaml", "w") as f: +with open("/tmp/crunch-job/freebayes-variant.yaml", "w") as f: f.write(''' -# Template for whole genome Illumina variant calling with GATK pipeline +# Template for whole genome Illumina variant calling with FreeBayes +# This is a GATK-free pipeline without post-alignment BAM pre-processing +# (recalibration and realignment) --- details: - analysis: variant2 @@ -81,20 +88,22 @@ details: # batch: your-arbitrary-batch-name algorithm: aligner: bwa - mark_duplicates: picard - recalibrate: gatk - realign: gatk - variantcaller: gatk-haplotype + mark_duplicates: true + recalibrate: false + realign: false + variantcaller: freebayes platform: illumina quality_format: Standard - coverage_interval: genome # for targetted projects, set the region # variant_regions: /path/to/your.bed ''') +os.unlink("/usr/local/share/bcbio-nextgen/gemini_data") +os.symlink(arvados.get_job_param_mount("gemini_data"), "/usr/local/share/bcbio-nextgen/gemini_data") + os.chdir(arvados.current_task().tmpdir) -rcode = subprocess.call(["bcbio_nextgen.py", "--workflow", "template", "/tmp/crunch-job/gatk-variant.yaml", "project1", +rcode = subprocess.call(["bcbio_nextgen.py", "--workflow", "template", "/tmp/crunch-job/freebayes-variant.yaml", "project1", subst.do_substitution(p, "$(file $(R1))"), subst.do_substitution(p, "$(file $(R2))")]) @@ -102,4 +111,35 @@ os.chdir("project1/work") os.symlink("/usr/local/share/bcbio-nextgen/galaxy/tool-data", "tool-data") -rcode = subprocess.call(["bcbio_nextgen.py", "../config/project1.yaml", "-n", os.environ('CRUNCH_NODE_SLOTS')]) +rcode = subprocess.call(["bcbio_nextgen.py", "../config/project1.yaml", "-n", os.environ['CRUNCH_NODE_SLOTS']]) + +print("run-command: completed with exit code %i (%s)" % (rcode, "success" if rcode == 0 else "failed")) + +if rcode == 0: + os.chdir("../final") + + print("arvados-bcbio-nextgen: the follow output files will be saved to keep:") + + subprocess.call(["find", ".", "-type", "f", "-printf", "arvados-bcbio-nextgen: %12.12s %h/%f\\n"]) + + print("arvados-bcbio-nextgen: start writing output to keep") + + done = False + api = arvados.api('v1') + while not done: + try: + out = arvados.CollectionWriter() + out.write_directory_tree(".", max_manifest_depth=0) + outuuid = out.finish() + api.job_tasks().update(uuid=arvados.current_task()['uuid'], + body={ + 'output':outuuid, + 'success': (rcode == 0), + 'progress':1.0 + }).execute() + done = True + except Exception as e: + print("arvados-bcbio-nextgen: caught exception: {}".format(e)) + time.sleep(5) + +sys.exit(rcode)