X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/c0f1da371e57752ac48d314264fdd445a05192cb..1465471dacdc30047c02376cd9800efd07d17974:/crunch_scripts/arvados-bcbio-nextgen.py diff --git a/crunch_scripts/arvados-bcbio-nextgen.py b/crunch_scripts/arvados-bcbio-nextgen.py index 070e8cdbaf..b7e19ecddb 100755 --- a/crunch_scripts/arvados-bcbio-nextgen.py +++ b/crunch_scripts/arvados-bcbio-nextgen.py @@ -1,10 +1,15 @@ #!/usr/bin/python +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 import arvados import subprocess -import subst +import crunchutil.subst as subst import shutil import os +import sys +import time if len(arvados.current_task()['parameters']) > 0: p = arvados.current_task()['parameters'] @@ -17,9 +22,7 @@ os.unlink("/usr/local/share/bcbio-nextgen/galaxy") os.mkdir("/usr/local/share/bcbio-nextgen/galaxy") shutil.copy("/usr/local/share/bcbio-nextgen/config/bcbio_system.yaml", "/usr/local/share/bcbio-nextgen/galaxy") -os.mkdir("/usr/local/share/bcbio-nextgen/galaxy/tool-data") - -with open("/usr/local/share/bcbio-nextgen/galaxy/tool-data/tool_data_table_conf.xml", "w") as f: +with open("/usr/local/share/bcbio-nextgen/galaxy/tool_data_table_conf.xml", "w") as f: f.write(''' @@ -54,11 +57,28 @@ with open("/usr/local/share/bcbio-nextgen/galaxy/tool-data/tool_data_table_conf. ''') -os.mkdir("/usr/local/share/bcbio-nextgen/galaxy/templates") +os.mkdir("/usr/local/share/bcbio-nextgen/galaxy/tool-data") + +with open("/usr/local/share/bcbio-nextgen/galaxy/tool-data/bowtie2_indices.loc", "w") as f: + f.write(subst.do_substitution(p, "GRCh37\tGRCh37\tHuman (GRCh37)\t$(dir $(bowtie2_indices))\n")) + +with open("/usr/local/share/bcbio-nextgen/galaxy/tool-data/bwa_index.loc", "w") as f: + f.write(subst.do_substitution(p, "GRCh37\tGRCh37\tHuman (GRCh37)\t$(file $(bwa_index))\n")) + +with open("/usr/local/share/bcbio-nextgen/galaxy/tool-data/gatk_sorted_picard_index.loc", "w") as f: + f.write(subst.do_substitution(p, "GRCh37\tGRCh37\tHuman (GRCh37)\t$(file $(gatk_sorted_picard_index))\n")) + +with open("/usr/local/share/bcbio-nextgen/galaxy/tool-data/picard_index.loc", "w") as f: + f.write(subst.do_substitution(p, "GRCh37\tGRCh37\tHuman (GRCh37)\t$(file $(picard_index))\n")) + +with open("/usr/local/share/bcbio-nextgen/galaxy/tool-data/sam_fa_indices.loc", "w") as f: + f.write(subst.do_substitution(p, "index\tGRCh37\t$(file $(sam_fa_indices))\n")) -with open("/usr/local/share/bcbio-nextgen/galaxy/templates/gatk-variant.yaml", "w") as f: +with open("/tmp/crunch-job/freebayes-variant.yaml", "w") as f: f.write(''' -# Template for whole genome Illumina variant calling with GATK pipeline +# Template for whole genome Illumina variant calling with FreeBayes +# This is a GATK-free pipeline without post-alignment BAM pre-processing +# (recalibration and realignment) --- details: - analysis: variant2 @@ -68,38 +88,58 @@ details: # batch: your-arbitrary-batch-name algorithm: aligner: bwa - mark_duplicates: picard - recalibrate: gatk - realign: gatk - variantcaller: gatk-haplotype + mark_duplicates: true + recalibrate: false + realign: false + variantcaller: freebayes platform: illumina quality_format: Standard - coverage_interval: genome # for targetted projects, set the region # variant_regions: /path/to/your.bed ''') -with open("/usr/local/share/bcbio-nextgen/galaxy/tool-data/bowtie2_indices.loc", "w") as f: - f.write(subst.do_substitution(p, "GRCh37\tGRCh37\tHuman (GRCh37)\t$(dir $(bowtie2_indices))")) +os.unlink("/usr/local/share/bcbio-nextgen/gemini_data") +os.symlink(arvados.get_job_param_mount("gemini_data"), "/usr/local/share/bcbio-nextgen/gemini_data") -with open("/usr/local/share/bcbio-nextgen/galaxy/tool-data/bwa_indices.loc", "w") as f: - f.write(subst.do_substitution(p, "GRCh37\tGRCh37\tHuman (GRCh37)\t$(file $(bwa_indices))")) +os.chdir(arvados.current_task().tmpdir) -with open("/usr/local/share/bcbio-nextgen/galaxy/tool-data/gatk_sorted_picard_index.loc", "w") as f: - f.write(subst.do_substitution(p, "GRCh37\tGRCh37\tHuman (GRCh37)\t$(file $(gatk_sorted_picard_index))")) +rcode = subprocess.call(["bcbio_nextgen.py", "--workflow", "template", "/tmp/crunch-job/freebayes-variant.yaml", "project1", + subst.do_substitution(p, "$(file $(R1))"), + subst.do_substitution(p, "$(file $(R2))")]) -with open("/usr/local/share/bcbio-nextgen/galaxy/tool-data/picard_index.loc", "w") as f: - f.write(subst.do_substitution(p, "GRCh37\tGRCh37\tHuman (GRCh37)\t$(file $(picard_index))")) +os.chdir("project1/work") -with open("/usr/local/share/bcbio-nextgen/galaxy/tool-data/sam_fa_indices.loc", "w") as f: - f.write(subst.do_substitution(p, "index\tGRCh37\t$(file $(sam_fa_indices))")) +os.symlink("/usr/local/share/bcbio-nextgen/galaxy/tool-data", "tool-data") -os.chdir(arvados.current_task().tmpdir) +rcode = subprocess.call(["bcbio_nextgen.py", "../config/project1.yaml", "-n", os.environ['CRUNCH_NODE_SLOTS']]) -rcode = subprocess.call(["bcbio_nextgen.py", "--workflow", "template", "gatk-variant", "project1", - subst.do_substitution(p, "$(R1)"), - subst.do_substitution(p, "$(R2)")]) +print("run-command: completed with exit code %i (%s)" % (rcode, "success" if rcode == 0 else "failed")) -os.chdir("project1/work") +if rcode == 0: + os.chdir("../final") + + print("arvados-bcbio-nextgen: the follow output files will be saved to keep:") + + subprocess.call(["find", ".", "-type", "f", "-printf", "arvados-bcbio-nextgen: %12.12s %h/%f\\n"]) + + print("arvados-bcbio-nextgen: start writing output to keep") + + done = False + api = arvados.api('v1') + while not done: + try: + out = arvados.CollectionWriter() + out.write_directory_tree(".", max_manifest_depth=0) + outuuid = out.finish() + api.job_tasks().update(uuid=arvados.current_task()['uuid'], + body={ + 'output':outuuid, + 'success': (rcode == 0), + 'progress':1.0 + }).execute() + done = True + except Exception as e: + print("arvados-bcbio-nextgen: caught exception: {}".format(e)) + time.sleep(5) -rcode = subprocess.call(["bcbio_nextgen.py", "../config/project1.yaml"]) +sys.exit(rcode)