X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/42acc196ca8f5908f3cc214d5f715eb1db82c7a8..ca06cfbda0e84d469f7810a280cfa4dfa8997260:/crunch_scripts/arvados-bcbio-nextgen.py diff --git a/crunch_scripts/arvados-bcbio-nextgen.py b/crunch_scripts/arvados-bcbio-nextgen.py index 8781087667..b7e19ecddb 100755 --- a/crunch_scripts/arvados-bcbio-nextgen.py +++ b/crunch_scripts/arvados-bcbio-nextgen.py @@ -1,10 +1,15 @@ #!/usr/bin/python +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 import arvados import subprocess -import subst +import crunchutil.subst as subst import shutil import os +import sys +import time if len(arvados.current_task()['parameters']) > 0: p = arvados.current_task()['parameters'] @@ -55,23 +60,25 @@ with open("/usr/local/share/bcbio-nextgen/galaxy/tool_data_table_conf.xml", "w") os.mkdir("/usr/local/share/bcbio-nextgen/galaxy/tool-data") with open("/usr/local/share/bcbio-nextgen/galaxy/tool-data/bowtie2_indices.loc", "w") as f: - f.write(subst.do_substitution(p, "GRCh37\tGRCh37\tHuman (GRCh37)\t$(dir $(bowtie2_indices))")) + f.write(subst.do_substitution(p, "GRCh37\tGRCh37\tHuman (GRCh37)\t$(dir $(bowtie2_indices))\n")) -with open("/usr/local/share/bcbio-nextgen/galaxy/tool-data/bwa_indices.loc", "w") as f: - f.write(subst.do_substitution(p, "GRCh37\tGRCh37\tHuman (GRCh37)\t$(file $(bwa_indices))")) +with open("/usr/local/share/bcbio-nextgen/galaxy/tool-data/bwa_index.loc", "w") as f: + f.write(subst.do_substitution(p, "GRCh37\tGRCh37\tHuman (GRCh37)\t$(file $(bwa_index))\n")) with open("/usr/local/share/bcbio-nextgen/galaxy/tool-data/gatk_sorted_picard_index.loc", "w") as f: - f.write(subst.do_substitution(p, "GRCh37\tGRCh37\tHuman (GRCh37)\t$(file $(gatk_sorted_picard_index))")) + f.write(subst.do_substitution(p, "GRCh37\tGRCh37\tHuman (GRCh37)\t$(file $(gatk_sorted_picard_index))\n")) with open("/usr/local/share/bcbio-nextgen/galaxy/tool-data/picard_index.loc", "w") as f: - f.write(subst.do_substitution(p, "GRCh37\tGRCh37\tHuman (GRCh37)\t$(file $(picard_index))")) + f.write(subst.do_substitution(p, "GRCh37\tGRCh37\tHuman (GRCh37)\t$(file $(picard_index))\n")) with open("/usr/local/share/bcbio-nextgen/galaxy/tool-data/sam_fa_indices.loc", "w") as f: - f.write(subst.do_substitution(p, "index\tGRCh37\t$(file $(sam_fa_indices))")) + f.write(subst.do_substitution(p, "index\tGRCh37\t$(file $(sam_fa_indices))\n")) -with open("/tmp/crunch-job/gatk-variant.yaml", "w") as f: +with open("/tmp/crunch-job/freebayes-variant.yaml", "w") as f: f.write(''' -# Template for whole genome Illumina variant calling with GATK pipeline +# Template for whole genome Illumina variant calling with FreeBayes +# This is a GATK-free pipeline without post-alignment BAM pre-processing +# (recalibration and realignment) --- details: - analysis: variant2 @@ -81,20 +88,22 @@ details: # batch: your-arbitrary-batch-name algorithm: aligner: bwa - mark_duplicates: picard - recalibrate: gatk - realign: gatk - variantcaller: gatk-haplotype + mark_duplicates: true + recalibrate: false + realign: false + variantcaller: freebayes platform: illumina quality_format: Standard - coverage_interval: genome # for targetted projects, set the region # variant_regions: /path/to/your.bed ''') +os.unlink("/usr/local/share/bcbio-nextgen/gemini_data") +os.symlink(arvados.get_job_param_mount("gemini_data"), "/usr/local/share/bcbio-nextgen/gemini_data") + os.chdir(arvados.current_task().tmpdir) -rcode = subprocess.call(["bcbio_nextgen.py", "--workflow", "template", "/tmp/crunch-job/gatk-variant.yaml", "project1", +rcode = subprocess.call(["bcbio_nextgen.py", "--workflow", "template", "/tmp/crunch-job/freebayes-variant.yaml", "project1", subst.do_substitution(p, "$(file $(R1))"), subst.do_substitution(p, "$(file $(R2))")]) @@ -102,4 +111,35 @@ os.chdir("project1/work") os.symlink("/usr/local/share/bcbio-nextgen/galaxy/tool-data", "tool-data") -rcode = subprocess.call(["bcbio_nextgen.py", "../config/project1.yaml"]) +rcode = subprocess.call(["bcbio_nextgen.py", "../config/project1.yaml", "-n", os.environ['CRUNCH_NODE_SLOTS']]) + +print("run-command: completed with exit code %i (%s)" % (rcode, "success" if rcode == 0 else "failed")) + +if rcode == 0: + os.chdir("../final") + + print("arvados-bcbio-nextgen: the follow output files will be saved to keep:") + + subprocess.call(["find", ".", "-type", "f", "-printf", "arvados-bcbio-nextgen: %12.12s %h/%f\\n"]) + + print("arvados-bcbio-nextgen: start writing output to keep") + + done = False + api = arvados.api('v1') + while not done: + try: + out = arvados.CollectionWriter() + out.write_directory_tree(".", max_manifest_depth=0) + outuuid = out.finish() + api.job_tasks().update(uuid=arvados.current_task()['uuid'], + body={ + 'output':outuuid, + 'success': (rcode == 0), + 'progress':1.0 + }).execute() + done = True + except Exception as e: + print("arvados-bcbio-nextgen: caught exception: {}".format(e)) + time.sleep(5) + +sys.exit(rcode)