X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/d28b1cebc8b799ea1e65a50826602392b446ea29..23a70b3597649d58014fa8968271b8c850306af9:/crunch_scripts/arvados-bcbio-nextgen.py diff --git a/crunch_scripts/arvados-bcbio-nextgen.py b/crunch_scripts/arvados-bcbio-nextgen.py index 37ac7d370c..b7e19ecddb 100755 --- a/crunch_scripts/arvados-bcbio-nextgen.py +++ b/crunch_scripts/arvados-bcbio-nextgen.py @@ -1,10 +1,15 @@ #!/usr/bin/python +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 import arvados import subprocess -import subst +import crunchutil.subst as subst import shutil import os +import sys +import time if len(arvados.current_task()['parameters']) > 0: p = arvados.current_task()['parameters'] @@ -69,9 +74,11 @@ with open("/usr/local/share/bcbio-nextgen/galaxy/tool-data/picard_index.loc", "w with open("/usr/local/share/bcbio-nextgen/galaxy/tool-data/sam_fa_indices.loc", "w") as f: f.write(subst.do_substitution(p, "index\tGRCh37\t$(file $(sam_fa_indices))\n")) -with open("/tmp/crunch-job/gatk-variant.yaml", "w") as f: +with open("/tmp/crunch-job/freebayes-variant.yaml", "w") as f: f.write(''' -# Template for whole genome Illumina variant calling with GATK pipeline +# Template for whole genome Illumina variant calling with FreeBayes +# This is a GATK-free pipeline without post-alignment BAM pre-processing +# (recalibration and realignment) --- details: - analysis: variant2 @@ -81,20 +88,22 @@ details: # batch: your-arbitrary-batch-name algorithm: aligner: bwa - mark_duplicates: picard - recalibrate: gatk - realign: gatk - variantcaller: gatk-haplotype + mark_duplicates: true + recalibrate: false + realign: false + variantcaller: freebayes platform: illumina quality_format: Standard - coverage_interval: genome # for targetted projects, set the region # variant_regions: /path/to/your.bed ''') +os.unlink("/usr/local/share/bcbio-nextgen/gemini_data") +os.symlink(arvados.get_job_param_mount("gemini_data"), "/usr/local/share/bcbio-nextgen/gemini_data") + os.chdir(arvados.current_task().tmpdir) -rcode = subprocess.call(["bcbio_nextgen.py", "--workflow", "template", "/tmp/crunch-job/gatk-variant.yaml", "project1", +rcode = subprocess.call(["bcbio_nextgen.py", "--workflow", "template", "/tmp/crunch-job/freebayes-variant.yaml", "project1", subst.do_substitution(p, "$(file $(R1))"), subst.do_substitution(p, "$(file $(R2))")]) @@ -116,6 +125,7 @@ if rcode == 0: print("arvados-bcbio-nextgen: start writing output to keep") done = False + api = arvados.api('v1') while not done: try: out = arvados.CollectionWriter()