X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/9060fbf9984b5e05081931c4c30846fd304ebced..ad464d416de6996a41d0c752124e0d201de0d3fc:/crunch_scripts/arvados-bcbio-nextgen.py diff --git a/crunch_scripts/arvados-bcbio-nextgen.py b/crunch_scripts/arvados-bcbio-nextgen.py index feb8ce0ac8..9351b05853 100755 --- a/crunch_scripts/arvados-bcbio-nextgen.py +++ b/crunch_scripts/arvados-bcbio-nextgen.py @@ -2,9 +2,11 @@ import arvados import subprocess -import subst +import crunchutil.subst as subst import shutil import os +import sys +import time if len(arvados.current_task()['parameters']) > 0: p = arvados.current_task()['parameters'] @@ -17,9 +19,7 @@ os.unlink("/usr/local/share/bcbio-nextgen/galaxy") os.mkdir("/usr/local/share/bcbio-nextgen/galaxy") shutil.copy("/usr/local/share/bcbio-nextgen/config/bcbio_system.yaml", "/usr/local/share/bcbio-nextgen/galaxy") -os.mkdir("/usr/local/share/bcbio-nextgen/galaxy/tool-data") - -with open("/usr/local/share/bcbio-nextgen/galaxy/tool-data/tool_data_table_conf.xml", "w") as f: +with open("/usr/local/share/bcbio-nextgen/galaxy/tool_data_table_conf.xml", "w") as f: f.write(''' @@ -54,27 +54,89 @@ with open("/usr/local/share/bcbio-nextgen/galaxy/tool-data/tool_data_table_conf. ''') +os.mkdir("/usr/local/share/bcbio-nextgen/galaxy/tool-data") + with open("/usr/local/share/bcbio-nextgen/galaxy/tool-data/bowtie2_indices.loc", "w") as f: - f.write(subst.do_substitution(p, "GRCh37\tGRCh37\tHuman (GRCh37)\t$(dir $(bowtie2_indices))")) + f.write(subst.do_substitution(p, "GRCh37\tGRCh37\tHuman (GRCh37)\t$(dir $(bowtie2_indices))\n")) -with open("/usr/local/share/bcbio-nextgen/galaxy/tool-data/bwa_indices.loc", "w") as f: - f.write(subst.do_substitution(p, "GRCh37\tGRCh37\tHuman (GRCh37)\t$(file $(bwa_indices))")) +with open("/usr/local/share/bcbio-nextgen/galaxy/tool-data/bwa_index.loc", "w") as f: + f.write(subst.do_substitution(p, "GRCh37\tGRCh37\tHuman (GRCh37)\t$(file $(bwa_index))\n")) with open("/usr/local/share/bcbio-nextgen/galaxy/tool-data/gatk_sorted_picard_index.loc", "w") as f: - f.write(subst.do_substitution(p, "GRCh37\tGRCh37\tHuman (GRCh37)\t$(file $(gatk_sorted_picard_index))")) + f.write(subst.do_substitution(p, "GRCh37\tGRCh37\tHuman (GRCh37)\t$(file $(gatk_sorted_picard_index))\n")) with open("/usr/local/share/bcbio-nextgen/galaxy/tool-data/picard_index.loc", "w") as f: - f.write(subst.do_substitution(p, "GRCh37\tGRCh37\tHuman (GRCh37)\t$(file $(picard_index))")) + f.write(subst.do_substitution(p, "GRCh37\tGRCh37\tHuman (GRCh37)\t$(file $(picard_index))\n")) with open("/usr/local/share/bcbio-nextgen/galaxy/tool-data/sam_fa_indices.loc", "w") as f: - f.write(subst.do_substitution(p, "index\tGRCh37\t$(file $(sam_fa_indices))")) + f.write(subst.do_substitution(p, "index\tGRCh37\t$(file $(sam_fa_indices))\n")) + +with open("/tmp/crunch-job/freebayes-variant.yaml", "w") as f: + f.write(''' +# Template for whole genome Illumina variant calling with FreeBayes +# This is a GATK-free pipeline without post-alignment BAM pre-processing +# (recalibration and realignment) +--- +details: + - analysis: variant2 + genome_build: GRCh37 + # to do multi-sample variant calling, assign samples the same metadata / batch + # metadata: + # batch: your-arbitrary-batch-name + algorithm: + aligner: bwa + mark_duplicates: true + recalibrate: false + realign: false + variantcaller: freebayes + platform: illumina + quality_format: Standard + # for targetted projects, set the region + # variant_regions: /path/to/your.bed +''') + +os.unlink("/usr/local/share/bcbio-nextgen/gemini_data") +os.symlink(arvados.get_job_param_mount("gemini_data"), "/usr/local/share/bcbio-nextgen/gemini_data") os.chdir(arvados.current_task().tmpdir) -rcode = subprocess.call(["bcbio_nextgen.py", "--workflow", "template", "gatk-variant", "project1", - subst.do_substitution(p, "$(R1)"), - subst.do_substitution(p, "$(R2)")]) +rcode = subprocess.call(["bcbio_nextgen.py", "--workflow", "template", "/tmp/crunch-job/freebayes-variant.yaml", "project1", + subst.do_substitution(p, "$(file $(R1))"), + subst.do_substitution(p, "$(file $(R2))")]) os.chdir("project1/work") -rcode = subprocess.call(["bcbio_nextgen.py", "../config/project1.yaml"]) +os.symlink("/usr/local/share/bcbio-nextgen/galaxy/tool-data", "tool-data") + +rcode = subprocess.call(["bcbio_nextgen.py", "../config/project1.yaml", "-n", os.environ['CRUNCH_NODE_SLOTS']]) + +print("run-command: completed with exit code %i (%s)" % (rcode, "success" if rcode == 0 else "failed")) + +if rcode == 0: + os.chdir("../final") + + print("arvados-bcbio-nextgen: the follow output files will be saved to keep:") + + subprocess.call(["find", ".", "-type", "f", "-printf", "arvados-bcbio-nextgen: %12.12s %h/%f\\n"]) + + print("arvados-bcbio-nextgen: start writing output to keep") + + done = False + api = arvados.api('v1') + while not done: + try: + out = arvados.CollectionWriter() + out.write_directory_tree(".", max_manifest_depth=0) + outuuid = out.finish() + api.job_tasks().update(uuid=arvados.current_task()['uuid'], + body={ + 'output':outuuid, + 'success': (rcode == 0), + 'progress':1.0 + }).execute() + done = True + except Exception as e: + print("arvados-bcbio-nextgen: caught exception: {}".format(e)) + time.sleep(5) + +sys.exit(rcode)