2411: Maintain copyright header in structure.sql.
[arvados.git] / crunch_scripts / arvados-bcbio-nextgen.py
index 37ac7d370c03b2e70e98cfa58a8292fcd5fb093f..b7e19ecddb67061b3addde4a82e8a4a59c969202 100755 (executable)
@@ -1,10 +1,15 @@
 #!/usr/bin/python
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
 
 import arvados
 import subprocess
-import subst
+import crunchutil.subst as subst
 import shutil
 import os
+import sys
+import time
 
 if len(arvados.current_task()['parameters']) > 0:
     p = arvados.current_task()['parameters']
@@ -69,9 +74,11 @@ with open("/usr/local/share/bcbio-nextgen/galaxy/tool-data/picard_index.loc", "w
 with open("/usr/local/share/bcbio-nextgen/galaxy/tool-data/sam_fa_indices.loc", "w") as f:
     f.write(subst.do_substitution(p, "index\tGRCh37\t$(file $(sam_fa_indices))\n"))
 
-with open("/tmp/crunch-job/gatk-variant.yaml", "w") as f:
+with open("/tmp/crunch-job/freebayes-variant.yaml", "w") as f:
     f.write('''
-# Template for whole genome Illumina variant calling with GATK pipeline
+# Template for whole genome Illumina variant calling with FreeBayes
+# This is a GATK-free pipeline without post-alignment BAM pre-processing
+# (recalibration and realignment)
 ---
 details:
   - analysis: variant2
@@ -81,20 +88,22 @@ details:
     #   batch: your-arbitrary-batch-name
     algorithm:
       aligner: bwa
-      mark_duplicates: picard
-      recalibrate: gatk
-      realign: gatk
-      variantcaller: gatk-haplotype
+      mark_duplicates: true
+      recalibrate: false
+      realign: false
+      variantcaller: freebayes
       platform: illumina
       quality_format: Standard
-      coverage_interval: genome
       # for targetted projects, set the region
       # variant_regions: /path/to/your.bed
 ''')
 
+os.unlink("/usr/local/share/bcbio-nextgen/gemini_data")
+os.symlink(arvados.get_job_param_mount("gemini_data"), "/usr/local/share/bcbio-nextgen/gemini_data")
+
 os.chdir(arvados.current_task().tmpdir)
 
-rcode = subprocess.call(["bcbio_nextgen.py", "--workflow", "template", "/tmp/crunch-job/gatk-variant.yaml", "project1",
+rcode = subprocess.call(["bcbio_nextgen.py", "--workflow", "template", "/tmp/crunch-job/freebayes-variant.yaml", "project1",
                          subst.do_substitution(p, "$(file $(R1))"),
                          subst.do_substitution(p, "$(file $(R2))")])
 
@@ -116,6 +125,7 @@ if rcode == 0:
     print("arvados-bcbio-nextgen: start writing output to keep")
 
     done = False
+    api = arvados.api('v1')
     while not done:
         try:
             out = arvados.CollectionWriter()