Backed that out, make a symlink to tool-data instead
author Peter Amstutz <peter.amstutz@curoverse.com>
Wed, 2 Jul 2014 20:48:38 +0000 (16:48 -0400)
committer Peter Amstutz <peter.amstutz@curoverse.com>
Wed, 2 Jul 2014 20:48:38 +0000 (16:48 -0400)
crunch_scripts/arvados-bcbio-nextgen.py

index ea2e7b41f048e59320f3d192b1fb7f15d2acb75f..8781087667d998c6e22d480b7d3b0a3f196602ce 100755 (executable)
@@ -11,13 +11,65 @@ if len(arvados.current_task()['parameters']) > 0:
 else:
     p = arvados.current_job()['script_parameters']
 
+t = arvados.current_task().tmpdir
+
 os.unlink("/usr/local/share/bcbio-nextgen/galaxy")
 os.mkdir("/usr/local/share/bcbio-nextgen/galaxy")
 shutil.copy("/usr/local/share/bcbio-nextgen/config/bcbio_system.yaml", "/usr/local/share/bcbio-nextgen/galaxy")
 
-os.chdir(arvados.current_task().tmpdir)
+with open("/usr/local/share/bcbio-nextgen/galaxy/tool_data_table_conf.xml", "w") as f:
+    f.write('''<tables>
+    <!-- Locations of indexes in the BWA mapper format -->
+    <table name="bwa_indexes" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/bwa_index.loc" />
+    </table>
+    <!-- Locations of indexes in the Bowtie2 mapper format -->
+    <table name="bowtie2_indexes" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/bowtie2_indices.loc" />
+    </table>
+    <!-- Locations of indexes in the Bowtie2 mapper format for TopHat2 to use -->
+    <table name="tophat2_indexes" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/bowtie2_indices.loc" />
+    </table>
+    <!-- Location of SAMTools indexes and other files -->
+    <table name="sam_fa_indexes" comment_char="#">
+        <columns>index, value, path</columns>
+        <file path="tool-data/sam_fa_indices.loc" />
+    </table>
+    <!-- Location of Picard dict file and other files -->
+    <table name="picard_indexes" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/picard_index.loc" />
+    </table>
+    <!-- Location of Picard dict files valid for GATK -->
+    <table name="gatk_picard_indexes" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/gatk_sorted_picard_index.loc" />
+    </table>
+</tables>
+''')
+
+os.mkdir("/usr/local/share/bcbio-nextgen/galaxy/tool-data")
+
+with open("/usr/local/share/bcbio-nextgen/galaxy/tool-data/bowtie2_indices.loc", "w") as f:
+    f.write(subst.do_substitution(p, "GRCh37\tGRCh37\tHuman (GRCh37)\t$(dir $(bowtie2_indices))"))
+
+with open("/usr/local/share/bcbio-nextgen/galaxy/tool-data/bwa_indices.loc", "w") as f:
+    f.write(subst.do_substitution(p, "GRCh37\tGRCh37\tHuman (GRCh37)\t$(file $(bwa_indices))"))
+
+with open("/usr/local/share/bcbio-nextgen/galaxy/tool-data/gatk_sorted_picard_index.loc", "w") as f:
+    f.write(subst.do_substitution(p, "GRCh37\tGRCh37\tHuman (GRCh37)\t$(file $(gatk_sorted_picard_index))"))
+
+with open("/usr/local/share/bcbio-nextgen/galaxy/tool-data/picard_index.loc", "w") as f:
+    f.write(subst.do_substitution(p, "GRCh37\tGRCh37\tHuman (GRCh37)\t$(file $(picard_index))"))
+
+with open("/usr/local/share/bcbio-nextgen/galaxy/tool-data/sam_fa_indices.loc", "w") as f:
+    f.write(subst.do_substitution(p, "index\tGRCh37\t$(file $(sam_fa_indices))"))
 
-with open("gatk-variant.yaml", "w") as f:
+with open("/tmp/crunch-job/gatk-variant.yaml", "w") as f:
     f.write('''
 # Template for whole genome Illumina variant calling with GATK pipeline
 ---
@@ -40,27 +92,14 @@ details:
       # variant_regions: /path/to/your.bed
 ''')
 
-rcode = subprocess.call(["bcbio_nextgen.py", "--workflow", "template", "gatk-variant.yaml", "project1",
+os.chdir(arvados.current_task().tmpdir)
+
+rcode = subprocess.call(["bcbio_nextgen.py", "--workflow", "template", "/tmp/crunch-job/gatk-variant.yaml", "project1",
                          subst.do_substitution(p, "$(file $(R1))"),
                          subst.do_substitution(p, "$(file $(R2))")])
 
 os.chdir("project1/work")
 
-os.mkdir("tool-data")
-
-with open("tool-data/bowtie2_indices.loc", "w") as f:
-    f.write(subst.do_substitution(p, "GRCh37\tGRCh37\tHuman (GRCh37)\t$(dir $(bowtie2_indices))"))
-
-with open("tool-data/bwa_indices.loc", "w") as f:
-    f.write(subst.do_substitution(p, "GRCh37\tGRCh37\tHuman (GRCh37)\t$(file $(bwa_indices))"))
-
-with open("tool-data/gatk_sorted_picard_index.loc", "w") as f:
-    f.write(subst.do_substitution(p, "GRCh37\tGRCh37\tHuman (GRCh37)\t$(file $(gatk_sorted_picard_index))"))
-
-with open("tool-data/picard_index.loc", "w") as f:
-    f.write(subst.do_substitution(p, "GRCh37\tGRCh37\tHuman (GRCh37)\t$(file $(picard_index))"))
-
-with open("tool-data/sam_fa_indices.loc", "w") as f:
-    f.write(subst.do_substitution(p, "index\tGRCh37\t$(file $(sam_fa_indices))"))
+os.symlink("/usr/local/share/bcbio-nextgen/galaxy/tool-data", "tool-data")
 
 rcode = subprocess.call(["bcbio_nextgen.py", "../config/project1.yaml"])