projects
/
arvados.git
/ blobdiff
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
10081: Upload dependencies. Fix keep paths. Ensure correct quoting.
[arvados.git]
/
crunch_scripts
/
arvados-bcbio-nextgen.py
diff --git a/crunch_scripts/arvados-bcbio-nextgen.py b/crunch_scripts/arvados-bcbio-nextgen.py
index 37ac7d370c03b2e70e98cfa58a8292fcd5fb093f..9351b058530285b760f017ca1f95e5c20b442a08 100755
(executable)
--- a/crunch_scripts/arvados-bcbio-nextgen.py
+++ b/crunch_scripts/arvados-bcbio-nextgen.py
@@ -2,9 +2,11 @@
import arvados
import subprocess
import arvados
import subprocess
-import subst
+import crunchutil.subst as subst
import shutil
import os
import shutil
import os
+import sys
+import time
if len(arvados.current_task()['parameters']) > 0:
p = arvados.current_task()['parameters']
if len(arvados.current_task()['parameters']) > 0:
p = arvados.current_task()['parameters']
@@ -69,9 +71,11 @@
with open("/usr/local/share/bcbio-nextgen/galaxy/tool-data/picard_index.loc", "w
with open("/usr/local/share/bcbio-nextgen/galaxy/tool-data/sam_fa_indices.loc", "w") as f:
f.write(subst.do_substitution(p, "index\tGRCh37\t$(file $(sam_fa_indices))\n"))
with open("/usr/local/share/bcbio-nextgen/galaxy/tool-data/sam_fa_indices.loc", "w") as f:
f.write(subst.do_substitution(p, "index\tGRCh37\t$(file $(sam_fa_indices))\n"))
-with open("/tmp/crunch-job/gatk-variant.yaml", "w") as f:
+with open("/tmp/crunch-job/freebayes-variant.yaml", "w") as f:
f.write('''
f.write('''
-# Template for whole genome Illumina variant calling with GATK pipeline
+# Template for whole genome Illumina variant calling with FreeBayes
+# This is a GATK-free pipeline without post-alignment BAM pre-processing
+# (recalibration and realignment)
---
details:
- analysis: variant2
---
details:
- analysis: variant2
@@ -81,20 +85,22 @@
details:
# batch: your-arbitrary-batch-name
algorithm:
aligner: bwa
# batch: your-arbitrary-batch-name
algorithm:
aligner: bwa
- mark_duplicates: picard
- recalibrate: gatk
- realign: gatk
- variantcaller: gatk-haplotype
+ mark_duplicates: true
+ recalibrate: false
+ realign: false
+ variantcaller: freebayes
platform: illumina
quality_format: Standard
platform: illumina
quality_format: Standard
- coverage_interval: genome
# for targetted projects, set the region
# variant_regions: /path/to/your.bed
''')
# for targetted projects, set the region
# variant_regions: /path/to/your.bed
''')
+os.unlink("/usr/local/share/bcbio-nextgen/gemini_data")
+os.symlink(arvados.get_job_param_mount("gemini_data"), "/usr/local/share/bcbio-nextgen/gemini_data")
+
os.chdir(arvados.current_task().tmpdir)
os.chdir(arvados.current_task().tmpdir)
-rcode = subprocess.call(["bcbio_nextgen.py", "--workflow", "template", "/tmp/crunch-job/gatk-variant.yaml", "project1",
+rcode = subprocess.call(["bcbio_nextgen.py", "--workflow", "template", "/tmp/crunch-job/freebayes-variant.yaml", "project1",
subst.do_substitution(p, "$(file $(R1))"),
subst.do_substitution(p, "$(file $(R2))")])
subst.do_substitution(p, "$(file $(R1))"),
subst.do_substitution(p, "$(file $(R2))")])
@@ -116,6 +122,7 @@
if rcode == 0:
print("arvados-bcbio-nextgen: start writing output to keep")
done = False
print("arvados-bcbio-nextgen: start writing output to keep")
done = False
+ api = arvados.api('v1')
while not done:
try:
out = arvados.CollectionWriter()
while not done:
try:
out = arvados.CollectionWriter()