type outputFormat struct {
Filename string
+ Head func(out io.Writer, cgs []CompactGenome)
Print func(out io.Writer, seqname string, varslice []hgvs.Variant)
PadLeft bool
}
"pvcf": outputFormatPVCF,
"vcf": outputFormatVCF,
}
- outputFormatHGVS = outputFormat{Filename: "out.csv", Print: printHGVS}
- outputFormatHGVSOneHot = outputFormat{Filename: "out.csv", Print: printHGVSOneHot}
- outputFormatPVCF = outputFormat{Filename: "out.vcf", Print: printPVCF, PadLeft: true}
- outputFormatVCF = outputFormat{Filename: "out.vcf", Print: printVCF, PadLeft: true}
+ outputFormatHGVS = outputFormat{Filename: "out.csv", Head: headNone, Print: printHGVS}
+ outputFormatHGVSOneHot = outputFormat{Filename: "out.csv", Head: headNone, Print: printHGVSOneHot}
+ outputFormatPVCF = outputFormat{Filename: "out.vcf", Head: headPVCF, Print: printPVCF, PadLeft: true}
+ outputFormatVCF = outputFormat{Filename: "out.vcf", Head: headVCF, Print: printVCF, PadLeft: true}
+ headNone = func(io.Writer, []CompactGenome) {}
)
type exporter struct {
}
if cmd.outputPerChrom {
for i, seqname := range seqnames {
- f, err := os.OpenFile(filepath.Join(outdir, strings.Replace(cmd.outputFormat.Filename, ".", "."+seqname+".", 1)), os.O_CREATE|os.O_WRONLY, 0666)
+ f, err := os.OpenFile(filepath.Join(outdir, strings.Replace(cmd.outputFormat.Filename, ".", "."+seqname+".", 1)), os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0666)
if err != nil {
return err
}
defer f.Close()
log.Infof("writing %q", f.Name())
+ cmd.outputFormat.Head(f, cgs)
outw[i] = f
}
} else {
fnm := filepath.Join(outdir, cmd.outputFormat.Filename)
log.Infof("writing %q", fnm)
- out, err := os.OpenFile(fnm, os.O_CREATE|os.O_WRONLY, 0666)
+ out, err := os.OpenFile(fnm, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0666)
if err != nil {
return err
}
defer out.Close()
+ cmd.outputFormat.Head(out, cgs)
merge(out, outw, "output")
}
if bedout != nil {
return byref
}
+func headVCF(out io.Writer, cgs []CompactGenome) {
+ fmt.Fprint(out, "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n")
+}
+
func printVCF(out io.Writer, seqname string, varslice []hgvs.Variant) {
for ref, alts := range bucketVarsliceByRef(varslice) {
altslice := make([]string, 0, len(alts))
}
info += strconv.Itoa(alts[a])
}
- fmt.Fprintf(out, "%s\t%d\t%s\t%s\t.\t.\t%s\tGT\t0/1\n", seqname, varslice[0].Position, ref, strings.Join(altslice, ","), info)
+ fmt.Fprintf(out, "%s\t%d\t.\t%s\t%s\t.\t.\t%s\n", seqname, varslice[0].Position, ref, strings.Join(altslice, ","), info)
+ }
+}
+
+func headPVCF(out io.Writer, cgs []CompactGenome) {
+ fmt.Fprintln(out, `##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">`)
+ fmt.Fprintf(out, "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT")
+ for _, cg := range cgs {
+ fmt.Fprintf(out, "\t%s", cg.Name)
}
+ fmt.Fprintf(out, "\n")
}
func printPVCF(out io.Writer, seqname string, varslice []hgvs.Variant) {
for i, a := range altslice {
alts[a] = i + 1
}
- fmt.Fprintf(out, "%s\t%d\t%s\t%s\t.\t.\tGT", seqname, varslice[0].Position, ref, strings.Join(altslice, ","))
+ fmt.Fprintf(out, "%s\t%d\t.\t%s\t%s\t.\t.\t.\tGT", seqname, varslice[0].Position, ref, strings.Join(altslice, ","))
for i := 0; i < len(varslice); i += 2 {
v1, v2 := varslice[i], varslice[i+1]
a1, a2 := alts[v1.New], alts[v2.New]
output, err = ioutil.ReadFile(tmpdir + "/out.chr1.vcf")
c.Check(err, check.IsNil)
c.Log(string(output))
- c.Check(sortLines(string(output)), check.Equals, sortLines(`chr1 1 NNN GGC . . GT 1/1 0/0
-chr1 41 TT AA . . GT 1/0 0/0
-chr1 161 A T . . GT 0/1 0/0
-chr1 178 A T . . GT 0/1 0/0
-chr1 221 TCCA T . . GT 1/1 0/0
-chr1 302 TTTT AAAA . . GT 0/1 0/0
+ c.Check(sortLines(string(output)), check.Equals, sortLines(`##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT testdata/pipeline1/input1.1.fasta testdata/pipeline1/input2.1.fasta
+chr1 1 . NNN GGC . . . GT 1/1 0/0
+chr1 41 . TT AA . . . GT 1/0 0/0
+chr1 161 . A T . . . GT 0/1 0/0
+chr1 178 . A T . . . GT 0/1 0/0
+chr1 221 . TCCA T . . . GT 1/1 0/0
+chr1 302 . TTTT AAAA . . . GT 0/1 0/0
`))
output, err = ioutil.ReadFile(tmpdir + "/out.chr2.vcf")
c.Check(err, check.IsNil)
c.Log(string(output))
- c.Check(sortLines(string(output)), check.Equals, sortLines(`chr2 1 TTT AAA . . GT 0/0 0/1
-chr2 125 CTT AAA . . GT 0/0 1/1
-chr2 240 ATTTTTCTTGCTCTC A . . GT 1/0 0/0
-chr2 258 CCTTGTATTTTT AA . . GT 1/0 0/0
-chr2 315 C A . . GT 1/0 0/0
-chr2 469 GTGG G . . GT 1/0 0/0
-chr2 471 GG AA . . GT 0/1 0/0
+ c.Check(sortLines(string(output)), check.Equals, sortLines(`##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT testdata/pipeline1/input1.1.fasta testdata/pipeline1/input2.1.fasta
+chr2 1 . TTT AAA . . . GT 0/0 0/1
+chr2 125 . CTT AAA . . . GT 0/0 1/1
+chr2 240 . ATTTTTCTTGCTCTC A . . . GT 1/0 0/0
+chr2 258 . CCTTGTATTTTT AA . . . GT 1/0 0/0
+chr2 315 . C A . . . GT 1/0 0/0
+chr2 469 . GTGG G . . . GT 1/0 0/0
+chr2 471 . GG AA . . . GT 0/1 0/0
`))
exited = (&exporter{}).RunCommand("export", []string{
output, err = ioutil.ReadFile(tmpdir + "/out.chr1.vcf")
c.Check(err, check.IsNil)
c.Log(string(output))
- c.Check(sortLines(string(output)), check.Equals, sortLines(`chr1 1 NNN GGC . . AC=2 GT 0/1
-chr1 41 TT AA . . AC=1 GT 0/1
-chr1 161 A T . . AC=1 GT 0/1
-chr1 178 A T . . AC=1 GT 0/1
-chr1 221 TCCA T . . AC=2 GT 0/1
-chr1 302 TTTT AAAA . . AC=1 GT 0/1
+ c.Check(sortLines(string(output)), check.Equals, sortLines(`#CHROM POS ID REF ALT QUAL FILTER INFO
+chr1 1 . NNN GGC . . AC=2
+chr1 41 . TT AA . . AC=1
+chr1 161 . A T . . AC=1
+chr1 178 . A T . . AC=1
+chr1 221 . TCCA T . . AC=2
+chr1 302 . TTTT AAAA . . AC=1
`))
output, err = ioutil.ReadFile(tmpdir + "/out.chr2.vcf")
c.Check(err, check.IsNil)
c.Log(string(output))
- c.Check(sortLines(string(output)), check.Equals, sortLines(`chr2 1 TTT AAA . . AC=1 GT 0/1
-chr2 125 CTT AAA . . AC=2 GT 0/1
-chr2 240 ATTTTTCTTGCTCTC A . . AC=1 GT 0/1
-chr2 258 CCTTGTATTTTT AA . . AC=1 GT 0/1
-chr2 315 C A . . AC=1 GT 0/1
-chr2 469 GTGG G . . AC=1 GT 0/1
-chr2 471 GG AA . . AC=1 GT 0/1
+ c.Check(sortLines(string(output)), check.Equals, sortLines(`#CHROM POS ID REF ALT QUAL FILTER INFO
+chr2 1 . TTT AAA . . AC=1
+chr2 125 . CTT AAA . . AC=2
+chr2 240 . ATTTTTCTTGCTCTC A . . AC=1
+chr2 258 . CCTTGTATTTTT AA . . AC=1
+chr2 315 . C A . . AC=1
+chr2 469 . GTGG G . . AC=1
+chr2 471 . GG AA . . AC=1
`))
}
c.Check(code, check.Equals, 0)
vcfout, err := ioutil.ReadFile(tmpdir + "/out.vcf")
c.Check(err, check.IsNil)
- c.Check(sortLines(string(vcfout)), check.Equals, sortLines(`chr1 1 NNN GGC . . GT 1/1 0/0
-chr1 41 TT AA . . GT 1/0 0/0
-chr1 161 A T . . GT 0/1 0/0
-chr1 178 A T . . GT 0/1 0/0
-chr1 221 TCCA T . . GT 1/1 0/0
-chr1 302 TTTT AAAA . . GT 0/1 0/0
-chr2 1 TTT AAA . . GT 0/0 0/1
-chr2 125 CTT AAA . . GT 0/0 1/1
-chr2 240 ATTTTTCTTGCTCTC A . . GT 1/0 0/0
-chr2 258 CCTTGTATTTTT AA . . GT 1/0 0/0
-chr2 315 C A . . GT 1/0 0/0
-chr2 469 GTGG G . . GT 1/0 0/0
-chr2 471 GG AA . . GT 0/1 0/0
+ c.Check(sortLines(string(vcfout)), check.Equals, sortLines(`##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT testdata/pipeline1/input1.1.fasta testdata/pipeline1/input2.1.fasta
+chr1 1 . NNN GGC . . . GT 1/1 0/0
+chr1 41 . TT AA . . . GT 1/0 0/0
+chr1 161 . A T . . . GT 0/1 0/0
+chr1 178 . A T . . . GT 0/1 0/0
+chr1 221 . TCCA T . . . GT 1/1 0/0
+chr1 302 . TTTT AAAA . . . GT 0/1 0/0
+chr2 1 . TTT AAA . . . GT 0/0 0/1
+chr2 125 . CTT AAA . . . GT 0/0 1/1
+chr2 240 . ATTTTTCTTGCTCTC A . . . GT 1/0 0/0
+chr2 258 . CCTTGTATTTTT AA . . . GT 1/0 0/0
+chr2 315 . C A . . . GT 1/0 0/0
+chr2 469 . GTGG G . . . GT 1/0 0/0
+chr2 471 . GG AA . . . GT 0/1 0/0
`))
bedout, err := ioutil.ReadFile(tmpdir + "/export.bed")
c.Check(err, check.IsNil)