var outputFormats = map[string]func() outputFormat{
"hgvs-numpy": func() outputFormat {
- return &formatHGVSNumpy{alleles: map[string][][]bool{}}
+ return &formatHGVSNumpy{alleles: map[string][][]int8{}}
},
"hgvs-onehot": func() outputFormat { return formatHGVSOneHot{} },
"hgvs": func() outputFormat { return formatHGVS{} },
tagcoverage := 0 // number of times the start tag was found in genomes -- max is len(cgs)*2
for cgidx, cg := range cgs {
for phase := 0; phase < 2; phase++ {
- if len(cg.Variants) <= int(libref.Tag)*2+phase {
- continue
+ var variant tileVariantID
+ if i := int(libref.Tag)*2 + phase; len(cg.Variants) > i {
+ variant = cg.Variants[i]
}
- variant := cg.Variants[int(libref.Tag)*2+phase]
- if variant == 0 {
- continue
+ if variant > 0 {
+ tagcoverage++
}
- tagcoverage++
- if variant == libref.Variant {
+ if variant == libref.Variant || variant == 0 {
continue
}
glibref := tileLibRef{Tag: libref.Tag, Variant: variant}
for i, pos := range flushpos {
varslice := variantAt[pos]
delete(variantAt, pos)
+ // Check for uninitialized (zero-value)
+ // elements in varslice
for i := range varslice {
- if varslice[i].Position == 0 {
- varslice[i].Position = pos
+ if varslice[i].Position != 0 {
+ // Not a zero-value element
+ continue
+ }
+ // Set the position so
+ // varslice[*].Position are all equal
+ varslice[i].Position = pos
+ // This could be either =ref or a
+ // missing/low-quality tile. Figure
+ // out which.
+ v := cgs[i/2].Variants[int(libref.Tag)*2+i%2]
+ if v < 1 || len(tilelib.TileVariantSequence(tileLibRef{Tag: libref.Tag, Variant: v})) == 0 {
+ // Missing/low-quality tile.
+ varslice[i].New = "-" // fasta "gap of indeterminate length"
}
}
flushvariants[i] = varslice
byref := map[string]map[string]int{}
for _, v := range varslice {
if v.Ref == "" && v.New == "" {
+ // =ref
+ continue
+ }
+ if v.New == "-" {
+ // no-call
continue
}
alts := byref[v.Ref]
out.Write([]byte{'\t'})
}
var1, var2 := varslice[i*2], varslice[i*2+1]
+ if var1.New == "-" || var2.New == "-" {
+ _, err := out.Write([]byte{'N'})
+ if err != nil {
+ return err
+ }
+ continue
+ }
if var1.Variant == var2.Variant {
if var1.Ref == var1.New {
_, err := out.Write([]byte{'.'})
sort.Slice(sorted, func(a, b int) bool { return hgvs.Less(sorted[a], sorted[b]) })
for _, v := range sorted {
+ if v.New == "-" {
+ continue
+ }
fmt.Fprintf(out, "%s.%s", seqname, v.String())
for i := 0; i < len(varslice); i += 2 {
if varslice[i].Variant == v || varslice[i+1].Variant == v {
type formatHGVSNumpy struct {
sync.Mutex
writelock sync.Mutex
- alleles map[string][][]bool // alleles[seqname][variantidx][genomeidx*2+phase]
+ alleles map[string][][]int8 // alleles[seqname][variantidx][genomeidx*2+phase]
}
func (*formatHGVSNumpy) MaxGoroutines() int { return 4 }
seqalleles := f.alleles[seqname]
f.Unlock()
- // append a row to seqvariants and seqalleles for each unique
- // non-ref variant in varslice.
+ // append a row to seqalleles for each unique non-ref variant
+ // in varslice.
var previous hgvs.Variant
for _, v := range sorted {
- if previous == v || v.Ref == v.New {
+ if previous == v || v.Ref == v.New || v.New == "-" {
continue
}
previous = v
- newrow := make([]bool, len(varslice))
+ newrow := make([]int8, len(varslice))
for i, allele := range varslice {
if allele.Variant == v {
- newrow[i] = true
+ newrow[i] = 1
+ } else if allele.Variant.New == "-" {
+ newrow[i] = -1
}
}
seqalleles = append(seqalleles, newrow)
func (f *formatHGVSNumpy) Finish(outdir string, _ io.Writer, seqname string) error {
// Write seqname's data to a .npy matrix with one row per
// genome and 2 columns per variant.
- seqalleles := f.alleles[seqname]
f.Lock()
+ seqalleles := f.alleles[seqname]
delete(f.alleles, seqname)
f.Unlock()
if len(seqalleles) == 0 {
for varidx, alleles := range seqalleles {
for g := 0; g < len(alleles)/2; g++ {
aa, ab := alleles[g*2], alleles[g*2+1]
- if aa && ab {
+ if aa < 0 || ab < 0 {
+ // no-call
+ out[g*cols+varidx*2] = -1
+ out[g*cols+varidx*2+1] = -1
+ } else if aa > 0 && ab > 0 {
// hom
out[g*cols+varidx*2] = 1
- } else if aa || ab {
+ } else if aa > 0 || ab > 0 {
// het
out[g*cols+varidx*2+1] = 1
}
package lightning
import (
- "bytes"
"io/ioutil"
"os"
"os/exec"
err := ioutil.WriteFile(tmpdir+"/chr1-12-100.bed", []byte("chr1\t12\t100\ttest.1\n"), 0644)
c.Check(err, check.IsNil)
- var buffer bytes.Buffer
- exited := (&importer{}).RunCommand("import", []string{"-local=true", "-tag-library", "testdata/tags", "-output-tiles", "-save-incomplete-tiles", "testdata/pipeline1", "testdata/ref.fasta"}, &bytes.Buffer{}, &buffer, os.Stderr)
+ exited := (&importer{}).RunCommand("import", []string{
+ "-local=true",
+ "-tag-library", "testdata/tags",
+ "-output-tiles",
+ "-save-incomplete-tiles",
+ "-o", tmpdir + "/library1.gob",
+ "testdata/ref.fasta",
+ }, nil, os.Stderr, os.Stderr)
+ c.Assert(exited, check.Equals, 0)
+
+ exited = (&importer{}).RunCommand("import", []string{
+ "-local=true",
+ "-tag-library", "testdata/tags",
+ "-output-tiles",
+ // "-save-incomplete-tiles",
+ "-o", tmpdir + "/library2.gob",
+ "testdata/pipeline1",
+ }, nil, os.Stderr, os.Stderr)
c.Assert(exited, check.Equals, 0)
- ioutil.WriteFile(tmpdir+"/library.gob", buffer.Bytes(), 0644)
+
+ exited = (&merger{}).RunCommand("merge", []string{
+ "-local=true",
+ "-o", tmpdir + "/library.gob",
+ tmpdir + "/library1.gob",
+ tmpdir + "/library2.gob",
+ }, nil, os.Stderr, os.Stderr)
+ c.Assert(exited, check.Equals, 0)
+
+ input := tmpdir + "/library.gob"
exited = (&exporter{}).RunCommand("export", []string{
"-local=true",
- "-input-dir=" + tmpdir,
+ "-input-dir=" + input,
"-output-dir=" + tmpdir,
"-output-format=hgvs-onehot",
"-output-labels=" + tmpdir + "/labels.csv",
"-ref=testdata/ref.fasta",
- }, &buffer, os.Stderr, os.Stderr)
+ }, nil, os.Stderr, os.Stderr)
c.Check(exited, check.Equals, 0)
output, err := ioutil.ReadFile(tmpdir + "/out.chr1.tsv")
if !c.Check(err, check.IsNil) {
exited = (&exporter{}).RunCommand("export", []string{
"-local=true",
- "-input-dir=" + tmpdir,
+ "-input-dir=" + input,
"-output-dir=" + tmpdir,
"-output-format=pvcf",
"-ref=testdata/ref.fasta",
- }, &buffer, os.Stderr, os.Stderr)
+ }, os.Stderr, os.Stderr, os.Stderr)
c.Check(exited, check.Equals, 0)
output, err = ioutil.ReadFile(tmpdir + "/out.chr1.vcf")
c.Check(err, check.IsNil)
exited = (&exporter{}).RunCommand("export", []string{
"-local=true",
- "-input-dir=" + tmpdir,
+ "-input-dir=" + input,
"-output-dir=" + tmpdir,
"-output-format=vcf",
"-ref=testdata/ref.fasta",
- }, &buffer, os.Stderr, os.Stderr)
+ }, nil, os.Stderr, os.Stderr)
c.Check(exited, check.Equals, 0)
output, err = ioutil.ReadFile(tmpdir + "/out.chr1.vcf")
c.Check(err, check.IsNil)
outdir := c.MkDir()
exited = (&exporter{}).RunCommand("export", []string{
"-local=true",
- "-input-dir=" + tmpdir,
+ "-input-dir=" + input,
"-output-dir=" + outdir,
"-output-format=hgvs-numpy",
"-ref=testdata/ref.fasta",
- }, &buffer, os.Stderr, os.Stderr)
+ }, nil, os.Stderr, os.Stderr)
c.Check(exited, check.Equals, 0)
f, err := os.Open(outdir + "/matrix.chr1.npy")
c.Check(variants, check.HasLen, 6*2*2) // 6 variants * 2 alleles * 2 genomes
c.Check(variants, check.DeepEquals, []int8{
1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, // input1.1.fasta
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // input2.1.fasta
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, // input2.1.fasta
})
f, err = os.Open(outdir + "/matrix.chr2.npy")