chi2Cases: []bool{false, true, true, false},
chi2PValue: .5,
includeVariant1: true,
+ minCoverage: 3,
}
cgs := map[string]CompactGenome{
"sample1": CompactGenome{
maxv := tileVariantID(3)
remap := []tileVariantID{0, 1, 0, 0, 0, 2, 0, 0, 0, 3}
chunkstarttag := tagID(10)
+ fakevariant := TileVariant{Sequence: []byte("ACGT")}
+ seq := map[tagID][]TileVariant{}
for tag := tagID(10); tag < 12; tag++ {
+ seq[tag-chunkstarttag] = []TileVariant{TileVariant{}, fakevariant, TileVariant{}, TileVariant{}, TileVariant{}, fakevariant}
c.Logf("=== tag %d", tag)
- chunk, xref := cmd.tv2homhet(cgs, maxv, remap, tag, chunkstarttag)
+ chunk, xref := cmd.tv2homhet(cgs, maxv, remap, tag, chunkstarttag, seq)
c.Logf("chunk len=%d", len(chunk))
for _, x := range chunk {
c.Logf("%+v", x)
}
}
if *onehotChunked || *onehotSingle {
- onehot, xrefs := cmd.tv2homhet(cgs, maxv, remap, tag, tagstart)
+ onehot, xrefs := cmd.tv2homhet(cgs, maxv, remap, tag, tagstart, seq)
if tag == cmd.debugTag {
log.WithFields(logrus.Fields{
"onehot": onehot,
// variants of a single tile/tag#.
//
// Return nil if no tile variant passes Χ² filter.
-func (cmd *sliceNumpy) tv2homhet(cgs map[string]CompactGenome, maxv tileVariantID, remap []tileVariantID, tag, chunkstarttag tagID) ([][]int8, []onehotXref) {
+func (cmd *sliceNumpy) tv2homhet(cgs map[string]CompactGenome, maxv tileVariantID, remap []tileVariantID, tag, chunkstarttag tagID, seq map[tagID][]TileVariant) ([][]int8, []onehotXref) {
if tag == cmd.debugTag {
tv := make([]tileVariantID, len(cmd.cgnames)*2)
for i, name := range cmd.cgnames {
tagoffset := tag - chunkstarttag
coverage := 0
for _, cg := range cgs {
- if cg.Variants[tagoffset*2] > 0 && cg.Variants[tagoffset*2+1] > 0 {
+ alleles := 0
+ for _, v := range cg.Variants[tagoffset*2 : tagoffset*2+2] {
+ if v > 0 && int(v) < len(seq[tagoffset]) && len(seq[tagoffset][v].Sequence) > 0 {
+ alleles++
+ }
+ }
+ if alleles == 2 {
coverage++
}
}