Generate annotations for spanning tiles.
authorTom Clegg <tom@curii.com>
Fri, 15 Jul 2022 17:51:56 +0000 (13:51 -0400)
committerTom Clegg <tom@curii.com>
Fri, 15 Jul 2022 19:22:56 +0000 (15:22 -0400)
refs #19271

Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom@curii.com>

slice_test.go
slicenumpy.go
testdata/spanningtile/input1.1.fasta [new file with mode: 0644]
testdata/spanningtile/input1.2.fasta [new file with mode: 0644]

index c7a592b645aa86aa6444a9215b12f1c9c21ad191..b61ba18eebfd97000a2c8be384cd0aff9ce760d4 100644 (file)
@@ -261,9 +261,13 @@ pipeline1dup/input2        0
 
                annotations, err := ioutil.ReadFile(npydir + "/hgvs.chr2.annotations.csv")
                c.Assert(err, check.IsNil)
-               c.Check(string(annotations), check.Equals, `0,chr2:g.470_472del
-1,chr2:g.471G>A
-2,chr2:g.472G>A
+               c.Check(string(annotations), check.Equals, `0,chr2:g.1_3delinsAAA
+1,chr2:g.125_127delinsAAA
+2,chr2:g.241_245delinsAAAAA
+3,chr2:g.291C>A
+4,chr2:g.470_472del
+5,chr2:g.471G>A
+6,chr2:g.472G>A
 `)
        }
 
@@ -375,6 +379,319 @@ pipeline1dup/input2       0
        }
 }
 
+func (s *sliceSuite) TestSpanningTile(c *check.C) {
+       tmpdir := c.MkDir()
+       err := os.Mkdir(tmpdir+"/lib1", 0777)
+       c.Assert(err, check.IsNil)
+       err = os.Mkdir(tmpdir+"/lib2", 0777)
+       c.Assert(err, check.IsNil)
+       cwd, err := os.Getwd()
+       c.Assert(err, check.IsNil)
+       err = os.Symlink(cwd+"/testdata/pipeline1", tmpdir+"/pipeline1")
+       c.Assert(err, check.IsNil)
+       err = os.Symlink(cwd+"/testdata/pipeline1", tmpdir+"/pipeline1dup")
+       c.Assert(err, check.IsNil)
+
+       err = ioutil.WriteFile(tmpdir+"/chr1-12-100.bed", []byte("chr1\t12\t100\ttest.1\n"), 0644)
+       c.Check(err, check.IsNil)
+
+       c.Log("=== import testdata/ref ===")
+       exited := (&importer{}).RunCommand("import", []string{
+               "-local=true",
+               "-tag-library", "testdata/tags",
+               "-output-tiles",
+               "-save-incomplete-tiles",
+               "-o", tmpdir + "/lib1/library1.gob",
+               "testdata/ref.fasta",
+       }, nil, os.Stderr, os.Stderr)
+       c.Assert(exited, check.Equals, 0)
+
+       c.Log("=== import testdata/spanningtile ===")
+       exited = (&importer{}).RunCommand("import", []string{
+               "-local=true",
+               "-tag-library", "testdata/tags",
+               "-output-tiles",
+               "-o", tmpdir + "/lib2/library2.gob",
+               cwd + "/testdata/spanningtile",
+       }, nil, os.Stderr, os.Stderr)
+       c.Assert(exited, check.Equals, 0)
+
+       slicedir := c.MkDir()
+
+       c.Log("=== slice ===")
+       exited = (&slicecmd{}).RunCommand("slice", []string{
+               "-local=true",
+               "-output-dir=" + slicedir,
+               "-tags-per-file=2",
+               tmpdir + "/lib1",
+               tmpdir + "/lib2",
+       }, nil, os.Stderr, os.Stderr)
+       c.Check(exited, check.Equals, 0)
+       out, _ := exec.Command("find", slicedir, "-ls").CombinedOutput()
+       c.Logf("%s", out)
+
+       c.Log("=== dump ===")
+       {
+               dumpdir := c.MkDir()
+               exited = (&dump{}).RunCommand("dump", []string{
+                       "-local=true",
+                       "-tags=5,6",
+                       "-input-dir=" + slicedir,
+                       "-output-dir=" + dumpdir,
+               }, nil, os.Stderr, os.Stderr)
+               c.Check(exited, check.Equals, 0)
+               out, _ := exec.Command("find", dumpdir, "-ls").CombinedOutput()
+               c.Logf("%s", out)
+               dumped, err := ioutil.ReadFile(dumpdir + "/variants.csv")
+               c.Assert(err, check.IsNil)
+               c.Logf("%s", dumped)
+               // spanning tile for tag 5 with A>G snp in tag 6
+               c.Check("\n"+string(dumped), check.Matches, `(?ms).*\n5,1,0,chr2,225,.*AAAACTGATCCGAAAAAAATACAA.*`)
+               c.Check("\n"+string(dumped), check.Matches, `(?ms).*\n6,1,1,chr2,349,AAAACTGATCCAAAAAAAATACAA.*`)
+       }
+
+       c.Log("=== slice-numpy ===")
+       {
+               npydir := c.MkDir()
+               exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
+                       "-local=true",
+                       "-input-dir=" + slicedir,
+                       "-output-dir=" + npydir,
+               }, nil, os.Stderr, os.Stderr)
+               c.Check(exited, check.Equals, 0)
+               out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
+               c.Logf("%s", out)
+
+               f, err := os.Open(npydir + "/matrix.0000.npy")
+               c.Assert(err, check.IsNil)
+               defer f.Close()
+               npy, err := gonpy.NewReader(f)
+               c.Assert(err, check.IsNil)
+               c.Check(npy.Shape, check.DeepEquals, []int{1, 4})
+               variants, err := npy.GetInt16()
+               c.Check(variants, check.DeepEquals, []int16{-1, -1, 1, 1})
+
+               annotations, err := ioutil.ReadFile(npydir + "/matrix.0000.annotations.csv")
+               c.Assert(err, check.IsNil)
+               c.Logf("%s", annotations)
+
+               f, err = os.Open(npydir + "/matrix.0002.npy")
+               c.Assert(err, check.IsNil)
+               defer f.Close()
+               npy, err = gonpy.NewReader(f)
+               c.Assert(err, check.IsNil)
+               c.Check(npy.Shape, check.DeepEquals, []int{1, 4})
+               variants, err = npy.GetInt16()
+               c.Check(variants, check.DeepEquals, []int16{1, 1, 1, 2})
+
+               annotations, err = ioutil.ReadFile(npydir + "/matrix.0002.annotations.csv")
+               c.Assert(err, check.IsNil)
+               c.Logf("%s", annotations)
+               for _, s := range []string{
+                       "chr2:g\\.360A>G",
+               } {
+                       c.Check(string(annotations), check.Matches, "(?ms).*"+s+".*")
+               }
+       }
+
+       c.Log("=== slice-numpy + regions ===")
+       {
+               npydir := c.MkDir()
+               exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
+                       "-local=true",
+                       "-regions=" + tmpdir + "/chr1-12-100.bed",
+                       "-input-dir=" + slicedir,
+                       "-output-dir=" + npydir,
+                       "-chunked-hgvs-matrix=true",
+               }, nil, os.Stderr, os.Stderr)
+               c.Check(exited, check.Equals, 0)
+               out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
+               c.Logf("%s", out)
+
+               f, err := os.Open(npydir + "/matrix.0000.npy")
+               c.Assert(err, check.IsNil)
+               defer f.Close()
+               npy, err := gonpy.NewReader(f)
+               c.Assert(err, check.IsNil)
+               c.Check(npy.Shape, check.DeepEquals, []int{1, 2})
+               variants, err := npy.GetInt16()
+               c.Check(variants, check.DeepEquals, []int16{-1, -1})
+
+               annotations, err := ioutil.ReadFile(npydir + "/matrix.0000.annotations.csv")
+               c.Assert(err, check.IsNil)
+               c.Check(string(annotations), check.Equals, "0,0,1,=,chr1,0,,,\n")
+
+               for _, fnm := range []string{
+                       npydir + "/matrix.0001.annotations.csv",
+                       npydir + "/matrix.0002.annotations.csv",
+               } {
+                       annotations, err := ioutil.ReadFile(fnm)
+                       c.Assert(err, check.IsNil)
+                       c.Check(string(annotations), check.Equals, "", check.Commentf(fnm))
+               }
+       }
+
+       err = ioutil.WriteFile(tmpdir+"/chr1and2-100-200.bed", []byte("chr1\t100\t200\ttest.1\nchr2\t100\t200\ttest.2\n"), 0644)
+       c.Check(err, check.IsNil)
+
+       c.Log("=== slice-numpy + regions + merge ===")
+       {
+               npydir := c.MkDir()
+               exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
+                       "-local=true",
+                       "-regions=" + tmpdir + "/chr1and2-100-200.bed",
+                       "-input-dir=" + slicedir,
+                       "-output-dir=" + npydir,
+                       "-merge-output=true",
+                       "-single-hgvs-matrix=true",
+               }, nil, os.Stderr, os.Stderr)
+               c.Check(exited, check.Equals, 0)
+               out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
+               c.Logf("%s", out)
+
+               f, err := os.Open(npydir + "/matrix.npy")
+               c.Assert(err, check.IsNil)
+               defer f.Close()
+               npy, err := gonpy.NewReader(f)
+               c.Assert(err, check.IsNil)
+               c.Check(npy.Shape, check.DeepEquals, []int{1, 4})
+               variants, err := npy.GetInt16()
+               if c.Check(err, check.IsNil) {
+                       c.Check(variants, check.DeepEquals, []int16{
+                               -1, -1, 1, 1,
+                       })
+               }
+
+               annotations, err := ioutil.ReadFile(npydir + "/matrix.annotations.csv")
+               c.Assert(err, check.IsNil)
+               c.Check(string(annotations), check.Equals, "")
+       }
+
+       c.Log("=== slice-numpy + chunked hgvs matrix ===")
+       {
+               err = ioutil.WriteFile(tmpdir+"/casecontrol.tsv", []byte(`SampleID      CC
+spanningtile/input1    1
+`), 0600)
+               c.Assert(err, check.IsNil)
+               npydir := c.MkDir()
+               exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
+                       "-local=true",
+                       "-chunked-hgvs-matrix=true",
+                       "-chi2-case-control-file=" + tmpdir + "/casecontrol.tsv",
+                       "-chi2-case-control-column=CC",
+                       "-chi2-p-value=1",
+                       "-min-coverage=0.75",
+                       "-input-dir=" + slicedir,
+                       "-output-dir=" + npydir,
+               }, nil, os.Stderr, os.Stderr)
+               c.Check(exited, check.Equals, 0)
+               out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
+               c.Logf("%s", out)
+
+               annotations, err := ioutil.ReadFile(npydir + "/hgvs.chr2.annotations.csv")
+               c.Assert(err, check.IsNil)
+               c.Check(string(annotations), check.Equals, `0,chr2:g.360A>G
+`)
+       }
+
+       c.Log("=== slice-numpy + onehotChunked ===")
+       {
+               err = ioutil.WriteFile(tmpdir+"/casecontrol.tsv", []byte(`SampleID      CC
+spanningtile/input1    1
+`), 0600)
+               c.Assert(err, check.IsNil)
+               npydir := c.MkDir()
+               exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
+                       "-local=true",
+                       "-chunked-onehot=true",
+                       "-chi2-case-control-file=" + tmpdir + "/casecontrol.tsv",
+                       "-chi2-case-control-column=CC",
+                       "-chi2-p-value=1",
+                       "-min-coverage=0.75",
+                       "-input-dir=" + slicedir,
+                       "-output-dir=" + npydir,
+               }, nil, os.Stderr, os.Stderr)
+               c.Check(exited, check.Equals, 0)
+               out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
+               c.Logf("%s", out)
+
+               f, err := os.Open(npydir + "/onehot.0002.npy")
+               c.Assert(err, check.IsNil)
+               defer f.Close()
+               npy, err := gonpy.NewReader(f)
+               c.Assert(err, check.IsNil)
+               c.Check(npy.Shape, check.DeepEquals, []int{1, 2})
+               onehot, err := npy.GetInt8()
+               if c.Check(err, check.IsNil) {
+                       for r := 0; r < npy.Shape[0]; r++ {
+                               c.Logf("%v", onehot[r*npy.Shape[1]:(r+1)*npy.Shape[1]])
+                       }
+                       c.Check(onehot, check.DeepEquals, []int8{
+                               0, 1, // input1
+                       })
+               }
+       }
+
+       c.Log("=== slice-numpy + onehotSingle ===")
+       {
+               err = ioutil.WriteFile(tmpdir+"/casecontrol.tsv", []byte(`SampleID      CC
+spanningtile/input1    1
+`), 0600)
+               c.Assert(err, check.IsNil)
+               npydir := c.MkDir()
+               exited := (&sliceNumpy{}).RunCommand("slice-numpy", []string{
+                       "-local=true",
+                       "-single-onehot=true",
+                       "-chi2-case-control-file=" + tmpdir + "/casecontrol.tsv",
+                       "-chi2-case-control-column=CC",
+                       "-chi2-p-value=1",
+                       "-min-coverage=0.75",
+                       "-input-dir=" + slicedir,
+                       "-output-dir=" + npydir,
+                       "-debug-tag=1",
+               }, nil, os.Stderr, os.Stderr)
+               c.Check(exited, check.Equals, 0)
+               out, _ := exec.Command("find", npydir, "-ls").CombinedOutput()
+               c.Logf("%s", out)
+
+               f, err := os.Open(npydir + "/onehot.npy")
+               c.Assert(err, check.IsNil)
+               defer f.Close()
+               npy, err := gonpy.NewReader(f)
+               c.Assert(err, check.IsNil)
+               c.Check(npy.Shape, check.DeepEquals, []int{2, 1})
+               onehot, err := npy.GetUint32()
+               if c.Check(err, check.IsNil) {
+                       for r := 0; r < npy.Shape[0]; r++ {
+                               c.Logf("%v", onehot[r*npy.Shape[1]:(r+1)*npy.Shape[1]])
+                       }
+                       c.Check(onehot, check.DeepEquals, []uint32{
+                               0, 1,
+                       })
+               }
+
+               f, err = os.Open(npydir + "/onehot-columns.npy")
+               c.Assert(err, check.IsNil)
+               defer f.Close()
+               npy, err = gonpy.NewReader(f)
+               c.Assert(err, check.IsNil)
+               c.Check(npy.Shape, check.DeepEquals, []int{5, 2})
+               onehotcols, err := npy.GetInt32()
+               if c.Check(err, check.IsNil) {
+                       for r := 0; r < npy.Shape[0]; r++ {
+                               c.Logf("%v", onehotcols[r*npy.Shape[1]:(r+1)*npy.Shape[1]])
+                       }
+                       c.Check(onehotcols, check.DeepEquals, []int32{
+                               5, 5,
+                               2, 2,
+                               1, 0,
+                               1000000, 1000000,
+                               0, 0,
+                       })
+               }
+       }
+}
+
 func (s *sliceSuite) Test_tv2homhet(c *check.C) {
        cmd := &sliceNumpy{
                cgnames:         []string{"sample1", "sample2", "sample3", "sample4"},
index 25f5b0bf18f181767cd8a0c0f52e6260d755c51d..1322b91544140df640bc1c53dcad133155df358f 100644 (file)
@@ -34,6 +34,8 @@ import (
        "golang.org/x/crypto/blake2b"
 )
 
+const annotationMaxTileSpan = 100
+
 type sliceNumpy struct {
        filter                filter
        threads               int
@@ -216,6 +218,10 @@ func (cmd *sliceNumpy) RunCommand(prog string, args []string, stdin io.Reader, s
        if err != nil {
                return 1
        }
+       if len(cmd.cgnames) == 0 {
+               err = fmt.Errorf("fatal: 0 cases, 0 controls, nothing to do")
+               return 1
+       }
        cmd.minCoverage = int(math.Ceil(cmd.filter.MinCoverage * float64(len(cmd.cgnames))))
 
        {
@@ -251,11 +257,14 @@ func (cmd *sliceNumpy) RunCommand(prog string, args []string, stdin io.Reader, s
                seqname  string // chr1
                pos      int    // distance from start of chromosome to starttag
                tiledata []byte // acgtggcaa...
+               excluded bool   // true if excluded by regions file
+               nexttag  tagID  // tagID of following tile (-1 for last tag of chromosome)
        }
        isdup := map[tagID]bool{}
        reftile := map[tagID]*reftileinfo{}
        for seqname, cseq := range refseq {
                pos := 0
+               lastreftag := tagID(-1)
                for _, libref := range cseq {
                        if cmd.filter.MaxTag >= 0 && libref.Tag > tagID(cmd.filter.MaxTag) {
                                continue
@@ -292,7 +301,12 @@ func (cmd *sliceNumpy) RunCommand(prog string, args []string, stdin io.Reader, s
                                        variant:  libref.Variant,
                                        tiledata: tiledata,
                                        pos:      pos,
+                                       nexttag:  -1,
                                }
+                               if lastreftag >= 0 {
+                                       reftile[lastreftag].nexttag = libref.Tag
+                               }
+                               lastreftag = libref.Tag
                        }
                        pos += len(tiledata) - taglen
                }
@@ -308,9 +322,9 @@ func (cmd *sliceNumpy) RunCommand(prog string, args []string, stdin io.Reader, s
                }
                log.Printf("before applying mask, len(reftile) == %d", len(reftile))
                log.Printf("deleting reftile entries for regions outside %d intervals", mask.Len())
-               for tag, rt := range reftile {
+               for _, rt := range reftile {
                        if !mask.Check(strings.TrimPrefix(rt.seqname, "chr"), rt.pos, rt.pos+len(rt.tiledata)) {
-                               delete(reftile, tag)
+                               rt.excluded = true
                        }
                }
                log.Printf("after applying mask, len(reftile) == %d", len(reftile))
@@ -383,10 +397,22 @@ func (cmd *sliceNumpy) RunCommand(prog string, args []string, stdin io.Reader, s
                                        if tv.Ref {
                                                continue
                                        }
+                                       // Skip tile with no
+                                       // corresponding ref tile, if
+                                       // mask is in play (we can't
+                                       // determine coordinates for
+                                       // these)
                                        if mask != nil && reftile[tv.Tag] == nil {
-                                               // Don't waste
-                                               // time/memory on
-                                               // masked-out tiles.
+                                               continue
+                                       }
+                                       // Skip tile whose
+                                       // corresponding ref tile is
+                                       // outside target regions --
+                                       // unless it's a potential
+                                       // spanning tile.
+                                       if mask != nil && reftile[tv.Tag].excluded &&
+                                               (int(tv.Tag+1) >= len(tagset) ||
+                                                       (bytes.HasSuffix(tv.Sequence, tagset[tv.Tag+1]) && reftile[tv.Tag+1] != nil && !reftile[tv.Tag+1].excluded)) {
                                                continue
                                        }
                                        if tv.Tag == cmd.debugTag {
@@ -523,7 +549,20 @@ func (cmd *sliceNumpy) RunCommand(prog string, args []string, stdin io.Reader, s
                        for tag := tagstart; tag < tagend; tag++ {
                                rt := reftile[tag]
                                if rt == nil && mask != nil {
-                                       // Excluded by specified regions
+                                       // With no ref tile, we don't
+                                       // have coordinates to say
+                                       // this is in the desired
+                                       // regions -- so it's not.
+                                       // TODO: handle ref spanning
+                                       // tile case.
+                                       continue
+                               }
+                               if rt != nil && rt.excluded {
+                                       // TODO: don't skip yet --
+                                       // first check for spanning
+                                       // tile variants that
+                                       // intersect non-excluded ref
+                                       // tiles.
                                        continue
                                }
                                if cmd.filter.MaxTag >= 0 && tag > tagID(cmd.filter.MaxTag) {
@@ -550,6 +589,10 @@ func (cmd *sliceNumpy) RunCommand(prog string, args []string, stdin io.Reader, s
                                if rt == nil {
                                        // Reference does not use any
                                        // variant of this tile
+                                       //
+                                       // TODO: diff against the
+                                       // relevant portion of the
+                                       // ref's spanning tile
                                        outcol++
                                        continue
                                }
@@ -566,11 +609,29 @@ func (cmd *sliceNumpy) RunCommand(prog string, args []string, stdin io.Reader, s
                                        } else {
                                                done[v] = true
                                        }
-                                       if len(tv.Sequence) < taglen || !bytes.HasSuffix(rt.tiledata, tv.Sequence[len(tv.Sequence)-taglen:]) {
+                                       if len(tv.Sequence) < taglen {
+                                               continue
+                                       }
+                                       // if reftilestr doesn't end
+                                       // in the same tag as tv,
+                                       // extend reftilestr with
+                                       // following ref tiles until
+                                       // it does (up to an arbitrary
+                                       // sanity-check limit)
+                                       reftilestr := reftilestr
+                                       endtagstr := strings.ToUpper(string(tv.Sequence[len(tv.Sequence)-taglen:]))
+                                       for i, rt := 0, rt; i < annotationMaxTileSpan && !strings.HasSuffix(reftilestr, endtagstr) && rt.nexttag >= 0; i++ {
+                                               rt = reftile[rt.nexttag]
+                                               reftilestr += strings.ToUpper(string(rt.tiledata[taglen:]))
+                                       }
+                                       if mask != nil && !mask.Check(strings.TrimPrefix(rt.seqname, "chr"), rt.pos, rt.pos+len(reftilestr)) {
+                                               continue
+                                       }
+                                       if !strings.HasSuffix(reftilestr, endtagstr) {
                                                fmt.Fprintf(annow, "%d,%d,%d,,%s,%d,,,\n", tag, outcol, v, rt.seqname, rt.pos)
                                                continue
                                        }
-                                       if lendiff := len(rt.tiledata) - len(tv.Sequence); lendiff < -1000 || lendiff > 1000 {
+                                       if lendiff := len(reftilestr) - len(tv.Sequence); lendiff < -1000 || lendiff > 1000 {
                                                fmt.Fprintf(annow, "%d,%d,%d,,%s,%d,,,\n", tag, outcol, v, rt.seqname, rt.pos)
                                                continue
                                        }
@@ -689,7 +750,7 @@ func (cmd *sliceNumpy) RunCommand(prog string, args []string, stdin io.Reader, s
                                                if cmd.filter.MaxTag >= 0 && tag > tagID(cmd.filter.MaxTag) {
                                                        break
                                                }
-                                               if mask != nil && reftile[tag] == nil {
+                                               if rt := reftile[tag]; rt == nil || rt.excluded {
                                                        continue
                                                }
                                                if v == 0 {
diff --git a/testdata/spanningtile/input1.1.fasta b/testdata/spanningtile/input1.1.fasta
new file mode 100644 (file)
index 0000000..718ddd3
--- /dev/null
@@ -0,0 +1,20 @@
+>chr1
+nnngtctacctcgagaagccccgacctctgaataagatctttgaacatctcaagggattgtgtatcttgttgggtgtacgcgcgccagcccgcagcatta
+ggagaactgtgctccgccttcaga
+ccccttgggtaaaatgccgcgcaatatgttgattacacttgctgcccatctgaaaggtcgccttatcaatcctatgctgaatgccctctaaggagttcca
+acacatgctagcgcgtcggggtgg
+tcgccgcatgggacatgttggggtcccgtagctccggtcgatgtaggcacgcgttttgccgaggagatagatcatcagctgacctatagattcgtctgtc
+gactctagcagagtggccagccac
+aaggtttatattcagtcgaaatggacgggtcccgaacttgcacggacctaacgggactcgcctttcgtaataaccgaacctctaggccgccgcgagatca
+cctcccgagccgagccacccgtca
+tgatagccatcccgcacctagcgcacggaacttcgaccgatcccatattaacgtgtctcttatcctcccgctactgcgtgccttgccactcgtttactat
+>chr2
+tttttatagcagtagcggttgcgataatgcgcactaaggtggccataacttagccacacagactgcgacctcggtgtcaatctttaggcgatgactagtg
+gttattaataataacttatcatca
+cttccggggaaactagcgtaaaaaccgccgtcgcagtataccaccttatacctgtgccactcaatacaggagttgctcagcccaagacaccaacgactaa
+gctctcaaaccttgtatttttctt
+atgtcctcgcttggattatggggactcgcagtaaatgacaaccgtgcccgctggccctggccggaccgcgcgtccgtaagtagcgatcgagtcctgcagt
+aaaactgatccGaaaaaaatacaa
+acaccacgtttataccctgttgagtcgccacgtaacgtattaacgtatgaacggcccggttttcttatctcgcaccgctagtttgccctggcggtcgtgg
+cctatgagtcaatcctattttcaa
+gatgagtatgtataaggcggcctgtcacgtgaaacctaggatacccaggtacctagggtcatttgctccccgttccccgcggcggacatccgaatatatc
diff --git a/testdata/spanningtile/input1.2.fasta b/testdata/spanningtile/input1.2.fasta
new file mode 100644 (file)
index 0000000..8bd6142
--- /dev/null
@@ -0,0 +1,20 @@
+>chr1
+nnngtctacctcgagaagccccgacctctgaataagatctttgaacatctcaagggattgtgtatcttgttgggtgtacgcgcgccagcccgcagcatta
+ggagaactgtgctccgccttcaga
+ccccttgggtaaaatgccgcgcaatatgttgattacacttgctgcccatctgaaaggtcgccttatcaatcctatgctgaatgccctctaaggagttcca
+acacatgctagcgcgtcggggtgg
+tcgccgcatgggacatgttggggtcccgtagctccggtcgatgtaggcacgcgttttgccgaggagatagatcatcagctgacctatagattcgtctgtc
+gactctagcagagtggccagccac
+aaggtttatattcagtcgaaatggacgggtcccgaacttgcacggacctaacgggactcgcctttcgtaataaccgaacctctaggccgccgcgagatca
+cctcccgagccgagccacccgtca
+tgatagccatcccgcacctagcgcacggaacttcgaccgatcccatattaacgtgtctcttatcctcccgctactgcgtgccttgccactcgtttactat
+>chr2
+tttttatagcagtagcggttgcgataatgcgcactaaggtggccataacttagccacacagactgcgacctcggtgtcaatctttaggcgatgactagtg
+gttattaataataacttatcatca
+cttccggggaaactagcgtaaaaaccgccgtcgcagtataccaccttatacctgtgccactcaatacaggagttgctcagcccaagacaccaacgactaa
+gctctcaaaccttgtatttttctt
+atgtcctcgcttggattatggggactcgcagtaaatgacaaccgtgcccgctggccctggccggaccgcgcgtccgtaagtagcgatcgagtcctgcagt
+aaaactgatccaaaaaaaatacaa
+acaccacgtttataccctgttgagtcgccacgtaacgtattaacgtatgaacggcccggttttcttatctcgcaccgctagtttgccctggcggtcgtgg
+cctatgagtcaatcctattttcaa
+gatgagtatgtataaggcggcctgtcacgtgaaacctaggatacccaggtacctagggtcatttgctccccgttccccgcggcggacatccgaatatatc