--- /dev/null
+package main
+
+func longestIncreasingSubsequence(srclen int, X func(int) int) []int {
+ if srclen == 0 {
+ return nil
+ }
+ M := make([]int, srclen+1) // M[j] == index into X such that X[M[j]] is the smallest X[k] where k<=i and an increasing subsequence with length j ends at X[k]
+ P := make([]int, srclen) // P[k] == index in X of predecessor of X[k] in longest increasing subsequence ending at X[k]
+ L := 0 // length of longest increasing subsequence found so far
+ for i := range P {
+ lo, hi := 1, L
+ for lo <= hi {
+ mid := (lo + hi + 1) / 2
+ if X(M[mid]) < X(i) {
+ lo = mid + 1
+ } else {
+ hi = mid - 1
+ }
+ }
+ newL := lo
+ if i > 0 {
+ P[i] = M[newL-1]
+ }
+ M[newL] = i
+ if newL > L {
+ L = newL
+ }
+ }
+ ret := make([]int, L)
+ for k, i := M[L], len(ret)-1; i >= 0; k, i = P[k], i-1 {
+ ret[i] = k
+ }
+ return ret
+}
--- /dev/null
+package main
+
+import (
+ "gopkg.in/check.v1"
+)
+
+type lisSuite struct{}
+
+var _ = check.Suite(&lisSuite{})
+
+func (s *lisSuite) Test(c *check.C) {
+ for _, trial := range []struct {
+ in []int
+ out []int
+ }{
+ {},
+ {
+ in: []int{},
+ },
+ {
+ in: []int{0},
+ out: []int{0},
+ },
+ {
+ in: []int{1, 2, 3, 4},
+ out: []int{0, 1, 2, 3},
+ },
+ {
+ in: []int{1, 2, 2, 4},
+ out: []int{0, 2, 3},
+ },
+ {
+ in: []int{4, 3, 2, 1},
+ out: []int{3},
+ },
+ {
+ in: []int{1, 3, 2, 4},
+ out: []int{0, 2, 3},
+ },
+ {
+ in: []int{1, 0, 0, 0, 4},
+ out: []int{3, 4},
+ },
+ {
+ in: []int{0, 1, 2, 1, 4, 5},
+ out: []int{0, 1, 2, 4, 5},
+ },
+ } {
+ c.Logf("=== %v", trial)
+ c.Check(longestIncreasingSubsequence(len(trial.in), func(i int) int { return trial.in[i] }), check.DeepEquals, trial.out)
+ }
+}
})
totalFoundTags += len(found)
+ skipped := 0
path = path[:0]
last := foundtag{tagid: -1}
+ if tilelib.skipOOO {
+ keep := longestIncreasingSubsequence(len(found), func(i int) int { return int(found[i].tagid) })
+ for i, x := range keep {
+ found[i] = found[x]
+ }
+ skipped = len(found) - len(keep)
+ found = found[:len(keep)]
+ }
for i, f := range found {
log.Tracef("%s %s found[%d] == %#v", filelabel, job.label, i, f)
- if tilelib.skipOOO {
- if f.tagid < last.tagid+1 {
- log.Debugf("%s %s skipped out-of-order tag %d (found at %d) because it appears after tag %d (found at %d)", filelabel, job.label, f.tagid, f.pos, last.tagid, last.pos)
- continue
- }
- if f.tagid > last.tagid+1 && // accepting this tag would mean skipping some tags
- i+1 < len(found) && // there is a "next" found tag after this one
- found[i+1].tagid > last.tagid && // next found tag is usable (we haven't already passed it in accepted sequence)
- found[i+1].tagid <= f.tagid { // next found tag is expected before this one (so we can't use both)
- log.Debugf("%s %s skipped out-of-order tag %d (found at %d) because it appears between tag %d (found at %d) and %d (found at %d)", filelabel, job.label, f.tagid, f.pos, last.tagid, last.pos, found[i+1].tagid, found[i+1].pos)
- continue
- }
- }
if last.taglen > 0 {
path = append(path, tilelib.getRef(last.tagid, job.fasta[last.pos:f.pos+f.taglen]))
}
pathcopy := make([]tileLibRef, len(path))
copy(pathcopy, path)
ret[job.label] = pathcopy
- log.Debugf("%s %s tiled with path len %d, skipped %d", filelabel, job.label, len(path), len(found)-len(path))
+ log.Debugf("%s %s tiled with path len %d, skipped %d", filelabel, job.label, len(path), skipped)
totalPathLen += len(path)
}
log.Printf("%s tiled with total path len %d in %d sequences (skipped %d sequences with '_' in name, skipped %d out-of-order tags)", filelabel, totalPathLen, len(ret), skippedSequences, totalFoundTags-totalPathLen)