Fix some tests.
[lightning.git] / annotate.go
index ac34ada40fd62089ef263d3983fba75fca57661f..00f1cad1556147c2b2ba31f2e8e3b4483ad02e20 100644 (file)
@@ -1,3 +1,7 @@
+// Copyright (C) The Lightning Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
 package lightning
 
 import (
@@ -23,9 +27,11 @@ import (
 )
 
 type annotatecmd struct {
-       variantHash bool
-       maxTileSize int
-       tag2tagid   map[string]tagID
+       dropTiles        []bool
+       variantHash      bool
+       maxTileSize      int
+       tag2tagid        map[string]tagID
+       reportAnnotation func(tag tagID, outcol int, variant tileVariantID, refname string, seqname string, pdi hgvs.Variant)
 }
 
 func (cmd *annotatecmd) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int {
@@ -51,6 +57,9 @@ func (cmd *annotatecmd) RunCommand(prog string, args []string, stdin io.Reader,
                return 0
        } else if err != nil {
                return 2
+       } else if flags.NArg() > 0 {
+               err = fmt.Errorf("errant command line arguments after parsed flags: %v", flags.Args())
+               return 2
        }
 
        if *pprof != "" {
@@ -112,7 +121,7 @@ func (cmd *annotatecmd) RunCommand(prog string, args []string, stdin io.Reader,
                retainNoCalls:       true,
                retainTileSequences: true,
        }
-       err = tilelib.LoadGob(context.Background(), input, strings.HasSuffix(*inputFilename, ".gz"), nil)
+       err = tilelib.LoadGob(context.Background(), input, strings.HasSuffix(*inputFilename, ".gz"))
        if err != nil {
                return 1
        }
@@ -183,6 +192,9 @@ func (cmd *annotatecmd) exportTileDiffs(outw io.Writer, tilelib *tileLibrary) er
                for _, seqname := range seqnames {
                        seqname := seqname
                        throttle.Acquire()
+                       if throttle.Err() != nil {
+                               break
+                       }
                        go func() {
                                defer throttle.Release()
                                throttle.Report(cmd.annotateSequence(throttle, outch, tilelib, taglen, refname, seqname, refcs[seqname], len(refs) > 1))
@@ -194,6 +206,10 @@ func (cmd *annotatecmd) exportTileDiffs(outw io.Writer, tilelib *tileLibrary) er
 }
 
 func (cmd *annotatecmd) annotateSequence(throttle *throttle, outch chan<- string, tilelib *tileLibrary, taglen int, refname, seqname string, reftiles []tileLibRef, refnamecol bool) error {
+       refnamefield := ""
+       if refnamecol {
+               refnamefield = "," + trimFilenameForLabel(refname)
+       }
        var refseq []byte
        // tilestart[123] is the index into refseq
        // where the tile for tag 123 was placed.
@@ -216,17 +232,29 @@ func (cmd *annotatecmd) annotateSequence(throttle *throttle, outch chan<- string
                tileend[libref.Tag] = len(refseq)
        }
        log.Infof("seq %s len(refseq) %d len(tilestart) %d", seqname, len(refseq), len(tilestart))
+       // outcol is tag's index in the subset of tags that aren't
+       // dropped. If there are 10M tags and half are dropped by
+       // dropTiles, tag ranges from 0 to 10M-1 and outcol ranges
+       // from 0 to 5M-1.
+       //
+       // IOW, in the matrix built by cgs2array(), {tag} is
+       // represented by columns {outcol}*2 and {outcol}*2+1.
+       outcol := -1
        for tag, tvs := range tilelib.variant {
+               if len(cmd.dropTiles) > tag && cmd.dropTiles[tag] {
+                       continue
+               }
                tag := tagID(tag)
+               outcol++
+               // Must shadow outcol var to use safely in goroutine below.
+               outcol := outcol
                refstart, ok := tilestart[tag]
                if !ok {
-                       // Tag didn't place on this
-                       // reference sequence. (It
-                       // might place on the same
-                       // chromosome in a genome
-                       // anyway, but we don't output
-                       // the annotations that would
-                       // result.)
+                       // Tag didn't place on this reference
+                       // sequence. (It might place on the same
+                       // chromosome in a genome anyway, but we don't
+                       // output the annotations that would result.)
+                       // outch <- fmt.Sprintf("%d,%d,-1%s\n", tag, outcol, refnamefield)
                        continue
                }
                for variant := 1; variant <= len(tvs); variant++ {
@@ -275,11 +303,10 @@ func (cmd *annotatecmd) annotateSequence(throttle *throttle, outch chan<- string
                                        } else {
                                                varid = fmt.Sprintf("%d", variant)
                                        }
-                                       refnamefield := ""
-                                       if refnamecol {
-                                               refnamefield = "," + trimFilenameForLabel(refname)
+                                       outch <- fmt.Sprintf("%d,%d,%s%s,%s:g.%s\n", tag, outcol, varid, refnamefield, seqname, diff.String())
+                                       if cmd.reportAnnotation != nil {
+                                               cmd.reportAnnotation(tag, outcol, variant, refname, seqname, diff)
                                        }
-                                       outch <- fmt.Sprintf("%d,%s%s,%s:g.%s\n", tag, varid, refnamefield, seqname, diff.String())
                                }
                        }()
                }