-package main
+// Copyright (C) The Lightning Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package lightning
import (
"bufio"
)
// annotatecmd is the receiver type for RunCommand and annotateSequence.
//
// Fields, as used in the code visible in this file:
//   - dropTiles: indexed by tag number; annotateSequence skips every
//     tag whose entry is true. Tags at or beyond len(dropTiles) are
//     never skipped.
//   - variantHash, maxTileSize: passed along by RunCommand in
//     runner.Args as the -variant-hash and -max-tile-size arguments.
//   - tag2tagid: NOTE(review): no use is visible here; presumably a
//     lookup from tag sequence to tagID -- confirm against the rest of
//     the file.
//   - reportAnnotation: optional callback. When non-nil,
//     annotateSequence calls it once per emitted annotation, right
//     after sending the corresponding comma-separated line to the
//     output channel.
type annotatecmd struct {
- variantHash bool
- maxTileSize int
- tag2tagid map[string]tagID
+ dropTiles []bool
+ variantHash bool
+ maxTileSize int
+ tag2tagid map[string]tagID
+ reportAnnotation func(tag tagID, outcol int, variant tileVariantID, refname string, seqname string, pdi hgvs.Variant)
}
func (cmd *annotatecmd) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int {
return 0
} else if err != nil {
return 2
+ } else if flags.NArg() > 0 {
+ err = fmt.Errorf("errant command line arguments after parsed flags: %v", flags.Args())
+ return 2
}
if *pprof != "" {
if err != nil {
return 1
}
- runner.Args = []string{"annotate", "-local=true", fmt.Sprintf("-variant-hash=%v", cmd.variantHash), "-max-tile-size", strconv.Itoa(cmd.maxTileSize), "-i", *inputFilename, "-o", "/mnt/output/tilevariants.tsv"}
+ runner.Args = []string{"annotate", "-local=true", fmt.Sprintf("-variant-hash=%v", cmd.variantHash), "-max-tile-size", strconv.Itoa(cmd.maxTileSize), "-i", *inputFilename, "-o", "/mnt/output/tilevariants.csv"}
var output string
output, err = runner.Run()
if err != nil {
return 1
}
- fmt.Fprintln(stdout, output+"/tilevariants.tsv")
+ fmt.Fprintln(stdout, output+"/tilevariants.csv")
return 0
}
}
defer output.Close()
}
- bufw := bufio.NewWriter(output)
+ bufw := bufio.NewWriterSize(output, 4*1024*1024)
tilelib := &tileLibrary{
retainNoCalls: true,
retainTileSequences: true,
}
- err = tilelib.LoadGob(context.Background(), input, nil)
+ err = tilelib.LoadGob(context.Background(), input, strings.HasSuffix(*inputFilename, ".gz"))
if err != nil {
return 1
}
for _, seqname := range seqnames {
seqname := seqname
throttle.Acquire()
+ if throttle.Err() != nil {
+ break
+ }
go func() {
defer throttle.Release()
throttle.Report(cmd.annotateSequence(throttle, outch, tilelib, taglen, refname, seqname, refcs[seqname], len(refs) > 1))
}
func (cmd *annotatecmd) annotateSequence(throttle *throttle, outch chan<- string, tilelib *tileLibrary, taglen int, refname, seqname string, reftiles []tileLibRef, refnamecol bool) error {
+ refnamefield := ""
+ if refnamecol {
+ refnamefield = "," + trimFilenameForLabel(refname)
+ }
var refseq []byte
// tilestart[123] is the index into refseq
// where the tile for tag 123 was placed.
tileend[libref.Tag] = len(refseq)
}
log.Infof("seq %s len(refseq) %d len(tilestart) %d", seqname, len(refseq), len(tilestart))
+ // outcol is tag's index in the subset of tags that aren't
+ // dropped. If there are 10M tags and half are dropped by
+ // dropTiles, tag ranges from 0 to 10M-1 and outcol ranges
+ // from 0 to 5M-1.
+ //
+ // In other words, in the matrix built by cgs2array(), {tag} is
+ // represented by columns {outcol}*2 and {outcol}*2+1.
+ outcol := -1
for tag, tvs := range tilelib.variant {
+ if len(cmd.dropTiles) > tag && cmd.dropTiles[tag] {
+ continue
+ }
tag := tagID(tag)
+ outcol++
+ // Must shadow outcol var to use safely in goroutine below.
+ outcol := outcol
refstart, ok := tilestart[tag]
if !ok {
- // Tag didn't place on this
- // reference sequence. (It
- // might place on the same
- // chromosome in a genome
- // anyway, but we don't output
- // the annotations that would
- // result.)
+ // Tag didn't place on this reference
+ // sequence. (It might place on the same
+ // chromosome in a genome anyway, but we don't
+ // output the annotations that would result.)
+ // outch <- fmt.Sprintf("%d,%d,-1%s\n", tag, outcol, refnamefield)
continue
}
for variant := 1; variant <= len(tvs); variant++ {
} else {
varid = fmt.Sprintf("%d", variant)
}
- refnamefield := ""
- if refnamecol {
- refnamefield = "\t" + refname
+ outch <- fmt.Sprintf("%d,%d,%s%s,%s:g.%s\n", tag, outcol, varid, refnamefield, seqname, diff.String())
+ if cmd.reportAnnotation != nil {
+ cmd.reportAnnotation(tag, outcol, variant, refname, seqname, diff)
}
- outch <- fmt.Sprintf("%d\t%s%s\t%s:g.%s\n", tag, varid, refnamefield, seqname, diff.String())
}
}()
}