-package main
+// Copyright (C) The Lightning Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package lightning
import (
"bufio"
- "encoding/gob"
"encoding/json"
"errors"
"flag"
"net/http"
_ "net/http/pprof"
"os"
+ "strings"
"git.arvados.org/arvados.git/sdk/go/arvados"
log "github.com/sirupsen/logrus"
)
-type stats struct{}
// statscmd carries the options of the "stats" command. This patch renames
// the previously field-less stats type so that the command can remember the
// -debug-unplaced setting between flag parsing and doStats.
+type statscmd struct {
// debugUnplaced is bound to the -debug-unplaced flag in RunCommand; when
// true, doStats adds every tag with zero placements to its JSON output.
+ debugUnplaced bool
+}
// RunCommand parses args, then produces library statistics via doStats and
// returns a process exit code. From the visible lines: 0 on success or when
// help was requested, 2 for flag errors or leftover positional arguments, 1
// for failures later on.
//
// NOTE(review): this is a patch view with unchanged regions elided. The body
// of the deferred error handler, the creation of flags, the pprof branch, and
// the setup of runner, input and output are not shown here, so the control
// flow connecting the fragments below must be confirmed against the full file.
-func (cmd *stats) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int {
+func (cmd *statscmd) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int {
var err error
defer func() {
if err != nil {
priority := flags.Int("priority", 500, "container request priority")
inputFilename := flags.String("i", "-", "input `file`")
outputFilename := flags.String("o", "-", "output `file`")
// New in this patch: bind -debug-unplaced directly to the receiver so
// doStats can read it later.
+ flags.BoolVar(&cmd.debugUnplaced, "debug-unplaced", false, "output full list of unplaced tags")
err = flags.Parse(args)
if err == flag.ErrHelp {
// A help request is not a failure: clear err and exit 0.
err = nil
return 0
} else if err != nil {
return 2
// New in this patch: positional arguments left over after the flags are
// rejected with the same exit code (2) as a flag parsing error.
+ } else if flags.NArg() > 0 {
+ err = fmt.Errorf("errant command line arguments after parsed flags: %v", flags.Args())
+ return 2
}
if *pprof != "" {
if err != nil {
return 1
}
// The command line handed to runner re-invokes "stats" with -local=true and
// a fixed output path. The patch additionally forwards the current
// -debug-unplaced value so the setting is not lost in that invocation.
- runner.Args = []string{"stats", "-local=true", "-i", *inputFilename, "-o", "/mnt/output/stats.json"}
+ runner.Args = []string{"stats", "-local=true", fmt.Sprintf("-debug-unplaced=%v", cmd.debugUnplaced), "-i", *inputFilename, "-o", "/mnt/output/stats.json"}
var output string
output, err = runner.Run()
if err != nil {
}
bufw := bufio.NewWriter(output)
// The old call discarded readLibrary's error return. The replacement stores
// the doStats error in err and exits 1 on failure. The gz argument is true
// exactly when the input filename ends in ".gz".
- cmd.readLibrary(input, bufw)
+ err = cmd.doStats(input, strings.HasSuffix(*inputFilename, ".gz"), bufw)
+ if err != nil {
+ return 1
+ }
// Flush the buffered writer; a flush failure is reported as exit code 1.
err = bufw.Flush()
if err != nil {
return 1
return 0
}
// doStats feeds input through DecodeLibrary, accumulates summary statistics
// over every decoded LibraryEntry, and writes them to output as a single JSON
// value.
//
// gz is passed straight through to DecodeLibrary; the visible caller sets it
// when the input filename ends in ".gz" (presumably meaning gzip-compressed
// input -- confirm against DecodeLibrary).
//
// It returns the error reported by DecodeLibrary (including the "multiple
// tagsets" error raised in the callback below) or by the JSON encoder.
-func (cmd *stats) readLibrary(input io.Reader, output io.Writer) error {
+func (cmd *statscmd) doStats(input io.Reader, gz bool, output io.Writer) error {
// ret is the JSON document written to output.
var ret struct {
// Genomes is the total number of CompactGenomes seen.
Genomes int
// CalledBases has one element per compact genome, in the order decoded:
// the sum of a/c/g/t counts over the tile variants that genome references.
+ CalledBases []int64
// Tags is the length of the (single) tagset found in the input.
Tags int
+ TagsPlacedNTimes []int // a[x]==y means there were y tags that placed x times
// TileVariants is the total number of tile variants seen.
TileVariants int
// VariantsBySize and NCVariantsBySize are indexed by len(tv.Sequence) and
// are always grown together.
VariantsBySize []int
NCVariantsBySize []int
// UnplacedTags is filled only when cmd.debugUnplaced is set, with one
// "<tag id> <tag>" string per tag that has zero placements.
+ UnplacedTags []string `json:",omitempty"`
}
- dec := gob.NewDecoder(bufio.NewReaderSize(input, 1<<26))
- for {
- var ent LibraryEntry
- err := dec.Decode(&ent)
- if err == io.EOF {
- break
- } else if err != nil {
- return err
- }
// tagSet keeps the tagset so unplaced tags can be printed afterwards.
// tagPlacements[t] counts the nonzero variant entries seen for tag t across
// all genomes. tileVariantCalls maps a (tag, variant) pair to the number of
// a/c/g/t bases in that variant's sequence.
+ var tagSet [][]byte
+ var tagPlacements []int
+ tileVariantCalls := map[tileLibRef]int{}
+ err := DecodeLibrary(input, gz, func(ent *LibraryEntry) error {
ret.Genomes += len(ent.CompactGenomes)
- ret.Tags += len(ent.TagSet)
ret.TileVariants += len(ent.TileVariants)
// Only one non-empty tagset is accepted. A second one is detected through
// ret.Tags already being nonzero and aborts decoding with an error.
+ if len(ent.TagSet) > 0 {
+ if ret.Tags > 0 {
+ return errors.New("invalid input: contains multiple tagsets")
+ }
+ ret.Tags = len(ent.TagSet)
+ tagSet = ent.TagSet
+ }
for _, tv := range ent.TileVariants {
// Grow both histograms so that index len(tv.Sequence) is valid.
if need := 1 + len(tv.Sequence) - len(ret.VariantsBySize); need > 0 {
ret.VariantsBySize = append(ret.VariantsBySize, make([]int, need)...)
ret.NCVariantsBySize = append(ret.NCVariantsBySize, make([]int, need)...)
}
// Lowercase a/c/g/t bytes count as calls; any other byte marks the
// variant as containing no-calls.
+ calls := 0
hasNoCalls := false
for _, b := range tv.Sequence {
- if b != 'a' && b != 'c' && b != 'g' && b != 't' {
+ if b == 'a' || b == 'c' || b == 'g' || b == 't' {
+ calls++
+ } else {
hasNoCalls = true
}
}
// NOTE(review): the "if" that pairs with the "else" below is elided in this
// patch view; presumably it tests hasNoCalls and increments
// NCVariantsBySize -- confirm against the full file.
} else {
ret.VariantsBySize[len(tv.Sequence)]++
}
+
// Remember the call count so genomes referencing this variant can add it
// to their CalledBases total.
+ tileVariantCalls[tileLibRef{Tag: tv.Tag, Variant: tv.Variant}] = calls
}
// g.Variants is indexed so that idx/2 is the tag id, i.e. two consecutive
// entries per tag; an entry greater than zero counts as one placement.
// A tag with both of its entries nonzero in one genome is counted twice.
// NOTE(review): looking up a variant that has not been recorded in
// tileVariantCalls yields 0, so CalledBases is only correct if tile
// variants are decoded before the genomes that reference them -- confirm
// the ordering guarantee of the input.
+ for _, g := range ent.CompactGenomes {
+ if need := (len(g.Variants)+1)/2 - len(tagPlacements); need > 0 {
+ tagPlacements = append(tagPlacements, make([]int, need)...)
+ }
+ calledBases := int64(0)
+ for idx, v := range g.Variants {
+ if v > 0 {
+ tagPlacements[idx/2]++
+ calledBases += int64(tileVariantCalls[tileLibRef{Tag: tagID(idx / 2), Variant: v}])
+ }
+ }
+ ret.CalledBases = append(ret.CalledBases, calledBases)
+ }
+ return nil
+ })
+ if err != nil {
+ return err
}
// Turn per-tag placement counts into a histogram: TagsPlacedNTimes[p] is the
// number of tags placed exactly p times. The inner loop extends the slice
// until index p exists.
// NOTE(review): tagPlacements is sized from the genomes, not from tagSet, so
// tagSet[id] below can index out of range when debugUnplaced is set and the
// input has no tagset or a genome has more than 2*len(tagSet) variants --
// consider a bounds check.
+ for id, p := range tagPlacements {
+ for len(ret.TagsPlacedNTimes) <= p {
+ ret.TagsPlacedNTimes = append(ret.TagsPlacedNTimes, 0)
+ }
+ ret.TagsPlacedNTimes[p]++
+ if cmd.debugUnplaced && p == 0 {
+ ret.UnplacedTags = append(ret.UnplacedTags, fmt.Sprintf("%d %s", id, tagSet[id]))
+ }
+ }
+
return json.NewEncoder(output).Encode(ret)
}