1 // Copyright (C) The Lightning Authors. All rights reserved.
3 // SPDX-License-Identifier: AGPL-3.0
20 "git.arvados.org/arvados.git/sdk/go/arvados"
21 log "github.com/sirupsen/logrus"
// statscmd implements the "stats" subcommand. Its debugUnplaced field is
// set from the -debug-unplaced flag in RunCommand and read by doStats.
24 type statscmd struct {
// RunCommand parses the "stats" command line and produces library
// statistics.
//
// Flags (defined below): -pprof, -local, -project, -priority, -i, -o and
// -debug-unplaced. Per the -local help text, the default is to run in an
// Arvados container: that path re-invokes "stats -local=true" with the
// output fixed at /mnt/output/stats.json and prints the resulting output
// path to stdout. The local path reads the input (stdin when -i is "-")
// and writes the JSON produced by doStats to the output (stdout when -o is
// "-").
//
// Error text is written to stderr. The returned int is presumably a
// process exit status -- TODO confirm against the return statements.
28 func (cmd *statscmd) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int {
// Report the error on the caller-supplied stderr.
32 fmt.Fprintf(stderr, "%s\n", err)
// ContinueOnError makes Parse return its error instead of exiting, so help
// requests, parse failures and stray positional arguments are all handled
// explicitly below.
35 flags := flag.NewFlagSet("", flag.ContinueOnError)
// Send flag diagnostics and usage text to the supplied stderr writer.
36 flags.SetOutput(stderr)
37 pprof := flags.String("pprof", "", "serve Go profile data at http://`[addr]:port`")
38 runlocal := flags.Bool("local", false, "run on local host (default: run in an arvados container)")
39 projectUUID := flags.String("project", "", "project `UUID` for output data")
40 priority := flags.Int("priority", 500, "container request priority")
// "-" is the default for both -i and -o; it selects stdin / stdout below.
41 inputFilename := flags.String("i", "-", "input `file`")
42 outputFilename := flags.String("o", "-", "output `file`")
// Stored on cmd (not a local) because doStats reads cmd.debugUnplaced.
43 flags.BoolVar(&cmd.debugUnplaced, "debug-unplaced", false, "output full list of unplaced tags")
44 err = flags.Parse(args)
45 if err == flag.ErrHelp {
48 } else if err != nil {
50 } else if flags.NArg() > 0 {
// Positional arguments are not accepted by this subcommand.
51 err = fmt.Errorf("errant command line arguments after parsed flags: %v", flags.Args())
// ListenAndServe only returns on failure, so this logs why the profiling
// server stopped. A nil handler means http.DefaultServeMux is used.
// NOTE(review): presumably guarded by a non-empty -pprof and run in its
// own goroutine -- confirm.
57 log.Println(http.ListenAndServe(*pprof, nil))
// Container mode (per the error text): an explicit -o is rejected because
// the output location is fixed by the container invocation below.
62 if *outputFilename != "-" {
63 err = errors.New("cannot specify output file in container mode: not implemented")
66 runner := arvadosContainerRunner{
67 Name: "lightning stats",
68 Client: arvados.NewClientFromEnv(),
69 ProjectUUID: *projectUUID,
// The flag's *string is passed, so the runner can rewrite the input path
// in place -- presumably to a path valid inside the container; verify
// against TranslatePaths.
74 err = runner.TranslatePaths(inputFilename)
// Re-run this same subcommand with -local=true, forwarding -debug-unplaced
// and the (translated) input path, writing to /mnt/output/stats.json.
78 runner.Args = []string{"stats", "-local=true", fmt.Sprintf("-debug-unplaced=%v", cmd.debugUnplaced), "-i", *inputFilename, "-o", "/mnt/output/stats.json"}
80 output, err = runner.Run()
// Print where the result can be found: the string returned by runner.Run
// plus the file name used in runner.Args above.
84 fmt.Fprintln(stdout, output+"/stats.json")
// Local mode: choose the input stream.
88 var input io.ReadCloser
89 if *inputFilename == "-" {
// Wrap stdin so it satisfies io.ReadCloser; its Close is a no-op.
90 input = ioutil.NopCloser(stdin)
92 input, err = os.Open(*inputFilename)
// Local mode: choose the output stream.
99 var output io.WriteCloser
100 if *outputFilename == "-" {
// nopCloser is declared elsewhere in the package; presumably the writer
// counterpart of ioutil.NopCloser -- confirm.
101 output = nopCloser{stdout}
// NOTE(review): no O_TRUNC here, so writing over an existing, longer file
// leaves its old tail after the new JSON. Mode 0777 (before umask) is also
// unusually permissive for a data file -- confirm both are intended.
103 output, err = os.OpenFile(*outputFilename, os.O_CREATE|os.O_WRONLY, 0777)
// Buffer writes to the output.
// NOTE(review): a bufio.Writer must be flushed before the output is
// closed -- confirm a Flush (with error check) follows the doStats call.
110 bufw := bufio.NewWriter(output)
// The gz argument is true exactly when the input file name ends in ".gz".
111 err = cmd.doStats(input, strings.HasSuffix(*inputFilename, ".gz"), bufw)
// doStats reads a library from input via DecodeLibrary (gz is forwarded
// unchanged), accumulates summary statistics over every decoded entry, and
// writes the result to output as a single JSON document.
//
// It returns the JSON encoder's error on the final line; an input with
// more than one tagset is rejected with "invalid input: contains multiple
// tagsets". NOTE(review): the err from DecodeLibrary is presumably
// returned before the summary is built -- confirm.
126 func (cmd *statscmd) doStats(input io.Reader, gz bool, output io.Writer) error {
131 TagsPlacedNTimes []int // a[x]==y means there were y tags that placed x times
// Indexed by sequence length, in step with VariantsBySize (both are grown
// together below).
134 NCVariantsBySize []int
// Omitted from the JSON when empty; only filled when cmd.debugUnplaced is
// set (see the final loop).
135 UnplacedTags []string `json:",omitempty"`
// tagPlacements[t] counts placements of tag t, summed over all genomes.
139 var tagPlacements []int
// Per tile variant, the "calls" value computed in the TileVariants loop
// below -- presumably the number of a/c/g/t bases in its sequence; confirm.
140 tileVariantCalls := map[tileLibRef]int{}
// The callback runs once per decoded library entry.
141 err := DecodeLibrary(input, gz, func(ent *LibraryEntry) error {
142 ret.Genomes += len(ent.CompactGenomes)
143 ret.TileVariants += len(ent.TileVariants)
// Only one tagset is allowed per input; Tags records its size.
144 if len(ent.TagSet) > 0 {
146 return errors.New("invalid input: contains multiple tagsets")
148 ret.Tags = len(ent.TagSet)
151 for _, tv := range ent.TileVariants {
// Grow both size histograms so that index len(tv.Sequence) is valid.
152 if need := 1 + len(tv.Sequence) - len(ret.VariantsBySize); need > 0 {
153 ret.VariantsBySize = append(ret.VariantsBySize, make([]int, need)...)
154 ret.NCVariantsBySize = append(ret.NCVariantsBySize, make([]int, need)...)
159 for _, b := range tv.Sequence {
// Only lowercase a/c/g/t match this test.
160 if b == 'a' || b == 'c' || b == 'g' || b == 't' {
// NOTE(review): presumably reached only for variants containing at least
// one base outside a/c/g/t ("NC" = no-call?) -- confirm the guard.
168 ret.NCVariantsBySize[len(tv.Sequence)]++
170 ret.VariantsBySize[len(tv.Sequence)]++
173 tileVariantCalls[tileLibRef{Tag: tv.Tag, Variant: tv.Variant}] = calls
175 for _, g := range ent.CompactGenomes {
// g.Variants holds two consecutive entries per tag (idx/2 is used as the
// tag index below), so ceil(len/2) slots are needed.
176 if need := (len(g.Variants)+1)/2 - len(tagPlacements); need > 0 {
177 tagPlacements = append(tagPlacements, make([]int, need)...)
179 calledBases := int64(0)
180 for idx, v := range g.Variants {
// NOTE(review): presumably guarded so that only placed variants are
// counted (e.g. v > 0) -- confirm the guard.
182 tagPlacements[idx/2]++
// A (tag, variant) pair absent from tileVariantCalls contributes 0.
183 calledBases += int64(tileVariantCalls[tileLibRef{Tag: tagID(idx / 2), Variant: v}])
// One CalledBases element per genome, in decode order.
186 ret.CalledBases = append(ret.CalledBases, calledBases)
// Turn per-tag placement counts into a histogram: TagsPlacedNTimes[p] is
// the number of tags placed exactly p times.
193 for id, p := range tagPlacements {
194 for len(ret.TagsPlacedNTimes) <= p {
195 ret.TagsPlacedNTimes = append(ret.TagsPlacedNTimes, 0)
197 ret.TagsPlacedNTimes[p]++
198 if cmd.debugUnplaced && p == 0 {
// NOTE(review): tagSet is declared elsewhere; this index panics if
// tagPlacements is longer than tagSet -- confirm that cannot happen.
199 ret.UnplacedTags = append(ret.UnplacedTags, fmt.Sprintf("%d %s", id, tagSet[id]))
203 return json.NewEncoder(output).Encode(ret)