16 "git.arvados.org/arvados.git/sdk/go/arvados"
17 log "github.com/sirupsen/logrus"
// RunCommand is the command-line entry point for the stats command.
//
// It parses args into the flags declared below, then either submits the
// job through an arvadosContainerRunner or computes the stats in-process
// by handing the selected input and output streams to cmd.doStats.
// Diagnostics are written to stderr. The int result is presumably a
// process exit status — TODO confirm against the return statements.
24 func (cmd *stats) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int {
28 fmt.Fprintf(stderr, "%s\n", err)
// Use a private FlagSet with ContinueOnError so a parse failure is
// returned to this function, and send flag diagnostics to the caller's
// stderr.
31 flags := flag.NewFlagSet("", flag.ContinueOnError)
32 flags.SetOutput(stderr)
33 pprof := flags.String("pprof", "", "serve Go profile data at http://`[addr]:port`")
34 runlocal := flags.Bool("local", false, "run on local host (default: run in an arvados container)")
35 projectUUID := flags.String("project", "", "project `UUID` for output data")
36 priority := flags.Int("priority", 500, "container request priority")
// For both -i and -o the default "-" selects the standard stream (stdin
// and stdout respectively) instead of a named file.
37 inputFilename := flags.String("i", "-", "input `file`")
38 outputFilename := flags.String("o", "-", "output `file`")
39 flags.BoolVar(&cmd.debugUnplaced, "debug-unplaced", false, "output full list of unplaced tags")
40 err = flags.Parse(args)
// flag.ErrHelp is what Parse returns when -h/-help was requested, so it
// is tested separately from other parse errors.
41 if err == flag.ErrHelp {
44 } else if err != nil {
// A nil handler makes ListenAndServe use http.DefaultServeMux.
// ListenAndServe only returns on failure, and that error is what gets
// logged here.
50 log.Println(http.ListenAndServe(*pprof, nil))
// Container mode: the result always goes to /mnt/output/stats.json (see
// runner.Args below), so a caller-chosen -o file is rejected.
55 if *outputFilename != "-" {
56 err = errors.New("cannot specify output file in container mode: not implemented")
59 runner := arvadosContainerRunner{
60 Name: "lightning stats",
61 Client: arvados.NewClientFromEnv(),
62 ProjectUUID: *projectUUID,
// NOTE(review): inputFilename is passed by pointer and dereferenced again
// when building runner.Args, so TranslatePaths presumably rewrites it in
// place — confirm against arvadosContainerRunner.
67 err = runner.TranslatePaths(inputFilename)
// Command line for the job the runner starts: the same "stats" command,
// forced to -local=true, with -debug-unplaced carried over from this
// invocation.
71 runner.Args = []string{"stats", "-local=true", fmt.Sprintf("-debug-unplaced=%v", cmd.debugUnplaced), "-i", *inputFilename, "-o", "/mnt/output/stats.json"}
73 output, err = runner.Run()
// Print the value returned by runner.Run with "/stats.json" appended, so
// the caller learns where the result is.
77 fmt.Fprintln(stdout, output+"/stats.json")
// Local mode: choose the input stream.
81 var input io.ReadCloser
82 if *inputFilename == "-" {
// NopCloser lets stdin satisfy io.ReadCloser without being closed.
83 input = ioutil.NopCloser(stdin)
85 input, err = os.Open(*inputFilename)
// Local mode: choose the output stream.
92 var output io.WriteCloser
93 if *outputFilename == "-" {
94 output = nopCloser{stdout}
// NOTE(review): the flags do not include os.O_TRUNC, so if the file
// already exists and is longer than the new output, the old trailing
// bytes remain after the JSON. Mode 0777 (before umask) also marks a data
// file executable — confirm both are intended.
96 output, err = os.OpenFile(*outputFilename, os.O_CREATE|os.O_WRONLY, 0777)
// doStats writes through a bufio.Writer, so bufw must be flushed before
// output is closed or buffered data is lost.
// NOTE(review): confirm Flush is called.
103 bufw := bufio.NewWriter(output)
104 err = cmd.doStats(input, bufw)
// doStats reads a stream of gob-encoded entries from input, accumulates
// summary counts over them, and writes the summary to output as a single
// JSON document.
//
// It returns a wrapped "gob decode" error if decoding fails, an "invalid
// input" error if a second tagset is encountered, and otherwise whatever
// error the JSON encoder returns.
119 func (cmd *stats) doStats(input io.Reader, output io.Writer) error {
123 TagsPlacedNTimes []int // a[x]==y means there were y tags that placed x times
// Indexed by sequence length, in step with VariantsBySize (both are grown
// together below).
126 NCVariantsBySize []int
// Only populated when cmd.debugUnplaced is set; omitted from the JSON
// output when empty.
127 UnplacedTags []string `json:",omitempty"`
// tagPlacements[t] counts placements of tag t across all genomes read.
131 var tagPlacements []int
// Read through a 1<<26 byte (64 MiB) buffer.
132 dec := gob.NewDecoder(bufio.NewReaderSize(input, 1<<26))
135 err := dec.Decode(&ent)
138 } else if err != nil {
139 return fmt.Errorf("gob decode: %w", err)
141 ret.Genomes += len(ent.CompactGenomes)
142 ret.TileVariants += len(ent.TileVariants)
// Only a single tagset is accepted for the whole input.
143 if len(ent.TagSet) > 0 {
145 return errors.New("invalid input: contains multiple tagsets")
147 ret.Tags = len(ent.TagSet)
150 for _, g := range ent.CompactGenomes {
// Each tag owns two consecutive entries of g.Variants (index idx maps to
// tag idx/2), so the number of tags covered is len(g.Variants) divided by
// two, rounded up. Grow tagPlacements so every idx/2 below is in range.
151 if need := (len(g.Variants)+1)/2 - len(tagPlacements); need > 0 {
152 tagPlacements = append(tagPlacements, make([]int, need)...)
154 for idx, v := range g.Variants {
// NOTE(review): the guard on this increment is not visible here;
// presumably only entries with a non-zero v count as a placement —
// confirm.
156 tagPlacements[idx/2]++
160 for _, tv := range ent.TileVariants {
// Grow both histograms by the same amount so that index len(tv.Sequence)
// is valid. The needed growth is computed from VariantsBySize alone,
// which assumes the two slices always have equal length.
161 if need := 1 + len(tv.Sequence) - len(ret.VariantsBySize); need > 0 {
162 ret.VariantsBySize = append(ret.VariantsBySize, make([]int, need)...)
163 ret.NCVariantsBySize = append(ret.NCVariantsBySize, make([]int, need)...)
// Look for any byte other than lowercase a, c, g or t.
167 for _, b := range tv.Sequence {
168 if b != 'a' && b != 'c' && b != 'g' && b != 't' {
// NOTE(review): presumably this counts the variant as "NC" only when the
// scan above found such a byte, while VariantsBySize counts the others
// (or all variants) — confirm against the surrounding branches.
174 ret.NCVariantsBySize[len(tv.Sequence)]++
176 ret.VariantsBySize[len(tv.Sequence)]++
// Turn per-tag placement counts into a histogram: TagsPlacedNTimes[p] is
// the number of tags placed exactly p times.
180 for id, p := range tagPlacements {
// Extend the histogram until index p exists.
181 for len(ret.TagsPlacedNTimes) <= p {
182 ret.TagsPlacedNTimes = append(ret.TagsPlacedNTimes, 0)
184 ret.TagsPlacedNTimes[p]++
// With -debug-unplaced, list every tag that was never placed as
// "<tag id> <tag sequence>".
// NOTE(review): tagSet[id] assumes the tagset has at least
// len(tagPlacements) entries — confirm it cannot be indexed out of range.
185 if cmd.debugUnplaced && p == 0 {
186 ret.UnplacedTags = append(ret.UnplacedTags, fmt.Sprintf("%d %s", id, tagSet[id]))
// Encode writes ret as JSON followed by a newline.
190 return json.NewEncoder(output).Encode(ret)