16 "git.arvados.org/arvados.git/sdk/go/arvados"
17 log "github.com/sirupsen/logrus"
// RunCommand parses the "stats" command-line flags from args and then
// either hands the job to an arvadosContainerRunner or processes the
// input on the local host via doStats, returning a process exit code.
//
// NOTE(review): this excerpt omits some lines of the function (branch
// conditions, returns, closing braces), so the comments below describe
// only the statements that are visible.
22 func (cmd *stats) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int {
// Errors are reported to the caller-supplied stderr writer.
26 fmt.Fprintf(stderr, "%s\n", err)
// ContinueOnError makes Parse return its error instead of exiting the
// process; usage and error text are sent to stderr.
29 flags := flag.NewFlagSet("", flag.ContinueOnError)
30 flags.SetOutput(stderr)
31 pprof := flags.String("pprof", "", "serve Go profile data at http://`[addr]:port`")
32 runlocal := flags.Bool("local", false, "run on local host (default: run in an arvados container)")
33 projectUUID := flags.String("project", "", "project `UUID` for output data")
34 priority := flags.Int("priority", 500, "container request priority")
35 inputFilename := flags.String("i", "-", "input `file`")
36 outputFilename := flags.String("o", "-", "output `file`")
37 err = flags.Parse(args)
// flag.ErrHelp means the user asked for -h/-help; it is distinguished
// from genuine parse errors, which take the other branch.
38 if err == flag.ErrHelp {
41 } else if err != nil {
// ListenAndServe blocks until the server fails; its error is then logged.
// Presumably this runs in a goroutine only when -pprof is non-empty --
// the enclosing lines are not visible here; confirm.
47 log.Println(http.ListenAndServe(*pprof, nil))
// Container mode cannot honour a custom -o value: anything other than
// the default "-" is rejected as not implemented.
52 if *outputFilename != "-" {
53 err = errors.New("cannot specify output file in container mode: not implemented")
56 runner := arvadosContainerRunner{
57 Name: "lightning stats",
58 Client: arvados.NewClientFromEnv(),
59 ProjectUUID: *projectUUID,
// TranslatePaths receives a pointer to the input filename; presumably it
// rewrites it to a path usable inside the container -- confirm against
// its definition.
64 err = runner.TranslatePaths(inputFilename)
// The container runs this same subcommand with -local=true, reading the
// (possibly translated) input path and writing /mnt/output/stats.json.
68 runner.Args = []string{"stats", "-local=true", "-i", *inputFilename, "-o", "/mnt/output/stats.json"}
70 output, err = runner.Run()
// Report where the result lives: the value returned by Run with
// "/stats.json" appended.
74 fmt.Fprintln(stdout, output+"/stats.json")
// Local mode input: "-" selects stdin (wrapped so it has a no-op Close);
// otherwise the named file is opened.
78 var input io.ReadCloser
79 if *inputFilename == "-" {
80 input = ioutil.NopCloser(stdin)
82 input, err = os.Open(*inputFilename)
// Local mode output: "-" selects stdout via nopCloser; otherwise the
// named file is opened for writing.
89 var output io.WriteCloser
90 if *outputFilename == "-" {
91 output = nopCloser{stdout}
// NOTE(review): the flags omit os.O_TRUNC, so writing over an existing,
// longer file would leave stale trailing bytes; mode 0777 also requests
// execute permission for a data file -- confirm both are intentional.
93 output, err = os.OpenFile(*outputFilename, os.O_CREATE|os.O_WRONLY, 0777)
100 bufw := bufio.NewWriter(output)
// NOTE(review): the error returned by doStats is discarded on this line,
// so decode/encode failures are not reported unless handled in lines
// not shown in this excerpt.
101 cmd.doStats(input, bufw)
// doStats decodes gob-encoded entries from input, accumulates counts
// over them, and writes the accumulated result to output as JSON. It
// returns an error for input containing multiple tagsets, and otherwise
// whatever the JSON encoder returns.
//
// NOTE(review): this excerpt omits some lines of the function (the loop
// header, end-of-input handling and several guard conditions), so the
// comments below describe only the statements that are visible.
113 func (cmd *stats) doStats(input io.Reader, output io.Writer) error {
117 TagsPlacedNTimes []int // a[x]==y means there were y tags that placed x times
120 NCVariantsBySize []int
// tagPlacements[i] is a running counter for tag index i, incremented
// while scanning genomes and summarised into TagsPlacedNTimes below.
123 var tagPlacements []int
// The input is read through a 1<<26-byte (64 MiB) buffer.
124 dec := gob.NewDecoder(bufio.NewReaderSize(input, 1<<26))
// One entry is decoded per pass; presumably io.EOF ends the loop in the
// branch not shown here, while other errors take the branch below.
127 err := dec.Decode(&ent)
130 } else if err != nil {
133 ret.Genomes += len(ent.CompactGenomes)
134 ret.TileVariants += len(ent.TileVariants)
// A non-empty TagSet sets ret.Tags; the guard that detects a second
// tagset is on a line not visible here.
135 if len(ent.TagSet) > 0 {
137 return errors.New("invalid input: contains multiple tagsets")
139 ret.Tags = len(ent.TagSet)
141 for _, g := range ent.CompactGenomes {
// Grow tagPlacements so it has one counter per pair of entries in
// g.Variants, rounding up for an odd length.
142 if need := (len(g.Variants)+1)/2 - len(tagPlacements); need > 0 {
143 tagPlacements = append(tagPlacements, make([]int, need)...)
// idx/2 maps each consecutive pair of Variants entries to one counter;
// presumably the increment is guarded by a check on v -- that line is
// not visible here.
145 for idx, v := range g.Variants {
147 tagPlacements[idx/2]++
151 for _, tv := range ent.TileVariants {
// Grow both histograms together so index len(tv.Sequence) is valid in
// each of them.
152 if need := 1 + len(tv.Sequence) - len(ret.VariantsBySize); need > 0 {
153 ret.VariantsBySize = append(ret.VariantsBySize, make([]int, need)...)
154 ret.NCVariantsBySize = append(ret.NCVariantsBySize, make([]int, need)...)
// Scan for any byte other than lowercase a/c/g/t; what is recorded on a
// match is on lines not visible here.
158 for _, b := range tv.Sequence {
159 if b != 'a' && b != 'c' && b != 'g' && b != 't' {
// Both histograms are indexed by sequence length. Presumably exactly one
// of the two is incremented per variant, chosen by the scan above --
// confirm against the omitted lines.
165 ret.NCVariantsBySize[len(tv.Sequence)]++
167 ret.VariantsBySize[len(tv.Sequence)]++
// Turn the per-tag counters into a histogram: TagsPlacedNTimes[p] is the
// number of tags whose counter equals p, extended with zeros as needed.
171 for _, p := range tagPlacements {
172 for len(ret.TagsPlacedNTimes) <= p {
173 ret.TagsPlacedNTimes = append(ret.TagsPlacedNTimes, 0)
175 ret.TagsPlacedNTimes[p]++
// The encoder's error, if any, is returned to the caller.
178 return json.NewEncoder(output).Encode(ret)