X-Git-Url: https://git.arvados.org/lightning.git/blobdiff_plain/395c842444856a96a4216353a201ff5f42e3ef64..69b71af4136fdaeeb5c2afbc559208dc5f428c48:/import.go diff --git a/import.go b/import.go index aa83a70798..629fccac2f 100644 --- a/import.go +++ b/import.go @@ -23,20 +23,25 @@ import ( "time" "git.arvados.org/arvados.git/sdk/go/arvados" + "github.com/lucasb-eyer/go-colorful" log "github.com/sirupsen/logrus" + "gonum.org/v1/plot" + "gonum.org/v1/plot/plotter" + "gonum.org/v1/plot/vg" + "gonum.org/v1/plot/vg/draw" ) type importer struct { - tagLibraryFile string - refFile string - outputFile string - projectUUID string - runLocal bool - skipOOO bool - outputTiles bool - includeNoCalls bool - outputStats string - encoder *gob.Encoder + tagLibraryFile string + refFile string + outputFile string + projectUUID string + runLocal bool + skipOOO bool + outputTiles bool + saveIncompleteTiles bool + outputStats string + encoder *gob.Encoder } func (cmd *importer) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int { @@ -55,7 +60,7 @@ func (cmd *importer) RunCommand(prog string, args []string, stdin io.Reader, std flags.BoolVar(&cmd.runLocal, "local", false, "run on local host (default: run in an arvados container)") flags.BoolVar(&cmd.skipOOO, "skip-ooo", false, "skip out-of-order tags") flags.BoolVar(&cmd.outputTiles, "output-tiles", false, "include tile variant sequences in output file") - flags.BoolVar(&cmd.includeNoCalls, "include-no-calls", false, "treat tiles with no-calls as regular tiles") + flags.BoolVar(&cmd.saveIncompleteTiles, "save-incomplete-tiles", false, "treat tiles with no-calls as regular tiles") flags.StringVar(&cmd.outputStats, "output-stats", "", "output stats to `file` (json)") priority := flags.Int("priority", 500, "container request priority") pprof := flags.String("pprof", "", "serve Go profile data at http://`[addr]:port`") @@ -91,8 +96,8 @@ func (cmd *importer) RunCommand(prog string, args []string, stdin io.Reader, std Name: "lightning import", Client: arvados.NewClientFromEnv(), ProjectUUID: cmd.projectUUID, - RAM: 60000000000, - VCPUs: 16, + RAM: 80000000000, + VCPUs: 32, Priority: *priority, } err = runner.TranslatePaths(&cmd.tagLibraryFile, &cmd.refFile, &cmd.outputFile) @@ -107,7 +112,7 @@ func (cmd *importer) RunCommand(prog string, args []string, stdin io.Reader, std } } if cmd.outputFile == "-" { - cmd.outputFile = "/mnt/output/library.gob" + cmd.outputFile = "/mnt/output/library.gob.gz" } else { // Not yet implemented, but this should write // the collection to an existing collection, @@ -120,7 +125,7 @@ func (cmd *importer) RunCommand(prog string, args []string, stdin io.Reader, std "-loglevel=" + *loglevel, fmt.Sprintf("-skip-ooo=%v", cmd.skipOOO), fmt.Sprintf("-output-tiles=%v", cmd.outputTiles), - fmt.Sprintf("-include-no-calls=%v", cmd.includeNoCalls), + fmt.Sprintf("-save-incomplete-tiles=%v", cmd.saveIncompleteTiles), "-output-stats", "/mnt/output/stats.json", "-tag-library", cmd.tagLibraryFile, "-ref", cmd.refFile, @@ -131,7 +136,7 @@ func (cmd *importer) RunCommand(prog string, args []string, stdin io.Reader, std if err != nil { return 1 } - fmt.Fprintln(stdout, output+"/library.gob") + fmt.Fprintln(stdout, output+"/library.gob.gz") return 0 } @@ -145,20 +150,25 @@ func (cmd *importer) RunCommand(prog string, args []string, stdin io.Reader, std return 1 } - var output io.WriteCloser + var outw, outf io.WriteCloser if cmd.outputFile == "-" { - output = nopCloser{stdout} + outw = nopCloser{stdout} } else { - output, err = os.OpenFile(cmd.outputFile, os.O_CREATE|os.O_WRONLY, 0777) + outf, err = os.OpenFile(cmd.outputFile, os.O_CREATE|os.O_WRONLY, 0777) if err != nil { return 1 } - defer output.Close() + defer outf.Close() + if strings.HasSuffix(cmd.outputFile, ".gz") { + outw = gzip.NewWriter(outf) + } else { + outw = outf + } } - bufw := bufio.NewWriter(output) + bufw := bufio.NewWriter(outw) cmd.encoder = gob.NewEncoder(bufw) - tilelib := &tileLibrary{taglib: taglib, includeNoCalls: cmd.includeNoCalls, skipOOO: cmd.skipOOO} + tilelib := &tileLibrary{taglib: taglib, retainNoCalls: cmd.saveIncompleteTiles, skipOOO: cmd.skipOOO} if cmd.outputTiles { cmd.encoder.Encode(LibraryEntry{TagSet: taglib.Tags()}) tilelib.encoder = cmd.encoder @@ -177,10 +187,16 @@ func (cmd *importer) RunCommand(prog string, args []string, stdin io.Reader, std if err != nil { return 1 } - err = output.Close() + err = outw.Close() if err != nil { return 1 } + if outf != nil && outf != outw { + err = outf.Close() + if err != nil { + return 1 + } + } return 0 } @@ -497,3 +513,73 @@ func flatten(variants [][]tileVariantID) []tileVariantID { } return flat } + +type importstatsplot struct{} + +func (cmd *importstatsplot) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int { + err := cmd.Plot(stdin, stdout) + if err != nil { + log.Errorf("%s", err) + return 1 + } + return 0 +} + +func (cmd *importstatsplot) Plot(stdin io.Reader, stdout io.Writer) error { + var stats []importStats + err := json.NewDecoder(stdin).Decode(&stats) + if err != nil { + return err + } + + p, err := plot.New() + if err != nil { + return err + } + p.Title.Text = "coverage preserved by import (excl X<0.65)" + p.X.Label.Text = "input base calls ÷ sequence length" + p.Y.Label.Text = "output base calls ÷ input base calls" + p.Add(plotter.NewGrid()) + + data := map[string]plotter.XYs{} + for _, stat := range stats { + data[stat.InputLabel] = append(data[stat.InputLabel], plotter.XY{ + X: float64(stat.InputCoverage) / float64(stat.InputLength), + Y: float64(stat.TileCoverage) / float64(stat.InputCoverage), + }) + } + + labels := []string{} + for label := range data { + labels = append(labels, label) + } + sort.Strings(labels) + palette, err := colorful.SoftPalette(len(labels)) + if err != nil { + return err + } + nextInPalette := 0 + for idx, label := range labels { + s, err := plotter.NewScatter(data[label]) + if err != nil { + return err + } + s.GlyphStyle.Color = palette[idx] + s.GlyphStyle.Radius = vg.Millimeter / 2 + s.GlyphStyle.Shape = draw.CrossGlyph{} + nextInPalette += 7 + p.Add(s) + if false { + p.Legend.Add(label, s) + } + } + p.X.Min = 0.65 + p.X.Max = 1 + + w, err := p.WriterTo(8*vg.Inch, 6*vg.Inch, "svg") + if err != nil { + return err + } + _, err = w.WriteTo(stdout) + return err +}