"runtime"
"sort"
"strings"
+ "sync/atomic"
"git.arvados.org/arvados.git/sdk/go/arvados"
"github.com/arvados/lightning/hgvs"
RAM: 250000000000,
VCPUs: 32,
Priority: *priority,
- KeepCache: 1,
+ KeepCache: 2,
APIAccess: true,
}
err = runner.TranslatePaths(inputDir, regionsFilename)
}
}
log.Info("loading reference tiles from all slices")
- throttle := throttle{Max: runtime.NumCPU()}
+ throttle := throttle{Max: runtime.GOMAXPROCS(0)}
for _, infile := range infiles {
infile := infile
throttle.Go(func() error {
log.Info("TODO: determining which tiles intersect given regions")
log.Info("generating annotations and numpy matrix for each slice")
// NOTE(review): this span is a patch excerpt. A leading "+" or "-" marks a
// line added or removed by the patch, and some context between hunks is not
// shown, so braces and error checks below do not all pair up in this view.
//
// done counts slices that have finished, for the "(n/total)" progress log at
// the end of each callback. It is updated with atomic.AddInt64, presumably
// because throttle.Go runs callbacks concurrently; confirm against throttle.
+ var done int64
for infileIdx, infile := range infiles {
// Re-declare the loop variables so the closure below captures
// per-iteration copies instead of the shared loop variables.
infileIdx, infile := infileIdx, infile
throttle.Go(func() error {
// NOTE(review): the removed deferred log fired on every return path,
// including error returns; its replacement just before `return nil`
// is only reached on success.
- defer log.Infof("%s: done", infile)
seq := map[tagID][][]byte{}
cgs := make(map[string]CompactGenome, len(cgnames))
f, err := open(infile)
return err
}
defer f.Close()
+ log.Infof("reading %s", infile)
// The second argument is true when the file name ends in ".gz".
err = DecodeLibrary(f, strings.HasSuffix(infile, ".gz"), func(ent *LibraryEntry) error {
for _, tv := range ent.TileVariants {
variants := seq[tv.Tag]
// Tag range is read from the entry for the first name in cgnames.
tagstart := cgs[cgnames[0]].StartTag
tagend := cgs[cgnames[0]].EndTag
- log.Infof("TODO: %s: filtering", infile)
- log.Infof("TODO: %s: tidying", infile)
- log.Infof("TODO: %s: lowqual to -1", infile)
+ // TODO: filters
+ // TODO: tidy/renumber
// Output name embeds the slice index zero-padded to four digits.
annotationsFilename := fmt.Sprintf("%s/matrix.%04d.annotations.csv", *outputDir, infileIdx)
- log.Infof("%s: writing annotations to %s", infile, annotationsFilename)
+ log.Infof("writing %s", annotationsFilename)
annof, err := os.Create(annotationsFilename)
if err != nil {
return err
return err
}
defer output.Close()
- bufw := bufio.NewWriter(output)
// 1<<26 bytes = 64 MiB write buffer in front of output.
+ bufw := bufio.NewWriterSize(output, 1<<26)
npw, err := gonpy.NewWriter(nopCloser{bufw})
if err != nil {
return err
if err != nil {
return err
}
// atomic.AddInt64 returns the incremented value, so each callback logs
// how many slices (itself included) have completed out of len(infiles).
+ log.Infof("%s: done (%d/%d)", infile, int(atomic.AddInt64(&done, 1)), len(infiles))
return nil
})
}