+// Copyright (C) The Lightning Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
package lightning
import (
runlocal := flags.Bool("local", false, "run on local host (default: run in an arvados container)")
projectUUID := flags.String("project", "", "project `UUID` for output data")
priority := flags.Int("priority", 500, "container request priority")
- inputFilename := flags.String("i", "-", "input `file`")
- outputDir := flags.String("output-dir", "/tmp", "output `directory`")
+ inputDir := flags.String("input-dir", "./in", "input `directory`")
+ outputDir := flags.String("output-dir", "./out", "output `directory`")
annotationsFilename := flags.String("output-annotations", "", "output `file` for tile variant annotations csv")
librefsFilename := flags.String("output-onehot2tilevar", "", "when using -one-hot, create csv `file` mapping column# to tag# and variant#")
labelsFilename := flags.String("output-labels", "", "output `file` for genome labels csv")
KeepCache: 1,
APIAccess: true,
}
- err = runner.TranslatePaths(inputFilename, regionsFilename)
+ err = runner.TranslatePaths(inputDir, regionsFilename)
if err != nil {
return 1
}
runner.Args = []string{"export-numpy", "-local=true",
"-pprof", ":6060",
fmt.Sprintf("-one-hot=%v", *onehot),
- "-i", *inputFilename,
+ "-input-dir", *inputDir,
"-output-dir", "/mnt/output",
"-output-annotations", "/mnt/output/annotations.csv",
"-output-onehot2tilevar", "/mnt/output/onehot2tilevar.csv",
"-output-labels", "/mnt/output/labels.csv",
"-regions", *regionsFilename,
"-expand-regions", fmt.Sprintf("%d", *expandRegions),
- "-max-variants", fmt.Sprintf("%d", cmd.filter.MaxVariants),
- "-min-coverage", fmt.Sprintf("%f", cmd.filter.MinCoverage),
- "-max-tag", fmt.Sprintf("%d", cmd.filter.MaxTag),
"-chunks", fmt.Sprintf("%d", *chunks),
}
+ runner.Args = append(runner.Args, cmd.filter.Args()...)
var output string
output, err = runner.Run()
if err != nil {
return 0
}
- var input io.ReadCloser
- if *inputFilename == "-" {
- input = ioutil.NopCloser(stdin)
- } else {
- input, err = open(*inputFilename)
- if err != nil {
- return 1
- }
- defer input.Close()
- }
- input = ioutil.NopCloser(bufio.NewReaderSize(input, 8*1024*1024))
tilelib := &tileLibrary{
retainNoCalls: true,
retainTileSequences: true,
compactGenomes: map[string][]tileVariantID{},
}
- err = tilelib.LoadGob(context.Background(), input, strings.HasSuffix(*inputFilename, ".gz"), nil)
- if err != nil {
- return 1
- }
- err = input.Close()
+ err = tilelib.LoadDir(context.Background(), *inputDir)
if err != nil {
return 1
}
return
}
defer f.Close()
- npw, err := gonpy.NewWriter(f)
+ // gonpy closes our writer and ignores errors. Give it a nopCloser so we can close f properly.
+ npw, err := gonpy.NewWriter(nopCloser{f})
if err != nil {
lastErr.Store(err)
return
}
npw.Shape = []int{len(names), len(pdis) * 2}
- npw.WriteInt8(data)
- // gonpy closes f and ignores errors, doh.
- // err = f.Close()
- // if err != nil {
- // lastErr.Store(err)
- // return
- // }
+ err = npw.WriteInt8(data)
+ if err != nil {
+ lastErr.Store(err)
+ return
+ }
+ err = f.Close()
+ if err != nil {
+ lastErr.Store(err)
+ return
+ }
}()
}
wg.Wait()
for tag, variants := range tilelib.variant {
lq := lowqual[tag]
for varidx, hash := range variants {
- if len(tilelib.seq[hash]) == 0 {
+ if len(tilelib.hashSequence(hash)) == 0 {
if lq == nil {
lq = map[tileVariantID]bool{}
lowqual[tag] = lq
return
}
-func chooseTiles(tilelib *tileLibrary, regionsFilename string, expandRegions int) (drop []bool, err error) {
- if regionsFilename == "" {
- return
- }
// makeMask builds a mask from the regions file named by regionsFilename.
//
// The file is opened with zopen and read into memory in full. Lines that
// begin with '#' are skipped; a line that cannot be parsed as BED or
// GFF/GTF aborts the build with an error. Each parsed region is added to
// the mask widened by expandRegions at both its start and its end, and the
// mask is frozen before it is returned.
//
// On any open, read or parse failure the returned *mask is nil and the
// error is non-nil.
+func makeMask(regionsFilename string, expandRegions int) (*mask, error) {
+ log.Printf("makeMask: reading %s", regionsFilename)
rfile, err := zopen(regionsFilename)
if err != nil {
- return
+ return nil, err
}
defer rfile.Close()
- regions, err := ioutil.ReadAll(rfile)
+ regions, err := io.ReadAll(rfile)
if err != nil {
- return
+ return nil, err
}
- log.Print("chooseTiles: building mask")
- mask := &mask{}
+ log.Print("makeMask: building mask")
+ var mask mask // the local variable shadows the mask type for the rest of this function
for _, line := range bytes.Split(regions, []byte{'\n'}) {
if bytes.HasPrefix(line, []byte{'#'}) {
continue
// GFF/GTF
end++
} else {
- err = fmt.Errorf("cannot parse input line as BED or GFF/GTF: %q", line)
- return
+ return nil, fmt.Errorf("cannot parse input line as BED or GFF/GTF: %q", line)
}
}
mask.Add(refseqname, start-expandRegions, end+expandRegions) // pad the region by expandRegions on each side
}
- log.Print("chooseTiles: mask.Freeze")
+ log.Print("makeMask: mask.Freeze")
mask.Freeze()
+ return &mask, nil
+}
+
+func chooseTiles(tilelib *tileLibrary, regionsFilename string, expandRegions int) (drop []bool, err error) {
+ if regionsFilename == "" {
+ return
+ }
+ mask, err := makeMask(regionsFilename, expandRegions)
+ if err != nil {
+ return
+ }
tagset := tilelib.taglib.Tags()
if len(tagset) == 0 {