From 4f220df0aeb61480e2d203857830ad5c2bbed4c0 Mon Sep 17 00:00:00 2001 From: Tom Clegg Date: Thu, 9 Sep 2021 11:28:08 -0400 Subject: [PATCH] Use callbacks in struct instead of args to Load*(). No issue # Arvados-DCO-1.1-Signed-off-by: Tom Clegg --- annotate.go | 2 +- export.go | 2 +- exportnumpy.go | 2 +- flake.go | 2 +- merge.go | 2 +- pca.go | 2 +- tilelib.go | 46 ++++++++++++++++++++++++++++++++-------------- 7 files changed, 38 insertions(+), 20 deletions(-) diff --git a/annotate.go b/annotate.go index 22b4f3bf57..25651889aa 100644 --- a/annotate.go +++ b/annotate.go @@ -118,7 +118,7 @@ func (cmd *annotatecmd) RunCommand(prog string, args []string, stdin io.Reader, retainNoCalls: true, retainTileSequences: true, } - err = tilelib.LoadGob(context.Background(), input, strings.HasSuffix(*inputFilename, ".gz"), nil) + err = tilelib.LoadGob(context.Background(), input, strings.HasSuffix(*inputFilename, ".gz")) if err != nil { return 1 } diff --git a/export.go b/export.go index c74baeb645..1ea08fe757 100644 --- a/export.go +++ b/export.go @@ -175,7 +175,7 @@ func (cmd *exporter) RunCommand(prog string, args []string, stdin io.Reader, std retainTileSequences: true, compactGenomes: map[string][]tileVariantID{}, } - err = tilelib.LoadDir(context.Background(), *inputDir, nil) + err = tilelib.LoadDir(context.Background(), *inputDir) if err != nil { return 1 } diff --git a/exportnumpy.go b/exportnumpy.go index 8815b4b26a..39f228a1b0 100644 --- a/exportnumpy.go +++ b/exportnumpy.go @@ -112,7 +112,7 @@ func (cmd *exportNumpy) RunCommand(prog string, args []string, stdin io.Reader, retainTileSequences: true, compactGenomes: map[string][]tileVariantID{}, } - err = tilelib.LoadDir(context.Background(), *inputDir, nil) + err = tilelib.LoadDir(context.Background(), *inputDir) if err != nil { return 1 } diff --git a/flake.go b/flake.go index 7631233854..7f583fd26f 100644 --- a/flake.go +++ b/flake.go @@ -87,7 +87,7 @@ func (cmd *flakecmd) RunCommand(prog string, args []string, stdin io.Reader, std retainTileSequences: true, compactGenomes: map[string][]tileVariantID{}, } - err = tilelib.LoadDir(context.Background(), *inputDir, nil) + err = tilelib.LoadDir(context.Background(), *inputDir) if err != nil { return 1 } diff --git a/merge.go b/merge.go index d59ff29a91..68560edbd0 100644 --- a/merge.go +++ b/merge.go @@ -177,7 +177,7 @@ func (cmd *merger) doMerge() error { go func(input string) { defer wg.Done() log.Printf("%s: reading", input) - err := cmd.tilelib.LoadGob(ctx, rdr, strings.HasSuffix(input, ".gz"), nil) + err := cmd.tilelib.LoadGob(ctx, rdr, strings.HasSuffix(input, ".gz")) if err != nil { cmd.setError(fmt.Errorf("%s: load failed: %w", input, err)) cancel() diff --git a/pca.go b/pca.go index f880b1d043..121925d4c5 100644 --- a/pca.go +++ b/pca.go @@ -151,7 +151,7 @@ func (cmd *goPCA) RunCommand(prog string, args []string, stdin io.Reader, stdout retainNoCalls: true, compactGenomes: map[string][]tileVariantID{}, } - err = tilelib.LoadGob(context.Background(), input, strings.HasSuffix(*inputFilename, ".gz"), nil) + err = tilelib.LoadGob(context.Background(), input, strings.HasSuffix(*inputFilename, ".gz")) if err != nil { return 1 } diff --git a/tilelib.go b/tilelib.go index a3b63ec78f..4ad607c1e5 100644 --- a/tilelib.go +++ b/tilelib.go @@ -72,6 +72,10 @@ type tileLibrary struct { // if non-nil, write out any tile variants added while tiling encoder *gob.Encoder + onAddTileVariant func(libref tileLibRef, hash [blake2b.Size256]byte, seq []byte) error + onAddGenome func(CompactGenome) error + onAddRefseq func(CompactSequence) error + mtx sync.RWMutex vlock []sync.Locker } @@ -121,7 +125,7 @@ func (tilelib *tileLibrary) loadTileVariants(tvs []TileVariant, variantmap map[t return nil } -func (tilelib *tileLibrary) loadCompactGenomes(cgs []CompactGenome, variantmap map[tileLibRef]tileVariantID, onLoadGenome func(CompactGenome)) error { +func (tilelib *tileLibrary) loadCompactGenomes(cgs []CompactGenome, variantmap map[tileLibRef]tileVariantID) error { log.Debugf("loadCompactGenomes: %d", len(cgs)) var wg sync.WaitGroup errs := make(chan error, 1) @@ -150,8 +154,15 @@ func (tilelib *tileLibrary) loadCompactGenomes(cgs []CompactGenome, variantmap m // log.Tracef("loadCompactGenomes: cg %s tag %d variant %d => %d", cg.Name, tag, variant, newvariant) cg.Variants[i] = newvariant } - if onLoadGenome != nil { - onLoadGenome(cg) + if tilelib.onAddGenome != nil { + err := tilelib.onAddGenome(cg) + if err != nil { + select { + case errs <- err: + default: + } + return + } } if tilelib.encoder != nil { err := tilelib.encoder.Encode(LibraryEntry{ @@ -203,6 +214,12 @@ func (tilelib *tileLibrary) loadCompactSequences(cseqs []CompactSequence, varian return err } } + if tilelib.onAddRefseq != nil { + err := tilelib.onAddRefseq(cseq) + if err != nil { + return err + } + } log.Infof("loadCompactSequences: checking %s done", cseq.Name) } tilelib.mtx.Lock() @@ -217,7 +234,7 @@ func (tilelib *tileLibrary) loadCompactSequences(cseqs []CompactSequence, varian return nil } -func (tilelib *tileLibrary) LoadDir(ctx context.Context, path string, onLoadGenome func(CompactGenome)) error { +func (tilelib *tileLibrary) LoadDir(ctx context.Context, path string) error { var files []string var walk func(string) error walk = func(path string) error { @@ -318,7 +335,7 @@ func (tilelib *tileLibrary) LoadDir(ctx context.Context, path string, onLoadGeno for _, cgs := range allcgs { flatcgs = append(flatcgs, cgs...) } - err = tilelib.loadCompactGenomes(flatcgs, allvariantmap, onLoadGenome) + err = tilelib.loadCompactGenomes(flatcgs, allvariantmap) if err != nil { return err } @@ -455,9 +472,7 @@ func (tilelib *tileLibrary) WriteDir(dir string) error { // Load library data from rdr. Tile variants might be renumbered in // the process; in that case, genomes variants will be renumbered to // match. -// -// If onLoadGenome is non-nil, call it on each CompactGenome entry. -func (tilelib *tileLibrary) LoadGob(ctx context.Context, rdr io.Reader, gz bool, onLoadGenome func(CompactGenome)) error { +func (tilelib *tileLibrary) LoadGob(ctx context.Context, rdr io.Reader, gz bool) error { cgs := []CompactGenome{} cseqs := []CompactSequence{} variantmap := map[tileLibRef]tileVariantID{} @@ -481,7 +496,7 @@ func (tilelib *tileLibrary) LoadGob(ctx context.Context, rdr io.Reader, gz bool, if ctx.Err() != nil { return ctx.Err() } - err = tilelib.loadCompactGenomes(cgs, variantmap, onLoadGenome) + err = tilelib.loadCompactGenomes(cgs, variantmap) if err != nil { return err } @@ -764,12 +779,12 @@ func (tilelib *tileLibrary) getRef(tag tagID, seq []byte) tileLibRef { locker.Unlock() } + saveSeq := seq + if dropSeq { + // Save the hash, but not the sequence + saveSeq = nil + } if tilelib.encoder != nil { - saveSeq := seq - if dropSeq { - // Save the hash, but not the sequence - saveSeq = nil - } tilelib.encoder.Encode(LibraryEntry{ TileVariants: []TileVariant{{ Tag: tag, @@ -779,6 +794,9 @@ func (tilelib *tileLibrary) getRef(tag tagID, seq []byte) tileLibRef { }}, }) } + if tilelib.onAddTileVariant != nil { + tilelib.onAddTileVariant(tileLibRef{tag, variant}, seqhash, saveSeq) + } return tileLibRef{Tag: tag, Variant: variant} } -- 2.30.2