From ac8e070a93cc50cbb3a74695b5d45426c4808eb0 Mon Sep 17 00:00:00 2001 From: Tom Clegg Date: Fri, 25 Sep 2020 16:29:23 -0400 Subject: [PATCH] Option to treat tiles with no-calls as regular tiles. Arvados-DCO-1.1-Signed-off-by: Tom Clegg --- import.go | 15 +++++++++++++-- tilelib.go | 19 +++++++++++-------- 2 files changed, 24 insertions(+), 10 deletions(-) diff --git a/import.go b/import.go index 3ce00ce19c..e24b25dfac 100644 --- a/import.go +++ b/import.go @@ -33,6 +33,7 @@ type importer struct { runLocal bool skipOOO bool outputTiles bool + includeNoCalls bool encoder *gob.Encoder } @@ -52,6 +53,7 @@ func (cmd *importer) RunCommand(prog string, args []string, stdin io.Reader, std flags.BoolVar(&cmd.runLocal, "local", false, "run on local host (default: run in an arvados container)") flags.BoolVar(&cmd.skipOOO, "skip-ooo", false, "skip out-of-order tags") flags.BoolVar(&cmd.outputTiles, "output-tiles", false, "include tile variant sequences in output file") + flags.BoolVar(&cmd.includeNoCalls, "include-no-calls", false, "treat tiles with no-calls as regular tiles") priority := flags.Int("priority", 500, "container request priority") pprof := flags.String("pprof", "", "serve Go profile data at http://`[addr]:port`") loglevel := flags.String("loglevel", "info", "logging threshold (trace, debug, info, warn, error, fatal, or panic)") @@ -110,7 +112,16 @@ func (cmd *importer) RunCommand(prog string, args []string, stdin io.Reader, std err = errors.New("cannot specify output file in container mode: not implemented") return 1 } - runner.Args = append([]string{"import", "-local=true", "-loglevel=" + *loglevel, fmt.Sprintf("-skip-ooo=%v", cmd.skipOOO), "-tag-library", cmd.tagLibraryFile, "-ref", cmd.refFile, fmt.Sprintf("-output-tiles=%v", cmd.outputTiles), "-o", cmd.outputFile}, inputs...) + runner.Args = append([]string{"import", + "-local=true", + "-loglevel=" + *loglevel, + fmt.Sprintf("-skip-ooo=%v", cmd.skipOOO), + fmt.Sprintf("-output-tiles=%v", cmd.outputTiles), + fmt.Sprintf("-include-no-calls=%v", cmd.includeNoCalls), + "-tag-library", cmd.tagLibraryFile, + "-ref", cmd.refFile, + "-o", cmd.outputFile, + }, inputs...) var output string output, err = runner.Run() if err != nil { @@ -143,7 +154,7 @@ func (cmd *importer) RunCommand(prog string, args []string, stdin io.Reader, std bufw := bufio.NewWriter(output) cmd.encoder = gob.NewEncoder(bufw) - tilelib := &tileLibrary{taglib: taglib, skipOOO: cmd.skipOOO} + tilelib := &tileLibrary{taglib: taglib, includeNoCalls: cmd.includeNoCalls, skipOOO: cmd.skipOOO} if cmd.outputTiles { tilelib.encoder = cmd.encoder } diff --git a/tilelib.go b/tilelib.go index ca3857566f..47de7b048f 100644 --- a/tilelib.go +++ b/tilelib.go @@ -46,9 +46,10 @@ func (tseq tileSeq) Variants() ([]tileVariantID, int, int) { } type tileLibrary struct { - skipOOO bool - taglib *tagLibrary - variant [][][blake2b.Size256]byte + includeNoCalls bool + skipOOO bool + taglib *tagLibrary + variant [][][blake2b.Size256]byte // count [][]int // seq map[[blake2b.Size]byte][]byte variants int @@ -148,11 +149,13 @@ func (tilelib *tileLibrary) Len() int { // Return a tileLibRef for a tile with the given tag and sequence, // adding the sequence to the library if needed. func (tilelib *tileLibrary) getRef(tag tagID, seq []byte) tileLibRef { - for _, b := range seq { - if b != 'a' && b != 'c' && b != 'g' && b != 't' { - // return "tile not found" if seq has any - // no-calls - return tileLibRef{tag: tag} + if !tilelib.includeNoCalls { + for _, b := range seq { + if b != 'a' && b != 'c' && b != 'g' && b != 't' { + // return "tile not found" if seq has any + // no-calls + return tileLibRef{tag: tag} + } } } tilelib.mtx.Lock() -- 2.30.2