Option to treat tiles with no-calls as regular tiles.
author Tom Clegg <tom@tomclegg.ca>
Fri, 25 Sep 2020 20:29:23 +0000 (16:29 -0400)
committer Tom Clegg <tom@tomclegg.ca>
Fri, 25 Sep 2020 20:29:23 +0000 (16:29 -0400)
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom@tomclegg.ca>

import.go
tilelib.go

index 3ce00ce19c2e96f5643a143358692978c3dbf33b..e24b25dfac3ecc26a8aac0346835fb9b527b27f0 100644 (file)
--- a/import.go
+++ b/import.go
@@ -33,6 +33,7 @@ type importer struct {
        runLocal       bool
        skipOOO        bool
        outputTiles    bool
+       includeNoCalls bool
        encoder        *gob.Encoder
 }
 
@@ -52,6 +53,7 @@ func (cmd *importer) RunCommand(prog string, args []string, stdin io.Reader, std
        flags.BoolVar(&cmd.runLocal, "local", false, "run on local host (default: run in an arvados container)")
        flags.BoolVar(&cmd.skipOOO, "skip-ooo", false, "skip out-of-order tags")
        flags.BoolVar(&cmd.outputTiles, "output-tiles", false, "include tile variant sequences in output file")
+       flags.BoolVar(&cmd.includeNoCalls, "include-no-calls", false, "treat tiles with no-calls as regular tiles")
        priority := flags.Int("priority", 500, "container request priority")
        pprof := flags.String("pprof", "", "serve Go profile data at http://`[addr]:port`")
        loglevel := flags.String("loglevel", "info", "logging threshold (trace, debug, info, warn, error, fatal, or panic)")
@@ -110,7 +112,16 @@ func (cmd *importer) RunCommand(prog string, args []string, stdin io.Reader, std
                        err = errors.New("cannot specify output file in container mode: not implemented")
                        return 1
                }
-               runner.Args = append([]string{"import", "-local=true", "-loglevel=" + *loglevel, fmt.Sprintf("-skip-ooo=%v", cmd.skipOOO), "-tag-library", cmd.tagLibraryFile, "-ref", cmd.refFile, fmt.Sprintf("-output-tiles=%v", cmd.outputTiles), "-o", cmd.outputFile}, inputs...)
+               runner.Args = append([]string{"import",
+                       "-local=true",
+                       "-loglevel=" + *loglevel,
+                       fmt.Sprintf("-skip-ooo=%v", cmd.skipOOO),
+                       fmt.Sprintf("-output-tiles=%v", cmd.outputTiles),
+                       fmt.Sprintf("-include-no-calls=%v", cmd.includeNoCalls),
+                       "-tag-library", cmd.tagLibraryFile,
+                       "-ref", cmd.refFile,
+                       "-o", cmd.outputFile,
+               }, inputs...)
                var output string
                output, err = runner.Run()
                if err != nil {
@@ -143,7 +154,7 @@ func (cmd *importer) RunCommand(prog string, args []string, stdin io.Reader, std
        bufw := bufio.NewWriter(output)
        cmd.encoder = gob.NewEncoder(bufw)
 
-       tilelib := &tileLibrary{taglib: taglib, skipOOO: cmd.skipOOO}
+       tilelib := &tileLibrary{taglib: taglib, includeNoCalls: cmd.includeNoCalls, skipOOO: cmd.skipOOO}
        if cmd.outputTiles {
                tilelib.encoder = cmd.encoder
        }
index ca3857566f2500e85823dcc4eecd06370e9389a8..47de7b048f3ecb465a38ac9d8facbc94cf9bef1d 100644 (file)
--- a/tilelib.go
+++ b/tilelib.go
@@ -46,9 +46,10 @@ func (tseq tileSeq) Variants() ([]tileVariantID, int, int) {
 }
 
 type tileLibrary struct {
-       skipOOO bool
-       taglib  *tagLibrary
-       variant [][][blake2b.Size256]byte
+       includeNoCalls bool
+       skipOOO        bool
+       taglib         *tagLibrary
+       variant        [][][blake2b.Size256]byte
        // count [][]int
        // seq map[[blake2b.Size]byte][]byte
        variants int
@@ -148,11 +149,13 @@ func (tilelib *tileLibrary) Len() int {
 // Return a tileLibRef for a tile with the given tag and sequence,
 // adding the sequence to the library if needed.
 func (tilelib *tileLibrary) getRef(tag tagID, seq []byte) tileLibRef {
-       for _, b := range seq {
-               if b != 'a' && b != 'c' && b != 'g' && b != 't' {
-                       // return "tile not found" if seq has any
-                       // no-calls
-                       return tileLibRef{tag: tag}
+       if !tilelib.includeNoCalls {
+               for _, b := range seq {
+                       if b != 'a' && b != 'c' && b != 'g' && b != 't' {
+                               // return "tile not found" if seq has any
+                               // no-calls
+                               return tileLibRef{tag: tag}
+                       }
                }
        }
        tilelib.mtx.Lock()