Write chunk-tag-offset.csv with chunked tilevariant# matrix.
authorTom Clegg <tom@curii.com>
Wed, 16 Mar 2022 17:16:22 +0000 (13:16 -0400)
committerTom Clegg <tom@curii.com>
Wed, 16 Mar 2022 17:16:22 +0000 (13:16 -0400)
refs #17996

Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom@curii.com>

slicenumpy.go

index 49b81ca6e02ae278d272d666ee71f094bffd6e0a..e0d58c48917d4f16bb29fdc22a6be2d8ca7f4a96 100644 (file)
@@ -360,6 +360,7 @@ func (cmd *sliceNumpy) RunCommand(prog string, args []string, stdin io.Reader, s
                onehotChunkSize = make([]uint32, len(infiles))
                onehotXrefs = make([][]onehotXref, len(infiles))
        }
+       chunkStartTag := make([]tagID, len(infiles))
 
        throttleMem := throttle{Max: cmd.threads} // TODO: estimate using mem and data size
        throttleNumpyMem := throttle{Max: cmd.threads/2 + 1}
@@ -425,6 +426,7 @@ func (cmd *sliceNumpy) RunCommand(prog string, args []string, stdin io.Reader, s
                        }
                        tagstart := cgs[cmd.cgnames[0]].StartTag
                        tagend := cgs[cmd.cgnames[0]].EndTag
+                       chunkStartTag[infileIdx] = tagstart
 
                        // TODO: filters
 
@@ -1004,6 +1006,28 @@ func (cmd *sliceNumpy) RunCommand(prog string, args []string, stdin io.Reader, s
                        return 1
                }
        }
+       if !*mergeOutput && !*onehotChunked && !*onehotSingle {
+               tagoffsetFilename := *outputDir + "/chunk-tag-offset.csv"
+               log.Infof("writing tag offsets to %s", tagoffsetFilename)
+               var f *os.File
+               f, err = os.Create(tagoffsetFilename)
+               if err != nil {
+                       return 1
+               }
+               defer f.Close()
+               for idx, offset := range chunkStartTag {
+                       _, err = fmt.Fprintf(f, "%q,%d\n", fmt.Sprintf("matrix.%04d.npy", idx), offset)
+                       if err != nil {
+                               err = fmt.Errorf("write %s: %w", tagoffsetFilename, err)
+                               return 1
+                       }
+               }
+               err = f.Close()
+               if err != nil {
+                       err = fmt.Errorf("close %s: %w", tagoffsetFilename, err)
+                       return 1
+               }
+       }
        return 0
 }