From dcfa4d7541c720b503ed1b0bde689158d162ac5b Mon Sep 17 00:00:00 2001 From: Tom Clegg Date: Wed, 16 Mar 2022 13:16:22 -0400 Subject: [PATCH] Write chunk-tag-offset.csv with chunked tilevariant# matrix. refs #17996 Arvados-DCO-1.1-Signed-off-by: Tom Clegg --- slicenumpy.go | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/slicenumpy.go b/slicenumpy.go index 49b81ca6e0..e0d58c4891 100644 --- a/slicenumpy.go +++ b/slicenumpy.go @@ -360,6 +360,7 @@ func (cmd *sliceNumpy) RunCommand(prog string, args []string, stdin io.Reader, s onehotChunkSize = make([]uint32, len(infiles)) onehotXrefs = make([][]onehotXref, len(infiles)) } + chunkStartTag := make([]tagID, len(infiles)) throttleMem := throttle{Max: cmd.threads} // TODO: estimate using mem and data size throttleNumpyMem := throttle{Max: cmd.threads/2 + 1} @@ -425,6 +426,7 @@ func (cmd *sliceNumpy) RunCommand(prog string, args []string, stdin io.Reader, s } tagstart := cgs[cmd.cgnames[0]].StartTag tagend := cgs[cmd.cgnames[0]].EndTag + chunkStartTag[infileIdx] = tagstart // TODO: filters @@ -1004,6 +1006,28 @@ func (cmd *sliceNumpy) RunCommand(prog string, args []string, stdin io.Reader, s return 1 } } + if !*mergeOutput && !*onehotChunked && !*onehotSingle { + tagoffsetFilename := *outputDir + "/chunk-tag-offset.csv" + log.Infof("writing tag offsets to %s", tagoffsetFilename) + var f *os.File + f, err = os.Create(tagoffsetFilename) + if err != nil { + return 1 + } + defer f.Close() + for idx, offset := range chunkStartTag { + _, err = fmt.Fprintf(f, "%q,%d\n", fmt.Sprintf("matrix.%04d.npy", idx), offset) + if err != nil { + err = fmt.Errorf("write %s: %w", tagoffsetFilename, err) + return 1 + } + } + err = f.Close() + if err != nil { + err = fmt.Errorf("close %s: %w", tagoffsetFilename, err) + return 1 + } + } return 0 } -- 2.30.2