From 4c2919861ddfc4c48588cf3336f99efea94eee16 Mon Sep 17 00:00:00 2001 From: Tom Clegg Date: Thu, 17 Feb 2022 09:23:20 -0500 Subject: [PATCH] Fix sparse one-hot coordinates for chunk n>0. refs #18581 Arvados-DCO-1.1-Signed-off-by: Tom Clegg --- slicenumpy.go | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/slicenumpy.go b/slicenumpy.go index 8a1dff0f0c..a7c6e24c57 100644 --- a/slicenumpy.go +++ b/slicenumpy.go @@ -345,9 +345,11 @@ func (cmd *sliceNumpy) RunCommand(prog string, args []string, stdin io.Reader, s toMerge = make([][]int16, len(infiles)) } var onehotIndirect [][2][]uint32 // [chunkIndex][axis][index] + var onehotChunkSize []uint32 var onehotXrefs [][]onehotXref if *onehotSingle { onehotIndirect = make([][2][]uint32, len(infiles)) + onehotChunkSize = make([]uint32, len(infiles)) onehotXrefs = make([][]onehotXref, len(infiles)) } @@ -626,12 +628,13 @@ func (cmd *sliceNumpy) RunCommand(prog string, args []string, stdin io.Reader, s } if *onehotSingle { onehotIndirect[infileIdx] = onehotChunk2Indirect(onehotChunk) + onehotChunkSize[infileIdx] = uint32(len(onehotChunk)) onehotXrefs[infileIdx] = onehotXref n := len(onehotIndirect[infileIdx][0]) - log.Infof("%04d: keeping onehot coordinates in memory (n=%d, mem=%d)", infileIdx, n, n*8) + log.Infof("%04d: keeping onehot coordinates in memory (n=%d, mem=%d)", infileIdx, n, n*8*2) } if !(*onehotSingle || *onehotChunked) || *mergeOutput || *hgvsSingle { - log.Infof("%04d: preparing numpy", infileIdx) + log.Infof("%04d: preparing numpy (rows=%d, cols=%d)", infileIdx, len(cmd.cgnames), 2*outcol) throttleNumpyMem.Acquire() rows := len(cmd.cgnames) cols := 2 * outcol @@ -924,16 +927,19 @@ func (cmd *sliceNumpy) RunCommand(prog string, args []string, stdin io.Reader, s } onehot := make([]uint32, nzCount*2) // [r,r,r,...,c,c,c,...] var xrefs []onehotXref + chunkOffset := uint32(0) outcol := 0 for i, part := range onehotIndirect { for i := range part[1] { - part[1][i] += uint32(outcol) + part[1][i] += chunkOffset } copy(onehot[outcol:], part[0]) copy(onehot[outcol+nzCount:], part[1]) - outcol += len(part[0]) xrefs = append(xrefs, onehotXrefs[i]...) + outcol += len(part[0]) + chunkOffset += onehotChunkSize[i] + part[0] = nil part[1] = nil onehotXrefs[i] = nil -- 2.30.2