+ if !*mergeOutput && !*onehotChunked && !*onehotSingle && !*onlyPCA {
+ tagoffsetFilename := *outputDir + "/chunk-tag-offset.csv"
+ log.Infof("writing tag offsets to %s", tagoffsetFilename)
+ var f *os.File
+ f, err = os.Create(tagoffsetFilename)
+ if err != nil {
+ return err
+ }
+ defer f.Close()
+ for idx, offset := range chunkStartTag {
+ _, err = fmt.Fprintf(f, "%q,%d\n", fmt.Sprintf("matrix.%04d.npy", idx), offset)
+ if err != nil {
+ err = fmt.Errorf("write %s: %w", tagoffsetFilename, err)
+ return err
+ }
+ }
+ err = f.Close()
+ if err != nil {
+ err = fmt.Errorf("close %s: %w", tagoffsetFilename, err)
+ return err
+ }
+ }
+
+ return nil
+}
+
+ type sampleInfo struct { // one sample's metadata; loadSampleInfo returns a slice of these
+ id string // sample identifier (presumably the SampleID column -- confirm in loadSampleInfo)
+ isCase bool // case flag (presumably derived from the CaseControl column -- confirm)
+ isControl bool // control flag (presumably derived from the CaseControl column -- confirm)
+ isTraining bool // training-set flag (presumably derived from the TrainingValidation column -- confirm)
+ isValidation bool // validation-set flag (presumably derived from the TrainingValidation column -- confirm)
+ pcaComponents []float64 // optional PCA components (presumably the float fields after the 4th column -- confirm)
+ }
+
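+ // loadSampleInfo reads a samples.csv file with case/control and
+ // training/validation flags. Each non-empty line must have at least
+ // 4 comma-separated fields (Index, SampleID, CaseControl,
+ // TrainingValidation); any further fields are parsed as floats.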
+func loadSampleInfo(samplesFilename string) ([]sampleInfo, error) {
+ var si []sampleInfo
+ f, err := open(samplesFilename)
+ if err != nil {
+ return nil, err
+ }
+ buf, err := io.ReadAll(f)
+ f.Close()
+ if err != nil {
+ return nil, err
+ }
+ lineNum := 0
+ for _, csv := range bytes.Split(buf, []byte{'\n'}) {
+ lineNum++
+ if len(csv) == 0 {
+ continue
+ }
+ split := strings.Split(string(csv), ",")
+ if len(split) < 4 {
+ return nil, fmt.Errorf("%d fields < 4 in %s line %d: %q", len(split), samplesFilename, lineNum, csv)
+ }
+ if split[0] == "Index" && split[1] == "SampleID" && split[2] == "CaseControl" && split[3] == "TrainingValidation" {
+ continue
+ }
+ idx, err := strconv.Atoi(split[0])
+ if err != nil {
+ if lineNum == 1 {
+ return nil, fmt.Errorf("header does not look right: %q", csv)
+ }
+ return nil, fmt.Errorf("%s line %d: index: %s", samplesFilename, lineNum, err)
+ }
+ if idx != len(si) {
+ return nil, fmt.Errorf("%s line %d: index %d out of order", samplesFilename, lineNum, idx)
+ }
+ var pcaComponents []float64
+ if len(split) > 4 {
+ for _, s := range split[4:] {
+ f, err := strconv.ParseFloat(s, 64)
+ if err != nil {
+ return nil, fmt.Errorf("%s line %d: cannot parse float %q: %s", samplesFilename, lineNum, s, err)
+ }
+ pcaComponents = append(pcaComponents, f)