+// useCaseControlFiles reads the case/control TSV file(s) named by
+// cmd.chi2CaseControlFile, removes non-case/control entries from
+// cmd.cgnames, and builds cmd.chi2Cases.
+//
+// It is a no-op if cmd.chi2CaseControlFile is empty.
+//
+// In each input file, the first non-empty line is a header row that
+// must contain a column named cmd.chi2CaseControlColumn; otherwise
+// an error is returned. In every following row, the first field is
+// a pattern that is matched as a substring against each entry of
+// cmd.cgnames, and the case/control column marks the matched genome
+// as a case ("1") or a control ("0"). Any other value leaves the
+// genome's status untouched. Rows too short to contain the
+// case/control column are ignored.
+//
+// On success, cmd.cgnames contains only the genomes that were
+// marked, in their original order, and cmd.chi2Cases[i] reports
+// whether cmd.cgnames[i] is a case.
+func (cmd *sliceNumpy) useCaseControlFiles() error {
+	if cmd.chi2CaseControlFile == "" {
+		return nil
+	}
+	infiles, err := allFiles(cmd.chi2CaseControlFile, nil)
+	if err != nil {
+		return err
+	}
+	// index in cmd.cgnames => case(true) / control(false)
+	cc := map[int]bool{}
+	for _, infile := range infiles {
+		f, err := open(infile)
+		if err != nil {
+			return err
+		}
+		buf, err := io.ReadAll(f)
+		f.Close()
+		if err != nil {
+			// Read errors do not necessarily mention the file
+			// name, so add it here.
+			return fmt.Errorf("%s: %w", infile, err)
+		}
+		ccCol := -1
+		for _, tsv := range bytes.Split(buf, []byte{'\n'}) {
+			// Tolerate CRLF line endings: a stray '\r' would
+			// otherwise stick to the last field and defeat
+			// the header and "0"/"1" comparisons below.
+			tsv = bytes.TrimSuffix(tsv, []byte{'\r'})
+			if len(tsv) == 0 {
+				continue
+			}
+			split := strings.Split(string(tsv), "\t")
+			if ccCol < 0 {
+				// header row
+				for col, name := range split {
+					if name == cmd.chi2CaseControlColumn {
+						ccCol = col
+						break
+					}
+				}
+				if ccCol < 0 {
+					return fmt.Errorf("%s: no column named %q in header row %q", infile, cmd.chi2CaseControlColumn, tsv)
+				}
+				continue
+			}
+			if len(split) <= ccCol {
+				continue
+			}
+			pattern := split[0]
+			if pattern == "" {
+				// An empty pattern is a substring of every
+				// genome ID, so it cannot identify one.
+				log.Warnf("empty pattern in %s row %q, skipping", infile, tsv)
+				continue
+			}
+			found := -1
+			for i, name := range cmd.cgnames {
+				if !strings.Contains(name, pattern) {
+					continue
+				}
+				if found >= 0 {
+					log.Warnf("pattern %q in %s matches multiple genome IDs (%q, %q)", pattern, infile, cmd.cgnames[found], name)
+				}
+				// When several IDs match, the last one wins.
+				found = i
+			}
+			if found < 0 {
+				log.Warnf("pattern %q in %s does not match any genome IDs", pattern, infile)
+				continue
+			}
+			switch split[ccCol] {
+			case "0":
+				cc[found] = false
+			case "1":
+				cc[found] = true
+			}
+		}
+	}
+	allnames := cmd.cgnames
+	cmd.cgnames = nil
+	cmd.chi2Cases = nil
+	ncases := 0
+	for i, name := range allnames {
+		isCase, ok := cc[i]
+		if !ok {
+			// neither case nor control: drop
+			continue
+		}
+		cmd.cgnames = append(cmd.cgnames, name)
+		cmd.chi2Cases = append(cmd.chi2Cases, isCase)
+		if isCase {
+			ncases++
+		}
+	}
+	log.Printf("%d cases, %d controls, %d neither (dropped)", ncases, len(cmd.cgnames)-ncases, len(allnames)-len(cmd.cgnames))
+	return nil
+}
+