chi2Cases []bool
chi2PValue float64
pvalueMinFrequency float64
+ maxFrequency float64
pcaComponents int
minCoverage int
includeVariant1 bool
flags.IntVar(&cmd.threads, "threads", 16, "number of memory-hungry assembly threads, and number of VCPUs to request for arvados container")
flags.Float64Var(&cmd.chi2PValue, "chi2-p-value", 1, "do Χ² test (or logistic regression if -samples file has PCA components) and omit columns with p-value above this threshold")
flags.Float64Var(&cmd.pvalueMinFrequency, "pvalue-min-frequency", 0.01, "skip p-value calculation on tile variants below this frequency in the training set")
+ flags.Float64Var(&cmd.maxFrequency, "max-frequency", 1, "do not output variants above this frequency in the training set")
flags.BoolVar(&cmd.includeVariant1, "include-variant-1", false, "include most common variant when building one-hot matrix")
cmd.filter.Flags(flags)
err := flags.Parse(args)
"-max-pca-tiles=" + fmt.Sprintf("%d", *maxPCATiles),
"-chi2-p-value=" + fmt.Sprintf("%f", cmd.chi2PValue),
"-pvalue-min-frequency=" + fmt.Sprintf("%f", cmd.pvalueMinFrequency),
+ "-max-frequency=" + fmt.Sprintf("%f", cmd.maxFrequency),
"-include-variant-1=" + fmt.Sprintf("%v", cmd.includeVariant1),
"-debug-tag=" + fmt.Sprintf("%d", cmd.debugTag),
}
if err == errSkip {
return nil
} else if err != nil {
- return fmt.Errorf("%04d: DecodeLibrary(%s): err", infileIdx, infile)
+ return fmt.Errorf("%04d: DecodeLibrary(%s): %w", infileIdx, infile, err)
}
tagstart := cgs[cmd.cgnames[0]].StartTag
tagend := cgs[cmd.cgnames[0]].EndTag
for i := range cmd.samples {
cmd.samples[i].pcaComponents = make([]float64, outcols)
for c := 0; c < outcols; c++ {
- cmd.samples[i].pcaComponents[i] = pca.At(i, c)
+ cmd.samples[i].pcaComponents[c] = pca.At(i, c)
}
}
log.Print("done")
}
if col&1 == 0 {
maf = homhet2maf(obs[col : col+2])
- if cmd.pvalueMinFrequency < 1 && maf < cmd.pvalueMinFrequency {
+ if maf < cmd.pvalueMinFrequency {
// Skip both columns (hom and het) if
// allele frequency is below threshold
col++
continue
}
+ if maf > cmd.maxFrequency {
+ // Skip both columns (hom and het) if
+ // allele frequency is above threshold
+ col++
+ continue
+ }
}
atomic.AddInt64(&cmd.pvalueCallCount, 1)
p := cmd.pvalue(obs[col])