var chisquared = distuv.ChiSquared{K: 1, Src: rand.NewSource(rand.Uint64())}
-func pvalue(a, b []bool) float64 {
- // !b b
- // !a tab[0] tab[1]
- // a tab[2] tab[3]
- tab := make([]int, 4)
- for ai, aval := range []bool{false, true} {
- for bi, bval := range []bool{false, true} {
- obs := 0
- for i := range a {
- if a[i] == aval && b[i] == bval {
- obs++
- }
+func pvalue(x, y []bool) float64 {
+ var (
+ obs, exp [2]float64
+ sum float64
+ sz = float64(len(y))
+ )
+ for i, yi := range y {
+ if x[i] {
+ if yi {
+ obs[0]++
+ } else {
+ obs[1]++
}
- tab[ai*2+bi] = obs
}
- }
- var sum float64
- for ai := 0; ai < 2; ai++ {
- for bi := 0; bi < 2; bi++ {
- rowtotal := tab[ai*2] + tab[ai*2+1]
- coltotal := tab[bi] + tab[2+bi]
- if rowtotal == 0 || coltotal == 0 {
- return 1
- }
- exp := float64(rowtotal) * float64(coltotal) / float64(len(a))
- obs := tab[ai*2+bi]
- d := float64(obs) - exp
- sum += (d * d) / exp
+ if yi {
+ exp[0]++
+ } else {
+ exp[1]++
}
}
+ if exp[0] == 0 || exp[1] == 0 || obs[0]+obs[1] == 0 {
+ return 1
+ }
+ exp[0] = (obs[0] + obs[1]) * exp[0] / sz
+ exp[1] = (obs[0] + obs[1]) * exp[1] / sz
+ for i := range exp {
+ d := obs[i] - exp[i]
+ sum += d * d / exp[i]
+ }
return 1 - chisquared.CDF(sum)
}
for i := 25; i < 31; i++ {
a[i] = true
}
- for i := 31; i < 39; i++ {
+ for i := 31; i < 40; i++ {
b[i] = true
}
- c.Check(fmt.Sprintf("%.7f", pvalue(a, b)), check.Equals, "0.0006297")
+ c.Check(fmt.Sprintf("%.8f", pvalue(a, b)), check.Equals, "0.04147853")
+
+ a = make([]bool, 54)
+ b = make([]bool, 54)
+ for i := 0; i < 25; i++ {
+ a[i] = true
+ b[i] = true
+ }
+ c.Check(fmt.Sprintf("%.9f", pvalue(a, b)), check.Equals, "0.000000072")
+ for i := range a {
+ a[i] = !a[i]
+ }
+ c.Check(fmt.Sprintf("%.9f", pvalue(a, b)), check.Equals, "0.000000573")
+
+ a = []bool{true, true, true, false, true, false, false, false}
+ b = []bool{true, true, true, true, false, false, false, false}
+ c.Check(fmt.Sprintf("%.8f", pvalue(a, b)), check.Equals, "0.31731051")
for i := range a {
a[i] = !a[i]
}
- c.Check(fmt.Sprintf("%.7f", pvalue(a, b)), check.Equals, "0.0006297")
+ c.Check(fmt.Sprintf("%.8f", pvalue(a, b)), check.Equals, "0.31731051")
}
"-chunked-hgvs-matrix=true",
"-chi2-case-control-file=" + tmpdir + "/casecontrol.tsv",
"-chi2-case-control-column=CC",
- "-chi2-p-value=0.05",
+ "-chi2-p-value=0.5",
"-min-coverage=0.75",
"-input-dir=" + slicedir,
"-output-dir=" + npydir,
"-chunked-onehot=true",
"-chi2-case-control-file=" + tmpdir + "/casecontrol.tsv",
"-chi2-case-control-column=CC",
- "-chi2-p-value=0.05",
+ "-chi2-p-value=0.5",
"-min-coverage=0.75",
"-input-dir=" + slicedir,
"-output-dir=" + npydir,
"-single-onehot=true",
"-chi2-case-control-file=" + tmpdir + "/casecontrol.tsv",
"-chi2-case-control-column=CC",
- "-chi2-p-value=0.05",
+ "-chi2-p-value=0.5",
"-min-coverage=0.75",
"-input-dir=" + slicedir,
"-output-dir=" + npydir,
cases = append(cases, c)
}
return len(cases) >= cmd.minCoverage &&
- (pvalue(cases, col0) <= cmd.chi2PValue || pvalue(cases, col1) <= cmd.chi2PValue)
+ (pvalue(col0, cases) <= cmd.chi2PValue || pvalue(col1, cases) <= cmd.chi2PValue)
}
func writeNumpyInt32(fnm string, out []int32, rows, cols int) error {
var xref []onehotXref
for homcol := 4; homcol < len(obs); homcol += 2 {
p := [2]float64{
- pvalue(cmd.chi2Cases, obs[homcol]),
- pvalue(cmd.chi2Cases, obs[homcol+1]),
+ pvalue(obs[homcol], cmd.chi2Cases),
+ pvalue(obs[homcol+1], cmd.chi2Cases),
}
if cmd.chi2PValue < 1 && !(p[0] < cmd.chi2PValue || p[1] < cmd.chi2PValue) {
continue