Fix loss of precision in p-value calculation.
authorTom Clegg <tom@curii.com>
Fri, 3 Jun 2022 04:05:42 +0000 (00:05 -0400)
committerTom Clegg <tom@curii.com>
Fri, 3 Jun 2022 04:05:42 +0000 (00:05 -0400)
refs #19014

Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom@curii.com>

chisquare.go
chisquare_test.go

index a044ced1eb127532124297b3604c7360423e552c..65d11e6f06b6ff3f382bea665ff45d09e58872b5 100644 (file)
@@ -40,5 +40,5 @@ func pvalue(x, y []bool) float64 {
                d := obs[i] - exp[i]
                sum += d * d / exp[i]
        }
-       return 1 - chisquared.CDF(sum)
+       return chisquared.Survival(sum)
 }
index c9db0c6710f0a9d7b9070daeb848516e3f7fd295..823023507ab629c17e33d7c544cd3befb219997d 100644 (file)
@@ -48,4 +48,21 @@ func (s *pvalueSuite) TestPvalue(c *check.C) {
                a[i] = !a[i]
        }
        c.Check(fmt.Sprintf("%.8f", pvalue(a, b)), check.Equals, "0.31731051")
+
+       for _, sz := range []int{128, 1024, 4096, 16384} {
+               c.Logf("sz = %d", sz)
+               a = make([]bool, sz)
+               b = make([]bool, sz)
+               c.Check(fmt.Sprintf("%.8f", pvalue(a, b)), check.Equals, "1.00000000")
+               for i := 0; i < len(a)/23; i++ {
+                       a[i] = true
+               }
+               for i := 0; i < len(a)/2; i++ {
+                       b[i] = true
+               }
+               c.Logf("pvalue(a,b) == %e", pvalue(a, b))
+               c.Logf("pvalue(b,a) == %e", pvalue(b, a))
+               c.Check(pvalue(a, b), check.Not(check.Equals), float64(0))
+               c.Check(pvalue(b, a), check.Not(check.Equals), float64(0))
+       }
 }