1 // Copyright (C) The Lightning Authors. All rights reserved.
3 // SPDX-License-Identifier: AGPL-3.0
13 "github.com/kshedden/statmodel/glm"
14 "github.com/kshedden/statmodel/statmodel"
15 "gonum.org/v1/gonum/stat"
16 "gonum.org/v1/gonum/stat/distuv"
19 var glmConfig = &glm.Config{
20 Family: glm.NewFamily(glm.BinomialFamily),
23 Log: log.New(io.Discard, "", 0),
26 func normalize(a []float64) {
27 mean, std := stat.MeanStdDev(a, nil)
29 a[i] = (x - mean) / std
33 // Logistic regression.
35 // onehot is the observed outcome, in same order as sampleInfo, but
36 // shorter because it only has entries for samples with
38 func glmPvalueFunc(sampleInfo []sampleInfo, nPCA int) func(onehot []bool) float64 {
39 pcaNames := make([]string, 0, nPCA)
40 data := make([][]statmodel.Dtype, 0, nPCA)
41 for pca := 0; pca < nPCA; pca++ {
42 series := make([]statmodel.Dtype, 0, len(sampleInfo))
43 for _, si := range sampleInfo {
45 series = append(series, si.pcaComponents[pca])
49 data = append(data, series)
50 pcaNames = append(pcaNames, fmt.Sprintf("pca%d", pca))
53 outcome := make([]statmodel.Dtype, 0, len(sampleInfo))
54 constants := make([]statmodel.Dtype, 0, len(sampleInfo))
56 for _, si := range sampleInfo {
59 outcome = append(outcome, 1)
61 outcome = append(outcome, 0)
63 constants = append(constants, 1)
67 data = append([][]statmodel.Dtype{outcome, constants}, data...)
68 names := append([]string{"outcome", "constants"}, pcaNames...)
69 dataset := statmodel.NewDataset(data, names)
71 model, err := glm.NewGLM(dataset, "outcome", names[1:], glmConfig)
74 return func([]bool) float64 { return math.NaN() }
76 resultCov := model.Fit()
77 logCov := resultCov.LogLike()
79 return func(onehot []bool) (p float64) {
82 // typically "matrix singular or near-singular with condition number +Inf"
87 variant := make([]statmodel.Dtype, 0, len(sampleInfo))
89 for _, si := range sampleInfo {
92 variant = append(variant, 1)
94 variant = append(variant, 0)
100 data := append([][]statmodel.Dtype{data[0], variant}, data[1:]...)
101 names := append([]string{"outcome", "variant"}, names[1:]...)
102 dataset := statmodel.NewDataset(data, names)
104 model, err := glm.NewGLM(dataset, "outcome", names[1:], glmConfig)
108 resultComp := model.Fit()
109 logComp := resultComp.LogLike()
110 dist := distuv.ChiSquared{K: 1}
111 return dist.Survival(-2 * (logCov - logComp))