1 // Copyright (C) The Lightning Authors. All rights reserved.
3 // SPDX-License-Identifier: AGPL-3.0
13 "github.com/kshedden/statmodel/glm"
14 "github.com/kshedden/statmodel/statmodel"
15 "gonum.org/v1/gonum/stat/distuv"
18 var glmConfig = &glm.Config{
19 Family: glm.NewFamily(glm.BinomialFamily),
22 Log: log.New(io.Discard, "", 0),
25 // Logistic regression.
27 // onehot is the observed outcome, in same order as sampleInfo, but
28 // shorter because it only has entries for samples with
30 func pvalueGLM(sampleInfo []sampleInfo, onehot []bool, nPCA int) (p float64) {
31 pcaNames := make([]string, 0, nPCA)
32 data := make([][]statmodel.Dtype, 0, nPCA)
33 for pca := 0; pca < nPCA; pca++ {
34 series := make([]statmodel.Dtype, 0, len(sampleInfo))
35 for _, si := range sampleInfo {
37 series = append(series, si.pcaComponents[pca])
40 data = append(data, series)
41 pcaNames = append(pcaNames, fmt.Sprintf("pca%d", pca))
44 variant := make([]statmodel.Dtype, 0, len(sampleInfo))
45 outcome := make([]statmodel.Dtype, 0, len(sampleInfo))
46 constants := make([]statmodel.Dtype, 0, len(sampleInfo))
48 for _, si := range sampleInfo {
51 variant = append(variant, 1)
53 variant = append(variant, 0)
56 outcome = append(outcome, 1)
58 outcome = append(outcome, 0)
60 constants = append(constants, 1)
64 data = append(data, variant, outcome, constants)
65 dataset := statmodel.NewDataset(data, append(pcaNames, "variant", "outcome", "constants"))
69 // typically "matrix singular or near-singular with condition number +Inf"
73 model, err := glm.NewGLM(dataset, "outcome", append([]string{"constants"}, pcaNames...), glmConfig)
77 resultCov := model.Fit()
78 logCov := resultCov.LogLike()
79 model, err = glm.NewGLM(dataset, "outcome", append([]string{"constants", "variant"}, pcaNames...), glmConfig)
83 resultComp := model.Fit()
84 logComp := resultComp.LogLike()
85 dist := distuv.ChiSquared{K: 1}
86 return dist.Survival(-2 * (logCov - logComp))