1 // Copyright (C) The Lightning Authors. All rights reserved.
3 // SPDX-License-Identifier: AGPL-3.0
13 "git.arvados.org/arvados.git/sdk/go/arvados"
16 type numpyComVar struct{}
18 func (cmd *numpyComVar) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int {
22 fmt.Fprintf(stderr, "%s\n", err)
25 flags := flag.NewFlagSet("", flag.ContinueOnError)
26 flags.SetOutput(stderr)
27 projectUUID := flags.String("project", "", "project `UUID` for output data")
28 inputFilename := flags.String("i", "-", "numpy matrix `file`")
29 priority := flags.Int("priority", 500, "container request priority")
30 annotationsFilename := flags.String("annotations", "", "annotations tsv `file`")
31 maxResults := flags.Int("max-results", 256, "maximum number of tile variants to output")
32 minFrequency := flags.Float64("min-frequency", 0.4, "minimum allele frequency")
33 maxFrequency := flags.Float64("max-frequency", 0.6, "maximum allele frequency")
34 err = flags.Parse(args)
35 if err == flag.ErrHelp {
38 } else if err != nil {
42 runner := arvadosContainerRunner{
43 Name: "lightning numpy-comvar",
44 Client: arvados.NewClientFromEnv(),
45 ProjectUUID: *projectUUID,
50 err = runner.TranslatePaths(inputFilename, annotationsFilename)
54 runner.Prog = "python3"
55 runner.Args = []string{"-c", `import sys
60 numpyFile = sys.argv[1]
61 annotationsFile = sys.argv[2]
62 outputFile = sys.argv[3]
63 maxResults = int(sys.argv[4])
64 minFrequency = float(sys.argv[5])
65 maxFrequency = float(sys.argv[6])
67 out = open(outputFile, 'w')
69 m = scipy.load(numpyFile)
72 mincount = m.shape[0] * 2 * minFrequency
73 maxcount = m.shape[0] * 2 * maxFrequency
74 for tag in range(m.shape[1] // 2):
76 counter = [0, 0, 0, 0, 0]
77 for genome in range(m.shape[0]):
78 for phase in range(2):
79 variant = m[genome][tag*2+phase]
80 if variant > 0 and variant < len(counter):
82 example[variant] = genome
83 for variant, count in enumerate(counter):
84 if count >= mincount and count <= maxcount:
85 commonvariants[tag,variant] = example[variant]
86 # sys.stderr.write('tag {} variant {} count {} example {} have {} commonvariants\n'.format(tag, variant, count, example[variant], len(commonvariants)))
87 if len(commonvariants) >= maxResults:
91 with open(annotationsFile, newline='') as tsvfile:
92 rdr = csv.reader(tsvfile, delimiter='\t', quotechar='"')
96 if (tag, variant) in commonvariants:
97 found[tag, variant] = True
98 out.write(','.join(row + [str(commonvariants[tag, variant])]) + '\n')
99 elif len(found) >= len(commonvariants):
100 sys.stderr.write('done\n')
104 `, *inputFilename, *annotationsFilename, "/mnt/output/commonvariants.csv", fmt.Sprintf("%d", *maxResults), fmt.Sprintf("%f", *minFrequency), fmt.Sprintf("%f", *maxFrequency)}
106 output, err = runner.Run()
110 fmt.Fprintln(stdout, output+"/commonvariants.csv")