"filter": &filtercmd{},
"build-docker-image": &buildDockerImage{},
"plot": &pythonPlot{},
+ "pca-plot": &pythonPlot{},
+ "manhattan-plot": &manhattanPlot{},
"diff-fasta": &diffFasta{},
"stats": &statscmd{},
"merge": &merger{},
RUN DEBIAN_FRONTEND=noninteractive \
apt-get update && \
apt-get dist-upgrade -y && \
- apt-get install -y --no-install-recommends bcftools bedtools samtools python2 python3-sklearn python3-matplotlib ca-certificates && \
- apt-get clean
+ apt-get install -y --no-install-recommends bcftools bedtools samtools python2 python3-sklearn python3-matplotlib python3-pip ca-certificates && \
+ apt-get clean && \
+ pip3 install qmplot
`), 0644)
if err != nil {
fmt.Fprint(stderr, err)
--- /dev/null
+// Copyright (C) The Lightning Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package lightning
+
+import (
+ _ "embed"
+ "flag"
+ "fmt"
+ "io"
+ "os/exec"
+ "strings"
+
+ "git.arvados.org/arvados.git/sdk/go/arvados"
+)
+
+type manhattanPlot struct{}
+
+//go:embed manhattan.py
+var manhattanPy string
+
+func (cmd *manhattanPlot) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int {
+ var err error
+ defer func() {
+ if err != nil {
+ fmt.Fprintf(stderr, "%s\n", err)
+ }
+ }()
+ flags := flag.NewFlagSet("", flag.ContinueOnError)
+ flags.SetOutput(stderr)
+ projectUUID := flags.String("project", "", "project `UUID` for output data")
+ inputDirectory := flags.String("i", "-", "input `directory` (output of slice-numpy -single-onehot)")
+ outputFilename := flags.String("o", "", "output `filename` (e.g., './plot.png')")
+ priority := flags.Int("priority", 500, "container request priority")
+ runlocal := flags.Bool("local", false, "run on local host (default: run in an arvados container)")
+ err = flags.Parse(args)
+ if err == flag.ErrHelp {
+ err = nil
+ return 0
+ } else if err != nil {
+ return 2
+ } else if flags.NArg() > 0 {
+ err = fmt.Errorf("errant command line arguments after parsed flags: %v", flags.Args())
+ return 2
+ }
+
+ runner := arvadosContainerRunner{
+ Name: "lightning manhattan",
+ Client: arvados.NewClientFromEnv(),
+ ProjectUUID: *projectUUID,
+ RAM: 4 << 30,
+ VCPUs: 1,
+ Priority: *priority,
+ Mounts: map[string]map[string]interface{}{
+ "/manhattan.py": map[string]interface{}{
+ "kind": "text",
+ "content": manhattanPy,
+ },
+ },
+ }
+ if !*runlocal {
+ err = runner.TranslatePaths(inputDirectory)
+ if err != nil {
+ return 1
+ }
+ *outputFilename = "/mnt/output/plot.png"
+ }
+ args = []string{
+ *inputDirectory,
+ *outputFilename,
+ }
+ if *runlocal {
+ if *outputFilename == "" {
+ fmt.Fprintln(stderr, "error: must specify -o filename.png in local mode (or try -help)")
+ return 1
+ }
+ cmd := exec.Command("python3", append([]string{"-"}, args...)...)
+ cmd.Stdin = strings.NewReader(manhattanPy)
+ cmd.Stdout = stdout
+ cmd.Stderr = stderr
+ err = cmd.Run()
+ if err != nil {
+ return 1
+ }
+ return 0
+ }
+ runner.Prog = "python3"
+ runner.Args = append([]string{"/manhattan.py"}, args...)
+ var output string
+ output, err = runner.Run()
+ if err != nil {
+ return 1
+ }
+ fmt.Fprintln(stdout, output+"/plot.png")
+ return 0
+}
--- /dev/null
+# Copyright (C) The Lightning Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+import csv
+import os
+import sys
+
+import matplotlib
+import numpy
+import pandas
+import qmplot
+
+(_,
+ input_path,
+ output_path,
+ ) = sys.argv
+columns = numpy.load(os.path.join(input_path, 'onehot-columns.npy'))
+
+# pvalue maps tag# => [pvalue1, pvalue2, ...] (one het p-value and one hom p-value for each tile variant)
+pvalue = {}
+for i in range(columns.shape[1]):
+ tag = columns[0,i]
+ x = pvalue.get(tag, [])
+ x.append(pow(10, -columns[4,i] / 1000000))
+ pvalue[tag] = x
+
+# tilepos maps tag# => (chromosome, position)
+tilepos = {}
+for dirent in os.scandir(input_path):
+ if dirent.name.endswith('.annotations.csv'):
+ with open(dirent, 'rt', newline='') as annotations:
+ for annotation in csv.reader(annotations):
+ # 500000,0,2,=,chr1,160793649,,,
+ if annotation[3] == "=":
+ tilepos[int(annotation[0])] = (annotation[4], int(annotation[5]))
+
+series = {"#CHROM": [], "POS": [], "P": []}
+for tag, chrpos in sorted(tilepos.items(), key=lambda item: (item[1][0].lstrip('chr').zfill(2), item[1][1])):
+ for p in pvalue.get(tag, []):
+ series["#CHROM"].append(chrpos[0])
+ series["POS"].append(chrpos[1])
+ series["P"].append(p)
+
+qmplot.manhattanplot(data=pandas.DataFrame(series))
+matplotlib.pyplot.savefig(output_path)
"flag"
"fmt"
"io"
- _ "net/http/pprof"
"os/exec"
"strings"
type pythonPlot struct{}
-//go:embed plot.py
-var plotscript string
+//go:embed pca_plot.py
+var pcaPlotPy string
func (cmd *pythonPlot) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int {
var err error
}
runner := arvadosContainerRunner{
- Name: "lightning plot",
+ Name: "lightning pca-plot",
Client: arvados.NewClientFromEnv(),
ProjectUUID: *projectUUID,
RAM: 4 << 30,
VCPUs: 1,
Priority: *priority,
Mounts: map[string]map[string]interface{}{
- "/plot.py": map[string]interface{}{
+ "/pca_plot.py": map[string]interface{}{
"kind": "text",
- "content": plotscript,
+ "content": pcaPlotPy,
},
},
}
return 1
}
cmd := exec.Command("python3", append([]string{"-"}, args...)...)
- cmd.Stdin = strings.NewReader(plotscript)
+ cmd.Stdin = strings.NewReader(pcaPlotPy)
cmd.Stdout = stdout
cmd.Stderr = stderr
err = cmd.Run()
return 0
}
runner.Prog = "python3"
- runner.Args = append([]string{"/plot.py"}, args...)
+ runner.Args = append([]string{"/pca_plot.py"}, args...)
var output string
output, err = runner.Run()
if err != nil {