From 4c1958b0db98c7628e91265dd7e99c0c840c1206 Mon Sep 17 00:00:00 2001 From: Tom Clegg Date: Thu, 13 Oct 2022 10:43:36 -0400 Subject: [PATCH] 19524: Generalize plot colors a little. Arvados-DCO-1.1-Signed-off-by: Tom Clegg --- plot.go | 81 ++++++--------------------------------------------------- plot.py | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 88 insertions(+), 73 deletions(-) create mode 100644 plot.py diff --git a/plot.go b/plot.go index aa4335fec6..51f5c8236c 100644 --- a/plot.go +++ b/plot.go @@ -5,6 +5,7 @@ package lightning import ( + _ "embed" "flag" "fmt" "io" @@ -15,6 +16,9 @@ import ( type pythonPlot struct{} +//go:embed plot.py +var plotscript string + func (cmd *pythonPlot) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int { var err error defer func() { @@ -27,7 +31,8 @@ func (cmd *pythonPlot) RunCommand(prog string, args []string, stdin io.Reader, s projectUUID := flags.String("project", "", "project `UUID` for output data") inputFilename := flags.String("i", "-", "input `file`") sampleListFilename := flags.String("samples", "", "use second column of `samples.csv` as complete list of sample IDs") - colormapFilename := flags.String("colormap", "", "use first two columns of `colormap.csv` as id->color mapping") + phenotypeFilename := flags.String("phenotype", "", "use `phenotype.csv` as id->phenotype mapping (column 0 is sample id)") + phenotypeColumn := flags.Int("phenotype-column", 1, "0-based column `index` of phenotype in phenotype.csv file") priority := flags.Int("priority", 500, "container request priority") err = flags.Parse(args) if err == flag.ErrHelp { @@ -51,12 +56,12 @@ func (cmd *pythonPlot) RunCommand(prog string, args []string, stdin io.Reader, s }, }, } - err = runner.TranslatePaths(inputFilename, sampleListFilename, colormapFilename) + err = runner.TranslatePaths(inputFilename, sampleListFilename, phenotypeFilename) if err != nil { return 1 } runner.Prog = "python3" - runner.Args = []string{"/plot.py", *inputFilename, *sampleListFilename, *colormapFilename, "/mnt/output/plot.png"} + runner.Args = []string{"/plot.py", *inputFilename, *sampleListFilename, *phenotypeFilename, fmt.Sprintf("%d", *phenotypeColumn), "/mnt/output/plot.png"} var output string output, err = runner.Run() if err != nil { @@ -65,73 +70,3 @@ func (cmd *pythonPlot) RunCommand(prog string, args []string, stdin io.Reader, s fmt.Fprintln(stdout, output+"/plot.png") return 0 } - -var plotscript = ` -import csv -import os -import os.path -import scipy -import sys - -infile = sys.argv[1] -X = scipy.load(infile) - -colors = None -if sys.argv[2]: - samples = [] - labels = {} - with open(sys.argv[2], 'rt') as samplelist: - for row in csv.reader(samplelist): - id = row[1] - samples.append(id) - with open(sys.argv[3], 'rt') as colormap: - for row in csv.reader(colormap): - tag = row[0] - label = row[1] - for id in samples: - if tag in id: - labels[id] = label - colors = [] - labelcolors = { - 'PUR': 'firebrick', - 'CLM': 'firebrick', - 'MXL': 'firebrick', - 'PEL': 'firebrick', - 'TSI': 'green', - 'IBS': 'green', - 'CEU': 'green', - 'GBR': 'green', - 'FIN': 'green', - 'LWK': 'coral', - 'MSL': 'coral', - 'GWD': 'coral', - 'YRI': 'coral', - 'ESN': 'coral', - 'ACB': 'coral', - 'ASW': 'coral', - 'KHV': 'royalblue', - 'CDX': 'royalblue', - 'CHS': 'royalblue', - 'CHB': 'royalblue', - 'JPT': 'royalblue', - 'STU': 'blueviolet', - 'ITU': 'blueviolet', - 'BEB': 'blueviolet', - 'GIH': 'blueviolet', - 'PJL': 'blueviolet', - } - for id in samples: - if (id in labels) and (labels[id] in labelcolors): - colors.append(labelcolors[labels[id]]) - else: - colors.append('black') - -from matplotlib.figure import Figure -from matplotlib.patches import Polygon -from matplotlib.backends.backend_agg import FigureCanvasAgg -fig = Figure() -ax = fig.add_subplot(111) -ax.scatter(X[:,0], X[:,1], c=colors, s=60, marker='o', alpha=0.5) -canvas = FigureCanvasAgg(fig) -canvas.print_figure(sys.argv[4], dpi=80) -` diff --git a/plot.py b/plot.py new file mode 100644 index 0000000000..eadcd4cab7 --- /dev/null +++ b/plot.py @@ -0,0 +1,80 @@ +# Copyright (C) The Lightning Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +import csv +import os +import os.path +import scipy +import sys + +infile = sys.argv[1] +X = scipy.load(infile) + +colors = None +if sys.argv[2]: + samples = [] + labels = {} + with open(sys.argv[2], 'rt', newline='') as samplelist: + for row in csv.reader(samplelist): + sampleid = row[1] + samples.append(sampleid) + phenotype_column = int(sys.argv[4]) + with open(sys.argv[3], 'rt', newline='') as phenotype: + dialect = csv.Sniffer().sniff(phenotype.read(1024)) + phenotype.seek(0) + for row in csv.reader(phenotype, dialect): + tag = row[0] + label = row[phenotype_column] + for sampleid in samples: + if tag in sampleid: + labels[sampleid] = label + colors = [] + labelcolors = { + 'PUR': 'firebrick', + 'CLM': 'firebrick', + 'MXL': 'firebrick', + 'PEL': 'firebrick', + '1': 'firebrick', + 'TSI': 'green', + 'IBS': 'green', + 'CEU': 'green', + 'GBR': 'green', + 'FIN': 'green', + '2': 'green', + 'LWK': 'coral', + 'MSL': 'coral', + 'GWD': 'coral', + 'YRI': 'coral', + 'ESN': 'coral', + 'ACB': 'coral', + 'ASW': 'coral', + '3': 'coral', + 'KHV': 'royalblue', + 'CDX': 'royalblue', + 'CHS': 'royalblue', + 'CHB': 'royalblue', + 'JPT': 'royalblue', + '4': 'royalblue', + 'STU': 'blueviolet', + 'ITU': 'blueviolet', + 'BEB': 'blueviolet', + 'GIH': 'blueviolet', + 'PJL': 'blueviolet', + '5': 'blueviolet', + '6': 'navy', + } + for sampleid in samples: + if (sampleid in labels) and (labels[sampleid] in labelcolors): + colors.append(labelcolors[labels[sampleid]]) + else: + colors.append('black') + +from matplotlib.figure import Figure +from matplotlib.patches import Polygon +from matplotlib.backends.backend_agg import FigureCanvasAgg +fig = Figure() +ax = fig.add_subplot(111) +ax.scatter(X[:,0], X[:,1], c=colors, s=60, marker='o', alpha=0.5) +canvas = FigureCanvasAgg(fig) +canvas.print_figure(sys.argv[5], dpi=80) -- 2.30.2