Color PCA plot.
author Tom Clegg <tom@tomclegg.ca>
Mon, 3 Feb 2020 15:25:20 +0000 (10:25 -0500)
committer Tom Clegg <tom@tomclegg.ca>
Mon, 3 Feb 2020 15:25:20 +0000 (10:25 -0500)
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom@tomclegg.ca>

example-1kg-pca.py
example-1kg-plot.py [new file with mode: 0644]

index 0901e864b0c4ad6ae4990393d83ae1f1c9498b51..4e715d016ed558586dd77328d25e73e82f9b355a 100755 (executable)
@@ -2,6 +2,9 @@
 
 """
 lightning gvcf2numpy -tag-library ~/keep/by_id/su92l-4zz18-92bx4zjg5hgs3yc/tagset.fa.gz -ref ./hg38.fa.gz ~/keep/by_id/su92l-4zz18-s3e6as6uzsoocsb > example.npy
+example-1kg-pca.py example.npy
+example-1kg-plot.py example.npy.pca.npy sample_info.csv ~/keep/by_id/su92l-4zz18-s3e6as6uzsoocsb
+ls -l example.npy.pca.npy.png
 """
 
 import sys
@@ -13,12 +16,4 @@ X = scipy.load(infile)
 from sklearn.decomposition import PCA
 pca = PCA(n_components=4)
 X = pca.fit_transform(X)
-
-from matplotlib.figure import Figure
-from matplotlib.patches import Polygon
-from matplotlib.backends.backend_agg import FigureCanvasAgg
-fig = Figure()
-ax = fig.add_subplot(111)
-ax.scatter(X[:,0], X[:,1])
-canvas = FigureCanvasAgg(fig)
-canvas.print_figure(infile+".png", dpi=80)
+scipy.save(infile+".pca.npy", X)
diff --git a/example-1kg-plot.py b/example-1kg-plot.py
new file mode 100644 (file)
index 0000000..8fe5f06
--- /dev/null
@@ -0,0 +1,90 @@
+#!/usr/bin/env python
+
+"""Scatter-plot the first two columns of a saved array, optionally colored.
+
+Usage: example-1kg-plot.py ARRAY.npy [SAMPLE_INFO.csv INPUT_DIR]
+
+Loads ARRAY.npy and writes the plot to ARRAY.npy.png.  When SAMPLE_INFO.csv
+and INPUT_DIR are both given, each point is colored by the label (CSV
+column 1) of the row whose identifier (CSV column 0) is a substring of the
+corresponding INPUT_DIR entry name.
+"""
+
+import csv
+import os
+import scipy
+import sys
+
+infile = sys.argv[1]
+# NOTE(review): scipy.load looks like SciPy's re-export of numpy.load; confirm
+# the installed SciPy still provides it.
+X = scipy.load(infile)
+
+colors = None
+# Coloring reads both argv[2] (CSV) and argv[3] (directory), so require both;
+# with fewer arguments the points are plotted without explicit colors.
+if len(sys.argv) > 3:
+    # Map each directory entry name to its label; '---' means "no label yet".
+    # NOTE(review): presumably names containing '.2.fasta' are the second file
+    # of a per-sample pair and are skipped to keep one entry per sample -- confirm.
+    labels = {}
+    for fnm in os.listdir(sys.argv[3]):
+        if '.2.fasta' not in fnm:
+            labels[fnm] = '---'
+    if len(labels) != len(X):
+        # Raising a bare string is a TypeError; raise a real exception instead.
+        raise ValueError("len(inputdir) != len(inputarray)")
+    # NOTE(review): 'rb' matches Python 2's csv module; Python 3's csv.reader
+    # needs a text-mode file -- confirm the target interpreter.
+    with open(sys.argv[2], 'rb') as csvfile:
+        for row in csv.reader(csvfile):
+            ident = row[0]
+            label = row[1]
+            for fnm in labels:
+                if ident in fnm:
+                    labels[fnm] = label
+    # NOTE(review): LWK is the only 'coral' entry while MSL..ASW share 'green'
+    # with TSI..FIN -- confirm that grouping is intended.
+    labelcolors = {
+        'PUR': 'firebrick',
+        'CLM': 'firebrick',
+        'MXL': 'firebrick',
+        'PEL': 'firebrick',
+        'TSI': 'green',
+        'IBS': 'green',
+        'CEU': 'green',
+        'GBR': 'green',
+        'FIN': 'green',
+        'LWK': 'coral',
+        'MSL': 'green',
+        'GWD': 'green',
+        'YRI': 'green',
+        'ESN': 'green',
+        'ACB': 'green',
+        'ASW': 'green',
+        'KHV': 'royalblue',
+        'CDX': 'royalblue',
+        'CHS': 'royalblue',
+        'CHB': 'royalblue',
+        'JPT': 'royalblue',
+        'STU': 'blueviolet',
+        'ITU': 'blueviolet',
+        'BEB': 'blueviolet',
+        'GIH': 'blueviolet',
+        'PJL': 'blueviolet',
+    }
+    # One color per entry, in sorted-name order.  A label that is not a key of
+    # labelcolors (including the '---' placeholder) raises KeyError.
+    # NOTE(review): assumes the rows of X are in sorted-name order -- confirm
+    # against whatever produced the array.
+    colors = [labelcolors[labels[fnm]] for fnm in sorted(labels)]
+
+from matplotlib.figure import Figure
+from matplotlib.patches import Polygon
+from matplotlib.backends.backend_agg import FigureCanvasAgg
+# Draw on an Agg canvas and write the image to infile + ".png".
+fig = Figure()
+ax = fig.add_subplot(111)
+ax.scatter(X[:,0], X[:,1], c=colors, s=60, marker='o', alpha=0.5)
+canvas = FigureCanvasAgg(fig)
+canvas.print_figure(infile+".png", dpi=80)