"""
lightning gvcf2numpy -tag-library ~/keep/by_id/su92l-4zz18-92bx4zjg5hgs3yc/tagset.fa.gz -ref ./hg38.fa.gz ~/keep/by_id/su92l-4zz18-s3e6as6uzsoocsb > example.npy
+example-1k-pca.py example.npy
+example-1k-plot.py example.npy.pca.npy sample_info.csv ~/keep/by_id/su92l-4zz18-s3e6as6uzsoocsb
+ls -l example.npy.pca.npy.png
"""
import sys
from sklearn.decomposition import PCA
pca = PCA(n_components=4)
X = pca.fit_transform(X)
-
-from matplotlib.figure import Figure
-from matplotlib.patches import Polygon
-from matplotlib.backends.backend_agg import FigureCanvasAgg
-fig = Figure()
-ax = fig.add_subplot(111)
-ax.scatter(X[:,0], X[:,1])
-canvas = FigureCanvasAgg(fig)
-canvas.print_figure(infile+".png", dpi=80)
+scipy.save(infile+".pca.npy", X)
--- /dev/null
+#!/usr/bin/env python
+"""Scatter-plot the first two columns of a saved array as a PNG.
+
+Usage: <script> ARRAY.npy [SAMPLE_INFO.csv INPUT_DIR]
+
+ARRAY.npy is loaded and columns 0 and 1 are plotted against each other;
+the image is written to ARRAY.npy + ".png".
+
+When SAMPLE_INFO.csv and INPUT_DIR are both given, points are colored by
+label: every INPUT_DIR entry whose name does not contain '.2.fasta' is
+assigned the second CSV column of any row whose first column occurs in
+the entry's name.
+"""
+
+import csv
+import os
+import sys
+
+import numpy
+import scipy
+from matplotlib.backends.backend_agg import FigureCanvasAgg
+from matplotlib.figure import Figure
+from matplotlib.patches import Polygon
+
+# Placeholder label for files that no CSV row matched.
+UNLABELED = '---'
+# Color for unlabeled files and for labels missing from the color table,
+# so that one unmatched sample does not abort the whole plot.
+FALLBACK_COLOR = 'gray'
+
+infile = sys.argv[1]
+# numpy.load is what the deprecated scipy.load alias re-exported.
+X = numpy.load(infile)
+
+if len(sys.argv) == 3:
+    # A CSV without the directory it refers to cannot be used for coloring.
+    sys.exit("usage: %s ARRAY.npy [SAMPLE_INFO.csv INPUT_DIR]" % sys.argv[0])
+
+colors = None
+if len(sys.argv) > 3:
+    labels = {}
+    for fnm in os.listdir(sys.argv[3]):
+        if '.2.fasta' not in fnm:
+            labels[fnm] = UNLABELED
+    if len(labels) != len(X):
+        raise ValueError("len(inputdir) != len(inputarray)")
+    # Text mode: csv.reader rejects bytes rows on Python 3.
+    with open(sys.argv[2], 'r') as csvfile:
+        for row in csv.reader(csvfile):
+            # Skip blank/short rows; an empty identifier would match every name.
+            if len(row) < 2 or not row[0]:
+                continue
+            ident, label = row[0], row[1]
+            for fnm in labels:
+                if ident in fnm:
+                    labels[fnm] = label
+    # NOTE(review): LWK is 'coral' while MSL/GWD/YRI/ESN/ACB/ASW share
+    # 'green' with TSI/IBS/CEU/GBR/FIN -- confirm this is intended.
+    labelcolors = {
+        'PUR': 'firebrick',
+        'CLM': 'firebrick',
+        'MXL': 'firebrick',
+        'PEL': 'firebrick',
+        'TSI': 'green',
+        'IBS': 'green',
+        'CEU': 'green',
+        'GBR': 'green',
+        'FIN': 'green',
+        'LWK': 'coral',
+        'MSL': 'green',
+        'GWD': 'green',
+        'YRI': 'green',
+        'ESN': 'green',
+        'ACB': 'green',
+        'ASW': 'green',
+        'KHV': 'royalblue',
+        'CDX': 'royalblue',
+        'CHS': 'royalblue',
+        'CHB': 'royalblue',
+        'JPT': 'royalblue',
+        'STU': 'blueviolet',
+        'ITU': 'blueviolet',
+        'BEB': 'blueviolet',
+        'GIH': 'blueviolet',
+        'PJL': 'blueviolet',
+    }
+    # Assumes the rows of X are in sorted-filename order -- TODO confirm
+    # against the tool that produced the array.
+    colors = [labelcolors.get(labels[fnm], FALLBACK_COLOR)
+              for fnm in sorted(labels)]
+
+fig = Figure()
+ax = fig.add_subplot(111)
+ax.scatter(X[:,0], X[:,1], c=colors, s=60, marker='o', alpha=0.5)
+canvas = FigureCanvasAgg(fig)
+canvas.print_figure(infile+".png", dpi=80)