Add example script.
author Tom Clegg <tom@tomclegg.ca>
Wed, 4 Mar 2020 20:31:43 +0000 (15:31 -0500)
committer Tom Clegg <tom@tomclegg.ca>
Wed, 4 Mar 2020 20:31:43 +0000 (15:31 -0500)
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom@tomclegg.ca>

example-1kg-pca.py [deleted file]
example-1kg-plot.py [deleted file]
example-su92l-1kg.sh [new file with mode: 0644]

diff --git a/example-1kg-pca.py b/example-1kg-pca.py
deleted file mode 100755 (executable)
index 4e715d0..0000000
--- a/example-1kg-pca.py
+++ /dev/null
@@ -1,19 +0,0 @@
-#!/usr/bin/env python
-
-"""
-lightning gvcf2numpy -tag-library ~/keep/by_id/su92l-4zz18-92bx4zjg5hgs3yc/tagset.fa.gz -ref ./hg38.fa.gz ~/keep/by_id/su92l-4zz18-s3e6as6uzsoocsb > example.npy
-example-1k-pca.py example.npy
-example-1k-plot.py example.npy.pca.npy sample_info.csv ~/keep/by_id/su92l-4zz18-s3e6as6uzsoocsb
-ls -l example.npy.pca.npy.png
-"""
-
-import sys
-infile = sys.argv[1]
-
-import scipy
-X = scipy.load(infile)
-
-from sklearn.decomposition import PCA
-pca = PCA(n_components=4)
-X = pca.fit_transform(X)
-scipy.save(infile+".pca.npy", X)
diff --git a/example-1kg-plot.py b/example-1kg-plot.py
deleted file mode 100755 (executable)
index dcfce78..0000000
--- a/example-1kg-plot.py
+++ /dev/null
@@ -1,65 +0,0 @@
-#!/usr/bin/env python
-
-import csv
-import os
-import scipy
-import sys
-
-infile = sys.argv[1]
-X = scipy.load(infile)
-
-colors = None
-if len(sys.argv) > 2:
-    labels = {}
-    for fnm in os.listdir(sys.argv[3]):
-        if '.2.fasta' not in fnm:
-            labels[fnm] = '---'
-    if len(labels) != len(X):
-        raise "len(inputdir) != len(inputarray)"
-    with open(sys.argv[2], 'rb') as csvfile:
-        for row in csv.reader(csvfile):
-            ident=row[0]
-            label=row[1]
-            for fnm in labels:
-                if row[0] in fnm:
-                    labels[fnm] = row[1]
-    colors = []
-    labelcolors = {
-        'PUR': 'firebrick',
-        'CLM': 'firebrick',
-        'MXL': 'firebrick',
-        'PEL': 'firebrick',
-        'TSI': 'green',
-        'IBS': 'green',
-        'CEU': 'green',
-        'GBR': 'green',
-        'FIN': 'green',
-        'LWK': 'coral',
-        'MSL': 'coral',
-        'GWD': 'coral',
-        'YRI': 'coral',
-        'ESN': 'coral',
-        'ACB': 'coral',
-        'ASW': 'coral',
-        'KHV': 'royalblue',
-        'CDX': 'royalblue',
-        'CHS': 'royalblue',
-        'CHB': 'royalblue',
-        'JPT': 'royalblue',
-        'STU': 'blueviolet',
-        'ITU': 'blueviolet',
-        'BEB': 'blueviolet',
-        'GIH': 'blueviolet',
-        'PJL': 'blueviolet',
-    }
-    for fnm in sorted(labels.keys()):
-        colors.append(labelcolors[labels[fnm]])
-
-from matplotlib.figure import Figure
-from matplotlib.patches import Polygon
-from matplotlib.backends.backend_agg import FigureCanvasAgg
-fig = Figure()
-ax = fig.add_subplot(111)
-ax.scatter(X[:,0], X[:,1], c=colors, s=60, marker='o', alpha=0.5)
-canvas = FigureCanvasAgg(fig)
-canvas.print_figure(infile+".png", dpi=80)
diff --git a/example-su92l-1kg.sh b/example-su92l-1kg.sh
new file mode 100644 (file)
index 0000000..147585c
--- /dev/null
+++ b/example-su92l-1kg.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+go run . build-docker-image
+arv keep docker lightning-runtime
+
+project=su92l-j7d0g-jzei0m9yvgauhjf
+gvcf=su92l-4zz18-ykpcoea5nisz74f
+fasta=su92l-4zz18-s3e6as6uzsoocsb
+tags=su92l-4zz18-92bx4zjg5hgs3yc
+
+go run . import       -project ${project} \
+   -tag-library ~/keep/by_id/${tags}/tagset.fa.gz \
+   ~/keep/by_id/${fasta}
+go run . filter       -project ${project} \
+   -i ~/keep/by_id/su92l-4zz18-fcyucnod8y4515p/library.gob \
+   -min-coverage 0.9 -max-variants 30
+go run . export-numpy -project ${project} \
+   -i ~/keep/by_id/su92l-4zz18-l40xcd2l6dmphaj/library.gob
+go run . pca          -project ${project} \
+   -i ~/keep/by_id/su92l-4zz18-i6fzfoxpdh38yk4/library.npy
+go run . plot         -project ${project} \
+   -i ~/keep/by_id/su92l-4zz18-zqfo7qc3tadh6zb/pca.npy \
+   -labels-csv ~/keep/by_id/${gvcf}/sample_info.csv \
+   -sample-fasta-dir ~/keep/by_id/${fasta}