Fix some tests.
[lightning.git] / manhattan.py
1 # Copyright (C) The Lightning Authors. All rights reserved.
2 #
3 # SPDX-License-Identifier: AGPL-3.0
4
5 import csv
6 import os
7 import sys
8
9 import matplotlib
10 import numpy
11 import pandas
12 import qmplot
13
14 (_,
15  input_path,
16  output_path,
17  csv_threshold_str,
18  csv_output_path,
19  ) = sys.argv
20 csv_threshold = float(csv_threshold_str)
21
22 print(f'loading onehot-columns.npy', file=sys.stderr)
23 columns = numpy.load(os.path.join(input_path, 'onehot-columns.npy'))
24
25 # pvalue maps tag# => [pvalue1, pvalue2, ...] (one het p-value and one hom p-value for each tile variant)
26 pvalue = {}
27 for i in range(columns.shape[1]):
28     tag = columns[0,i]
29     x = pvalue.get(tag, [])
30     x.append(pow(10, -columns[4,i] / 1000000))
31     pvalue[tag] = x
32
33 print(f'building tilepos dict', file=sys.stderr)
34 # tilepos maps tag# => (chromosome, position)
35 tilepos = {}
36 for dirent in os.scandir(input_path):
37     if dirent.name.endswith('.annotations.csv'):
38         with open(dirent, 'rt', newline='') as annotations:
39             for annotation in csv.reader(annotations):
40                 # 500000,0,2,=,chr1,160793649,,,
41                 if annotation[3] == "=":
42                     tilepos[int(annotation[0])] = (annotation[4], int(annotation[5]))
43
44 if csv_threshold > 0 and csv_output_path != "":
45     print(f'writing csv {csv_output_path}', file=sys.stderr)
46     with open(csv_output_path, 'wt') as f:
47         for tag, chrpos in sorted(tilepos.items(), key=lambda item: (item[1][0][-1] > '9', item[1][0].lstrip('chr').zfill(2), item[1][1])):
48             for p in pvalue.get(tag, []):
49                 if p < pow(10, -csv_threshold):
50                     print(f'{tag},{chrpos[0]},{chrpos[1]},{p}', file=f)
51
52 print(f'building series dict', file=sys.stderr)
53 series = {"#CHROM": [], "POS": [], "P": []}
54 for tag, chrpos in sorted(tilepos.items(), key=lambda item: (item[1][0][-1] > '9', item[1][0].lstrip('chr').zfill(2), item[1][1])):
55     for p in pvalue.get(tag, []):
56         series["#CHROM"].append(chrpos[0])
57         series["POS"].append(chrpos[1])
58         series["P"].append(p)
59
60 chroms = {}
61 for chrom in series["#CHROM"]:
62     chroms[chrom] = True
63 chroms[None] = True
64
65 print(f'generating plots', file=sys.stderr)
66 for chrom in chroms.keys():
67     output_file = output_path
68     xlabel = "Chromosome"
69     if chrom:
70         output_file = f'.{chrom}.'.join(output_file.rsplit('.', 1))
71         xlabel = f'position on {chrom}'
72     qmplot.manhattanplot(data=pandas.DataFrame(series),
73                          CHR=chrom,
74                          color='#1D2A44,#441D2A',
75                          suggestiveline=2e-10,
76                          genomewideline=2e-11,
77                          sign_line_cols=["#D62728", "#2CA02C"],
78                          marker=".",
79                          alpha = 0.6,
80                          hline_kws={"linestyle": "--", "lw": 1.3},
81                          title="Tile Variant Manhattan Plot",
82                          xlabel=xlabel,
83                          ylabel=r"$-log_{10}{(P)}$",
84                          xticklabel_kws={"rotation": "vertical"})
85     matplotlib.pyplot.savefig(output_file, bbox_inches="tight")
86     matplotlib.pyplot.close()