Generate a manhattan plot for each chromosome.
authorTom Clegg <tom@curii.com>
Thu, 19 Jan 2023 21:38:41 +0000 (16:38 -0500)
committerTom Clegg <tom@curii.com>
Thu, 19 Jan 2023 21:38:41 +0000 (16:38 -0500)
refs #19958

Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom@curii.com>

manhattan.py

index 08a4da400a544acad2e6ae73ad002b6362f87482..da5259b77a59e6e52f9a1b877c13c5f5cf7e27ad 100644 (file)
@@ -19,6 +19,7 @@ import qmplot
  ) = sys.argv
 csv_threshold = float(csv_threshold_str)
 
+print(f'loading onehot-columns.npy', file=sys.stderr)
 columns = numpy.load(os.path.join(input_path, 'onehot-columns.npy'))
 
 # pvalue maps tag# => [pvalue1, pvalue2, ...] (one het p-value and one hom p-value for each tile variant)
@@ -29,6 +30,7 @@ for i in range(columns.shape[1]):
     x.append(pow(10, -columns[4,i] / 1000000))
     pvalue[tag] = x
 
+print(f'building tilepos dict', file=sys.stderr)
 # tilepos maps tag# => (chromosome, position)
 tilepos = {}
 for dirent in os.scandir(input_path):
@@ -40,12 +42,14 @@ for dirent in os.scandir(input_path):
                     tilepos[int(annotation[0])] = (annotation[4], int(annotation[5]))
 
 if csv_threshold > 0 and csv_output_path != "":
+    print(f'writing csv {csv_output_path}', file=sys.stderr)
     with open(csv_output_path, 'wt') as f:
         for tag, chrpos in sorted(tilepos.items(), key=lambda item: (item[1][0][-1] > '9', item[1][0].lstrip('chr').zfill(2), item[1][1])):
             for p in pvalue.get(tag, []):
                 if p < pow(10, -csv_threshold):
                     print(f'{tag},{chrpos[0]},{chrpos[1]},{p}', file=f)
 
+print(f'building series dict', file=sys.stderr)
 series = {"#CHROM": [], "POS": [], "P": []}
 for tag, chrpos in sorted(tilepos.items(), key=lambda item: (item[1][0][-1] > '9', item[1][0].lstrip('chr').zfill(2), item[1][1])):
     for p in pvalue.get(tag, []):
@@ -53,16 +57,29 @@ for tag, chrpos in sorted(tilepos.items(), key=lambda item: (item[1][0][-1] > '9
         series["POS"].append(chrpos[1])
         series["P"].append(p)
 
-qmplot.manhattanplot(data=pandas.DataFrame(series),
-                     suggestiveline=2e-10,  # Turn off suggestiveline
-                     genomewideline=2e-11,  # Turn off genomewidel
-                     sign_line_cols=["#D62728", "#2CA02C"],
-                     marker=".",
-                     alpha = 0.6,
-                     hline_kws={"linestyle": "--", "lw": 1.3},
-                     title="Tile Variant Manhattan Plot",
-                     # xtick_label_set=xtick,
-                     xlabel="Chromosome",
-                     ylabel=r"$-log_{10}{(P)}$",
-                     xticklabel_kws={"rotation": "vertical"})
-matplotlib.pyplot.savefig(output_path, bbox_inches="tight")
+chroms = {}
+for chrom in series["#CHROM"]:
+    chroms[chrom] = True
+chroms[None] = True
+
+print(f'generating plots', file=sys.stderr)
+for chrom in chroms.keys():
+    output_file = output_path
+    xlabel = "Chromosome"
+    if chrom:
+        output_file = f'.{chrom}.'.join(output_file.rsplit('.', 1))
+        xlabel = f'position on {chrom}'
+    qmplot.manhattanplot(data=pandas.DataFrame(series),
+                         CHR=chrom,
+                         #suggestiveline=2e-10,  # Turn off suggestiveline
+                         #genomewideline=2e-11,  # Turn off genomewidel
+                         sign_line_cols=["#D62728", "#2CA02C"],
+                         marker=".",
+                         alpha = 0.6,
+                         hline_kws={"linestyle": "--", "lw": 1.3},
+                         title="Tile Variant Manhattan Plot",
+                         xlabel=xlabel,
+                         ylabel=r"$-log_{10}{(P)}$",
+                         xticklabel_kws={"rotation": "vertical"})
+    matplotlib.pyplot.savefig(output_file, bbox_inches="tight")
+    matplotlib.pyplot.close()