From ceab7a9a46cfe8a64260b52b37be163cd215925f Mon Sep 17 00:00:00 2001 From: Sarah Wait Zaranek Date: Thu, 27 Feb 2020 18:52:56 +0000 Subject: [PATCH] Updating report python code and removing vep step from workflow Arvados-DCO-1.1-Signed-off-by: Sarah Wait Zaranek no issue # --- annotation/new/generatereport.py | 99 +++++++++++++++++++------------- 1 file changed, 59 insertions(+), 40 deletions(-) diff --git a/annotation/new/generatereport.py b/annotation/new/generatereport.py index c9bb5f1..6f39bb5 100644 --- a/annotation/new/generatereport.py +++ b/annotation/new/generatereport.py @@ -2,56 +2,75 @@ import numpy as np import scipy as scipy import pandas as pd import io +import argparse -pd.set_option("display.max_colwidth", 10000) +def generatereport(): -filename = "reportdata.txt" -samplename = "hu34D5B9_var-GS000015891-ASM" -headfile = "head.html" -tailfile = "tail.html" + parser = argparse.ArgumentParser() + parser.add_argument('txtfilename', metavar='VCF2TXTFILENAME', help='text file of info to annotate') + parser.add_argument('samplename', metavar='SAMPLENAME', help='name of sample to use on report') + parser.add_argument('headfile', metavar='REPORTHEADHTML', help='head html for report') + parser.add_argument('tailfile', metavar='REPORTTAILHTML', help='tail html for report') + args = parser.parse_args() -headerlist = ["Variant ID", "Chromosome", "Position", "Ref","Alt","Allele ID", "Clinical Significance","Disease Name","Frequency GO-ESP", "Frequency EXAC", "Frequency 1000 Genomes Project","GT"] -reportdata = pd.read_csv(filename,header=0,names=headerlist,sep='\t') -reportdata['Zygosity']="." + pd.set_option("display.max_colwidth", 10000) -idxHOM1 = reportdata.GT=='1|1' -idxHOM2 = reportdata.GT=='1/1' -idxHET1 = reportdata.GT=='1|0' -idxHET2 = reportdata.GT=='1/0' -idxHET3 = reportdata.GT=='0|1' -idxHET4 = reportdata.GT=='0/1' -idxHOM = idxHOM1 | idxHOM2 -idxHET = idxHET1 | idxHET2 | idxHET3 | idxHET4 -reportdata.Zygosity[idxHOM]='HOM' -reportdata.Zygosity[idxHET]='HET' +# filename = "reportdata.txt" +# samplename = "hu34D5B9_var-GS000015891-ASM" +# headfile = "head.html" +# tailfile = "tail.html" -#reportdata['URL'] =reportdata['Variant ID'].apply(lambda x: 'About' -reportdata.to_json('test.json',orient='records') -str_io = io.StringIO() + filename = args.txtfilename + samplename = args.samplename + headfile = args.headfile + tailfile = args.tailfile -reportdatasub = reportdata[["Variant ID", "Allele ID", "Clinical Significance","Disease Name", "Frequency EXAC", "Frequency 1000 Genomes Project","Zygosity","URL"]] + # reading data into dataframe + headerlist = ["Variant ID", "Chromosome", "Position", "Ref","Alt","Allele ID", "Clinical Significance","Disease Name","Frequency GO-ESP", "Frequency EXAC", "Frequency 1000 Genomes Project","GT"] + reportdata = pd.read_csv(filename,header=0,names=headerlist,sep='\t') + reportdata['Zygosity']="." -reportdatasub.to_html(buf=str_io, classes='table table-bordered',index_names=False,index=False) -html_str = str_io.getvalue() -html_str_encoded = unicode(html_str).encode('utf8') + # defining zygosity + idxHOM1 = reportdata.GT=='1|1' + idxHOM2 = reportdata.GT=='1/1' + idxHET1 = reportdata.GT=='1|0' + idxHET2 = reportdata.GT=='1/0' + idxHET3 = reportdata.GT=='0|1' + idxHET4 = reportdata.GT=='0/1' + idxHOM = idxHOM1 | idxHOM2 + idxHET = idxHET1 | idxHET2 | idxHET3 | idxHET4 + reportdata.Zygosity[idxHOM]='HOM' + reportdata.Zygosity[idxHET]='HET' -html_str_encoded = html_str_encoded.replace('<','<') -html_str_encoded = html_str_encoded.replace('>','>') -html_str_encoded = html_str_encoded.replace('|','
') + # creating url from variant ID + reportdata['URL'] = 'About' + reportdata.to_json('test.json',orient='records') + str_io = io.StringIO() -html_file = open(headfile, 'r') -source_code_head = html_file.read() -html_file.close() + # creating html table from dataframe + reportdatasub = reportdata[["Variant ID", "Allele ID", "Clinical Significance","Disease Name", "Frequency EXAC", "Frequency 1000 Genomes Project","Zygosity","URL"]] + reportdatasub.to_html(buf=str_io, classes='table table-bordered',index_names=False,index=False) + html_str = str_io.getvalue() + html_str_encoded = unicode(html_str).encode('utf8') + html_str_encoded = html_str_encoded.replace('<','<') + html_str_encoded = html_str_encoded.replace('>','>') + html_str_encoded = html_str_encoded.replace('|','
') -html_file = open(tailfile, 'r') -source_code_tail = html_file.read() -html_file.close() + html_file = open(headfile, 'r') + source_code_head = html_file.read() + html_file.close() + html_file = open(tailfile, 'r') + source_code_tail = html_file.read() + html_file.close() -total_html = source_code_head + html_str_encoded + source_code_tail + # combine html table with head and tail html for total report + total_html = source_code_head + html_str_encoded + source_code_tail + + # write out report html + f = open(samplename+'.html','wb') + f.write(total_html) + f.close() -f = open(samplename+'.html','wb') - -f.write(total_html) -f.close() +if __name__ == '__main__': + generatereport() -- 2.30.2