WGS-processing/src/annotation/generatereport.py

   1 import numpy as np
   2 import scipy as scipy
   3 import pandas as pd
   4 import io
   5 import argparse
   6
   7 def tablegeneration(reportdata,sectionlabel):
   8     labelhtml = '<h2>'+sectionlabel+'</h2>'
   9     # creating html table from dataframe
  10     reportdatasub = reportdata[["Variant ID", "Allele ID", "Clinical Significance","Disease Name", "Frequency EXAC", "Frequency 1000 Genomes Project","Zygosity","URL"]]
  11
  12     reportdatasub['Disease Name'] = reportdatasub['Disease Name'].str.replace('|','<br/>')
  13     str_io = io.StringIO()
  14     reportdatasub.to_html(buf=str_io, classes='table table-bordered',index_names=False,index=False)
  15     html_str = str_io.getvalue()
  16     html_str_encoded = unicode(html_str).encode('utf8')
  17     html_str_encoded = html_str_encoded.replace('&lt;','<')
  18     html_str_encoded = html_str_encoded.replace('&gt;','>')
  19     html_str_encoded = html_str_encoded.replace('_',' ')
  20     section_html = labelhtml+html_str_encoded
  21     return section_html
  22
  23 def generatereport():
  24
  25     parser = argparse.ArgumentParser()
  26     parser.add_argument('txtfilename', metavar='VCF2TXTFILENAME', help='text file of info to annotate')
  27     parser.add_argument('samplename', metavar='SAMPLENAME', help='name of sample to use on report')
  28     parser.add_argument('headfile', metavar='REPORTHEADHTML', help='head html for report')
  29     parser.add_argument('tailfile', metavar='REPORTTAILHTML', help='tail html for report')
  30     args = parser.parse_args()
  31
  32     pd.set_option("display.max_colwidth", 10000)
  33
  34 #    filename = "reportdata.txt"
  35 #    samplename = "hu34D5B9_var-GS000015891-ASM"
  36 #    headfile = "head.html"
  37 #    tailfile = "tail.html"
  38
  39     filename = args.txtfilename
  40     samplename = args.samplename
  41     headfile = args.headfile
  42     tailfile = args.tailfile
  43
  44     # reading data into dataframe
  45     headerlist = ["Variant ID", "Chromosome", "Position", "Ref","Alt","Allele ID", "Clinical Significance","Disease Name","Frequency GO-ESP", "Frequency EXAC", "Frequency 1000 Genomes Project","GT"]
  46     reportdata = pd.read_csv(filename,header=0,names=headerlist,sep='\t')
  47
  48     # defining zygosity
  49     reportdata['Zygosity'] = reportdata.GT
  50
  51     # creating url from variant ID
  52     clinvarURL =  "https://www.ncbi.nlm.nih.gov/clinvar/variation/"
  53     reportdata['URL'] = '<a href=' + clinvarURL + reportdata['Variant ID'].apply(str) + '> Link to ClinVar</a>'
  54     reportdata.to_json('test.json',orient='records')
  55     str_io = io.StringIO()
  56
  57     idxP = reportdata['Clinical Significance'].str.contains('Pathogenic')
  58     idxLP = reportdata['Clinical Significance'].str.contains('Likely_pathogenic')
  59     idxD = reportdata['Clinical Significance'].str.contains('drug_response')
  60     idxPro = reportdata['Clinical Significance'].str.contains('protective')
  61     idxRisk = reportdata['Clinical Significance'].str.contains('risk_factor')
  62     idxA = reportdata['Clinical Significance'].str.contains('Affects')
  63     idxB = reportdata['Clinical Significance'].str.contains('Benign')
  64     idxLB = reportdata['Clinical Significance'].str.contains('Likely_benign')
  65     idxAs = reportdata['Clinical Significance'].str.contains('association')
  66
  67     idxOther = ~(idxAs | idxLB | idxB | idxA | idxRisk | idxPro | idxD | idxP | idxLP)
  68
  69     html_file = open(headfile, 'r')
  70     source_code_head = html_file.read()
  71     source_code_head = source_code_head.replace('ClinVar Report','ClinVar Report For ' + samplename)
  72     html_file.close()
  73
  74     html_file = open(tailfile, 'r')
  75     source_code_tail = html_file.read()
  76     html_file.close()
  77
  78     pathogenic_html = tablegeneration(reportdata[idxP],'Pathogenic')
  79     likely_pathogenic_html = tablegeneration(reportdata[idxLP],'Likely Pathogenic')
  80     drug_html = tablegeneration(reportdata[idxD],'Drug Response')
  81     protective_html = tablegeneration(reportdata[idxPro],'Protective')
  82     risk_html = tablegeneration(reportdata[idxRisk],'Risk Factor')
  83     affects_html = tablegeneration(reportdata[idxA],'Affects')
  84     association_html = tablegeneration(reportdata[idxAs],'Association')
  85     benign_html = tablegeneration(reportdata[idxB],'Benign')
  86     likely_benign_html = tablegeneration(reportdata[idxLB],'Likely Benign')
  87     other_html = tablegeneration(reportdata[idxOther],'Other')
  88
  89     # combine html table with head and tail html for total report
  90     total_html = source_code_head + pathogenic_html + likely_pathogenic_html + drug_html + protective_html + risk_html + affects_html + association_html + other_html + benign_html + likely_benign_html + source_code_tail
  91
  92     # write out report html
  93     f = open(samplename+'.html','wb')
  94     f.write(total_html)
  95     f.close()
  96
# Run the report generator only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    generatereport()