src/annotation/generatereport.py

   1 import numpy as np
   2 import scipy as scipy
   3 import pandas as pd
   4 import io
   5 import argparse
   6
   7 def generatereport():
   8
   9     parser = argparse.ArgumentParser()
  10     parser.add_argument('txtfilename', metavar='VCF2TXTFILENAME', help='text file of info to annotate')
  11     parser.add_argument('samplename', metavar='SAMPLENAME', help='name of sample to use on report')
  12     parser.add_argument('headfile', metavar='REPORTHEADHTML', help='head html for report')
  13     parser.add_argument('tailfile', metavar='REPORTTAILHTML', help='tail html for report')
  14     args = parser.parse_args()
  15
  16     pd.set_option("display.max_colwidth", 10000)
  17
  18 #    filename = "reportdata.txt"
  19 #    samplename = "hu34D5B9_var-GS000015891-ASM"
  20 #    headfile = "head.html"
  21 #    tailfile = "tail.html"
  22
  23     filename = args.txtfilename
  24     samplename = args.samplename
  25     headfile = args.headfile
  26     tailfile = args.tailfile
  27
  28     # reading data into dataframe
  29     headerlist = ["Variant ID", "Chromosome", "Position", "Ref","Alt","Allele ID", "Clinical Significance","Disease Name","Frequency GO-ESP", "Frequency EXAC", "Frequency 1000 Genomes Project","GT"]
  30     reportdata = pd.read_csv(filename,header=0,names=headerlist,sep='\t')
  31     reportdata['Zygosity']="."
  32
  33     # defining zygosity
  34     idxHOM1 = reportdata.GT=='1|1'
  35     idxHOM2 = reportdata.GT=='1/1'
  36     idxHET1 = reportdata.GT=='1|0'
  37     idxHET2 = reportdata.GT=='1/0'
  38     idxHET3 = reportdata.GT=='0|1'
  39     idxHET4 = reportdata.GT=='0/1'
  40     idxHOM = idxHOM1 | idxHOM2
  41     idxHET = idxHET1 | idxHET2 | idxHET3 | idxHET4
  42     reportdata.Zygosity[idxHOM]='HOM'
  43     reportdata.Zygosity[idxHET]='HET'
  44
  45     # creating url from variant ID
  46     clinvarURL =  "https://www.ncbi.nlm.nih.gov/clinvar/variation/"
  47     reportdata['URL'] = '<a href=' + clinvarURL + reportdata['Variant ID'].apply(str) + '> Link to ClinVar</a>'
  48     reportdata.to_json('test.json',orient='records')
  49     str_io = io.StringIO()
  50
  51     # creating html table from dataframe
  52     reportdatasub = reportdata[["Variant ID", "Allele ID", "Clinical Significance","Disease Name", "Frequency EXAC", "Frequency 1000 Genomes Project","Zygosity","URL"]]
  53     reportdatasub.to_html(buf=str_io, classes='table table-bordered',index_names=False,index=False)
  54     html_str = str_io.getvalue()
  55     html_str_encoded = unicode(html_str).encode('utf8')
  56     html_str_encoded = html_str_encoded.replace('&lt;','<')
  57     html_str_encoded = html_str_encoded.replace('&gt;','>')
  58     html_str_encoded = html_str_encoded.replace('|','<br/>')
  59
  60     html_file = open(headfile, 'r')
  61     source_code_head = html_file.read()
  62     html_file.close()
  63
  64     html_file = open(tailfile, 'r')
  65     source_code_tail = html_file.read()
  66     html_file.close()
  67
  68     # combine html table with head and tail html for total report
  69     total_html = source_code_head + html_str_encoded + source_code_tail
  70
  71     # write out report html
  72     f = open(samplename+'.html','wb')
  73     f.write(total_html)
  74     f.close()
  75
# Run the report generator only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    generatereport()