import numpy as np
import scipy as scipy
import pandas as pd
import io
import argparse
def tablegeneration(reportdata, sectionlabel):
    """Build the HTML fragment for one report section.

    The fragment is the section label followed by an HTML table holding a
    fixed subset of the report columns.

    Args:
        reportdata: DataFrame that contains at least the columns listed in
            ``columns`` below. It is not modified.
        sectionlabel: Text placed in front of the table.

    Returns:
        str: The label followed by the table markup, with every underscore
        in the table markup replaced by a space.

    Raises:
        KeyError: If one of the required columns is missing.
    """
    # NOTE(review): the label is emitted as bare text surrounded by newlines;
    # confirm whether heading markup is expected around it.
    labelhtml = '\n' + sectionlabel + '\n'

    columns = [
        "Variant ID",
        "Allele ID",
        "Clinical Significance",
        "Disease Name",
        "Frequency EXAC",
        "Frequency 1000 Genomes Project",
        "Zygosity",
        "URL",
    ]
    # Work on an explicit copy so the column assignment below never targets a
    # view of the caller's frame (avoids pandas' chained-assignment warning).
    reportdatasub = reportdata[columns].copy()

    # Put each '|'-separated disease name on its own line. regex=False keeps
    # '|' a literal character instead of the regex alternation operator.
    reportdatasub['Disease Name'] = reportdatasub['Disease Name'].str.replace(
        '|', '\n', regex=False
    )

    # Without a buffer argument, to_html returns the markup as a string.
    # NOTE(review): to_html escapes '<', '>' and '&' in cell values by
    # default, so markup stored in a cell is displayed literally; confirm
    # that is the intended rendering for the URL column.
    html_str = reportdatasub.to_html(
        classes='table table-bordered', index_names=False, index=False
    )

    # Underscore-separated terms (e.g. "Likely_pathogenic") read better with
    # spaces. This is applied to the whole table markup, not only to cells.
    html_str = html_str.replace('_', ' ')

    return labelhtml + html_str
def generatereport():
    """Generate the HTML ClinVar report for one sample from the command line.

    Command-line arguments (positional):
        txtfilename: tab-separated text file with the variant information.
        samplename:  sample name; used in the report title and as the output
                     file name (``<samplename>.html``).
        headfile:    HTML placed before the generated tables.
        tailfile:    HTML placed after the generated tables.

    Side effects:
        Writes ``<samplename>.html`` and ``test.json`` to the current working
        directory and raises pandas' ``display.max_colwidth`` option.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('txtfilename', metavar='VCF2TXTFILENAME', help='text file of info to annotate')
    parser.add_argument('samplename', metavar='SAMPLENAME', help='name of sample to use on report')
    parser.add_argument('headfile', metavar='REPORTHEADHTML', help='head html for report')
    parser.add_argument('tailfile', metavar='REPORTTAILHTML', help='tail html for report')
    args = parser.parse_args()
    samplename = args.samplename

    # Raise the column-width limit so long cell text is not cut short with
    # "..." when the tables are rendered.
    pd.set_option("display.max_colwidth", 10000)

    # Read the data; header=0 together with names= discards the file's own
    # header row and uses these column names instead.
    headerlist = [
        "Variant ID", "Chromosome", "Position", "Ref", "Alt", "Allele ID",
        "Clinical Significance", "Disease Name", "Frequency GO-ESP",
        "Frequency EXAC", "Frequency 1000 Genomes Project", "GT",
    ]
    reportdata = pd.read_csv(args.txtfilename, header=0, names=headerlist, sep='\t')

    # Zygosity is currently a plain copy of the genotype column.
    reportdata['Zygosity'] = reportdata.GT

    # Build the ClinVar URL from the variant ID.
    # NOTE(review): the URL is stored as plain text; confirm whether a
    # clickable anchor is wanted in the rendered report.
    clinvar_url = "https://www.ncbi.nlm.nih.gov/clinvar/variation/"
    reportdata['URL'] = clinvar_url + reportdata['Variant ID'].astype(str)

    # NOTE(review): this dumps every record to a fixed file name in the
    # working directory; it looks like a debugging aid - confirm it is needed.
    reportdata.to_json('test.json', orient='records')

    # (section label, substring searched for in "Clinical Significance"),
    # listed in the order the sections appear in the report. "Other" has no
    # search term: it collects the rows that matched none of the others.
    sections = [
        ('Pathogenic', 'Pathogenic'),
        ('Likely Pathogenic', 'Likely_pathogenic'),
        ('Drug Response', 'drug_response'),
        ('Protective', 'protective'),
        ('Risk Factor', 'risk_factor'),
        ('Affects', 'Affects'),
        ('Association', 'association'),
        ('Other', None),
        ('Benign', 'Benign'),
        ('Likely Benign', 'Likely_benign'),
    ]

    # Missing significance values become '' so every mask is strictly boolean
    # (NaN in a mask breaks both ~ and boolean indexing); such rows end up
    # in "Other".
    significance = reportdata['Clinical Significance'].fillna('').astype(str)
    masks = {}
    matched_any = pd.Series(False, index=reportdata.index)
    for label, term in sections:
        if term is None:
            continue
        masks[label] = significance.str.contains(term, regex=False)
        matched_any = matched_any | masks[label]
    masks['Other'] = ~matched_any

    with open(args.headfile, 'r', encoding='utf-8') as head_fh:
        source_code_head = head_fh.read()
    source_code_head = source_code_head.replace(
        'ClinVar Report', 'ClinVar Report For ' + samplename
    )

    with open(args.tailfile, 'r', encoding='utf-8') as tail_fh:
        source_code_tail = tail_fh.read()

    # One table per section; a row can appear in several sections when its
    # significance text matches more than one search term.
    section_html = ''.join(
        tablegeneration(reportdata[masks[label]], label) for label, _ in sections
    )

    # Combine the tables with the head and tail HTML for the total report.
    total_html = source_code_head + section_html + source_code_tail

    # Text mode with an explicit encoding: the report is a str, which cannot
    # be written to a binary-mode handle.
    with open(samplename + '.html', 'w', encoding='utf-8') as report_fh:
        report_fh.write(total_html)
# Script entry point: build the report only when this file is executed
# directly, not when it is imported as a module.
if '__main__' == __name__:
    generatereport()