Updating report generation code

author Sarah Wait Zaranek <swz@curii.com>

Wed, 1 Jul 2020 18:51:42 +0000 (18:51 +0000)

committer Sarah Wait Zaranek <swz@curii.com>

Wed, 1 Jul 2020 18:51:42 +0000 (18:51 +0000)
author Sarah Wait Zaranek <swz@curii.com>
Wed, 1 Jul 2020 18:51:42 +0000 (18:51 +0000)
committer Sarah Wait Zaranek <swz@curii.com>
Wed, 1 Jul 2020 18:51:42 +0000 (18:51 +0000)
diff --git a/WGS-processing/src/annotation/generatereport.py b/WGS-processing/src/annotation/generatereport.py

index df9d9ad3267960806e06cb60ab2e76af962acc5b..d07ec932be32379340f69a10ed8fc2e3ca1cf0af 100644 (file)
--- a/WGS-processing/src/annotation/generatereport.py
+++ b/WGS-processing/src/annotation/generatereport.py
@@ -4,6 +4,22 @@ import pandas as pd
  import io
  import argparse
  
+def tablegeneration(reportdata,sectionlabel):
+    labelhtml = '<h2>'+sectionlabel+'</h2>'
+    # Build one report section: select the display columns, render them as an HTML table, and prepend the <h2> section label.
+    reportdatasub = reportdata[["Variant ID", "Allele ID", "Clinical Significance","Disease Name", "Frequency EXAC", "Frequency 1000 Genomes Project","Zygosity","URL"]]
+    # Replace '|' in Disease Name with '<br/>'; the &lt;/&gt; replacements below presumably exist to restore these tags after to_html escapes them.
+    reportdatasub['Disease Name'] = reportdatasub['Disease Name'].str.replace('|','<br/>')  # NOTE(review): assigns into a column subset of the caller's frame; pandas may warn (SettingWithCopyWarning) - confirm
+    str_io = io.StringIO()
+    reportdatasub.to_html(buf=str_io, classes='table table-bordered',index_names=False,index=False)
+    html_str = str_io.getvalue()
+    html_str_encoded = unicode(html_str).encode('utf8')  # NOTE(review): unicode() is a Python 2 builtin; confirm the target interpreter
+    html_str_encoded = html_str_encoded.replace('&lt;','<')
+    html_str_encoded = html_str_encoded.replace('&gt;','>')
+    html_str_encoded = html_str_encoded.replace('_',' ')  # runs over the whole table HTML string, including the URL column text
+    section_html = labelhtml+html_str_encoded
+    return section_html
+
  def generatereport():
  
      parser = argparse.ArgumentParser()
@@ -28,19 +44,9 @@ def generatereport():
      # reading data into dataframe
      headerlist = ["Variant ID", "Chromosome", "Position", "Ref","Alt","Allele ID", "Clinical Significance","Disease Name","Frequency GO-ESP", "Frequency EXAC", "Frequency 1000 Genomes Project","GT"]
      reportdata = pd.read_csv(filename,header=0,names=headerlist,sep='\t')
-    reportdata['Zygosity']="."
-
+    
      # defining zygosity
-    idxHOM1 = reportdata.GT=='1|1' 
-    idxHOM2 = reportdata.GT=='1/1'
-    idxHET1 = reportdata.GT=='1|0'
-    idxHET2 = reportdata.GT=='1/0'
-    idxHET3 = reportdata.GT=='0|1'
-    idxHET4 = reportdata.GT=='0/1'
-    idxHOM = idxHOM1 | idxHOM2
-    idxHET = idxHET1 | idxHET2 | idxHET3 | idxHET4
-    reportdata.Zygosity[idxHOM]='HOM'
-    reportdata.Zygosity[idxHET]='HET'
+    reportdata['Zygosity'] = reportdata.GT
  
      # creating url from variant ID
      clinvarURL =  "https://www.ncbi.nlm.nih.gov/clinvar/variation/" 
@@ -48,25 +54,40 @@ def generatereport():
      reportdata.to_json('test.json',orient='records')
      str_io = io.StringIO()
  
-    # creating html table from dataframe
-    reportdatasub = reportdata[["Variant ID", "Allele ID", "Clinical Significance","Disease Name", "Frequency EXAC", "Frequency 1000 Genomes Project","Zygosity","URL"]]
-    reportdatasub.to_html(buf=str_io, classes='table table-bordered',index_names=False,index=False)
-    html_str = str_io.getvalue()
-    html_str_encoded = unicode(html_str).encode('utf8')
-    html_str_encoded = html_str_encoded.replace('&lt;','<')
-    html_str_encoded = html_str_encoded.replace('&gt;','>')
-    html_str_encoded = html_str_encoded.replace('|','<br/>')
+    idxP = reportdata['Clinical Significance'].str.contains('Pathogenic')
+    idxLP = reportdata['Clinical Significance'].str.contains('Likely_pathogenic')
+    idxD = reportdata['Clinical Significance'].str.contains('drug_response') 
+    idxPro = reportdata['Clinical Significance'].str.contains('protective')
+    idxRisk = reportdata['Clinical Significance'].str.contains('risk_factor')
+    idxA = reportdata['Clinical Significance'].str.contains('Affects')
+    idxB = reportdata['Clinical Significance'].str.contains('Benign')
+    idxLB = reportdata['Clinical Significance'].str.contains('Likely_benign') 
+    idxAs = reportdata['Clinical Significance'].str.contains('association')
  
+    idxOther = ~(idxAs | idxLB | idxB | idxA | idxRisk | idxPro | idxD | idxP | idxLP)
+ 
      html_file = open(headfile, 'r')
      source_code_head = html_file.read() 
+    source_code_head = source_code_head.replace('ClinVar Report','ClinVar Report For ' + samplename)
      html_file.close()
  
      html_file = open(tailfile, 'r')
      source_code_tail = html_file.read()
      html_file.close()
+ 
+    pathogenic_html = tablegeneration(reportdata[idxP],'Pathogenic')
+    likely_pathogenic_html = tablegeneration(reportdata[idxLP],'Likely Pathogenic') 
+    drug_html = tablegeneration(reportdata[idxD],'Drug Response')
+    protective_html = tablegeneration(reportdata[idxPro],'Protective')
+    risk_html = tablegeneration(reportdata[idxRisk],'Risk Factor')
+    affects_html = tablegeneration(reportdata[idxA],'Affects')
+    association_html = tablegeneration(reportdata[idxAs],'Association')
+    benign_html = tablegeneration(reportdata[idxB],'Benign')
+    likely_benign_html = tablegeneration(reportdata[idxLB],'Likely Benign')
+    other_html = tablegeneration(reportdata[idxOther],'Other') 
  
      # combine html table with head and tail html for total report
-    total_html = source_code_head + html_str_encoded + source_code_tail
+    total_html = source_code_head + pathogenic_html + likely_pathogenic_html + drug_html + protective_html + risk_html + affects_html + association_html + other_html + benign_html + likely_benign_html + source_code_tail
    
      # write out report html
      f = open(samplename+'.html','wb')
diff --git a/WGS-processing/src/annotation/head.html b/WGS-processing/src/annotation/head.html

index 62dda85e12aad8dde25d0dc569c58361919c8558..a6e0f83f0c773120fd6ff89192c30ee3b6d53c24 100644 (file)
--- a/WGS-processing/src/annotation/head.html
+++ b/WGS-processing/src/annotation/head.html
@@ -2,9 +2,9 @@
  <html>
  <head>
    <style>
-   thead th { text-align:left; background:grey; color:white}
+  thead th { text-align:left; background:midnightblue; color:white}
     table {border-collapse:collapse; word-wrap:break-word;}
-   table td {border:solid 1px #fab; width:200px; word-wrap:break-word; background:lightgrey; overflow:hidden;}
+   table td {border:solid 2px; width:200px; word-wrap:break-word; background:lightgrey; overflow:hidden;}
    </style>
  <body>
  <h1> ClinVar Report</h1>
author	Sarah Wait Zaranek <swz@curii.com>
	Wed, 1 Jul 2020 18:51:42 +0000 (18:51 +0000)
committer	Sarah Wait Zaranek <swz@curii.com>
	Wed, 1 Jul 2020 18:51:42 +0000 (18:51 +0000)
WGS-processing/src/annotation/generatereport.py		patch | blob | history
WGS-processing/src/annotation/head.html		patch | blob | history