Add cwl and docker files
[lightning.git] / cwl / preprocess / gvcf / src / filter-gvcf
1 # Copyright (C) The Lightning Authors. All rights reserved.
2 #
3 # SPDX-License-Identifier: AGPL-3.0
4
5 #!/usr/bin/python
6
7 import argparse
8 import sys
9
10
def _record_passes(fields, quality_threshold, keep_gq_dot):
    """Decide whether one gVCF data record survives the GQ filter.

    Args:
        fields: The tab-split columns of a data line; the caller guarantees
            at least 10 columns, so index 8 is FORMAT and index 9 is the
            first sample column.
        quality_threshold: Minimum integer GQ a record needs to be kept.
        keep_gq_dot: When true, records whose GQ is "." are kept.

    Returns:
        True if the record should be written to the output.
    """
    format_keys = fields[8].split(":")
    sample_values = fields[9].split(":")

    # Records that carry no GQ key at all are passed through unfiltered.
    if "GQ" not in format_keys:
        return True

    gq_index = format_keys.index("GQ")
    # A sample column may omit trailing sub-fields; an omitted GQ is treated
    # as the missing value "." instead of raising IndexError.
    if gq_index < len(sample_values):
        gq = sample_values[gq_index]
    else:
        gq = "."

    if gq.isdigit():
        return quality_threshold <= int(gq)
    # Anything that is neither a non-negative integer nor "." is dropped.
    return gq == "." and keep_gq_dot


def filter_gvcf(argv=None, out=None):
    """Filter a gVCF stream by genotype quality (GQ).

    Header lines (starting with '#') are always written through. Blank lines
    and data lines with fewer than 10 tab-separated columns are dropped.
    Remaining records are kept when their GQ is an integer greater than or
    equal to the threshold, when GQ is "." and -k/--keepGQdot was given, or
    when the FORMAT column has no GQ key.

    Args:
        argv: Optional list of command-line arguments. When None, argparse
            falls back to sys.argv[1:], which is the original behaviour.
        out: Optional writable text stream for the result. Defaults to
            sys.stdout.
    """
    # setting up inputs
    parser = argparse.ArgumentParser(
        prog="filter-gvcf",
        description="Filter a gVCF with a user-set quality threshold.")
    parser.add_argument(
        "-k", "--keepGQdot", action="store_true",
        help="Keeps the variant when GQ is '.'")
    parser.add_argument(
        "quality_threshold", metavar="QUALITY", type=int,
        help="Quality threshold")
    parser.add_argument(
        "gvcf", metavar="GVCF", nargs='?',
        type=argparse.FileType('r'), default=sys.stdin,
        help="Input gVCF to filter from stdin")

    args = parser.parse_args(argv)
    if out is None:
        out = sys.stdout
    gvcf = args.gvcf

    try:
        for line in gvcf:
            line = line.strip()

            if not line:
                continue

            # retain header and info lines
            if line[0] == '#':
                out.write(line + "\n")
                continue

            fields = line.split('\t')
            if len(fields) < 10:
                continue

            # filter quality scores below the threshold
            if _record_passes(fields, args.quality_threshold, args.keepGQdot):
                out.write(line + "\n")
    finally:
        # Close a file argparse opened for us; leave stdin alone.
        if gvcf is not sys.stdin:
            gvcf.close()
58
# Run the filter only when this file is executed as a script.
if __name__ == "__main__":
    filter_gvcf()