- if cmd.genomeFile == "" {
- // TODO: get chromosome sizes from VCF header, ##contig=<ID=chr10,length=133797422>
- return errors.New("cannot apply mask without -genome argument")
+ chrSize := map[string]int{}
+
+ vcffile, err := open(infile)
+ if err != nil {
+ return err
+ }
+ defer vcffile.Close()
+ var rdr io.Reader = vcffile
+ rdr = bufio.NewReaderSize(rdr, 8*1024*1024)
+ if strings.HasSuffix(infile, ".gz") {
+ rdr, err = gzip.NewReader(vcffile)
+ if err != nil {
+ return err
+ }
+ }
+ contigre := regexp.MustCompile(`([^=,]*)=([^>,]*)`)
+ scanner := bufio.NewScanner(rdr)
+ for scanner.Scan() {
+ if s := scanner.Text(); !strings.HasPrefix(s, "##") {
+ break
+ } else if !strings.HasPrefix(s, "##contig=<") {
+ continue
+ } else {
+ kv := map[string]string{}
+ for _, m := range contigre.FindAllStringSubmatch(s[10:], -1) {
+ kv[m[1]] = m[2]
+ }
+ if kv["ID"] != "" && kv["length"] != "" {
+ chrSize[kv["ID"]], _ = strconv.Atoi(kv["length"])
+ }
+ }
+ }
+ if err = scanner.Err(); err != nil {
+ return fmt.Errorf("error scanning input file %q: %s", infile, err)