Update memory-size log message.
[lightning.git] / anno2vcf.go
index 3be030036d7c20f817f95a43439a24aa6a5f8f4b..85d89eb45896ae00ce50c230a353e60c91f44565 100644 (file)
@@ -10,7 +10,6 @@ import (
        "flag"
        "fmt"
        "io"
-       "io/ioutil"
        "net/http"
        _ "net/http/pprof"
        "os"
@@ -105,6 +104,7 @@ func (cmd *anno2vcf) RunCommand(prog string, args []string, stdin io.Reader, std
                position  int
                deletion  []byte
                insertion []byte
+               hgvsID    []byte
        }
        allcalls := map[string][]*call{}
        var mtx sync.Mutex
@@ -117,23 +117,35 @@ func (cmd *anno2vcf) RunCommand(prog string, args []string, stdin io.Reader, std
                filename := *inputDir + "/" + fi.Name()
                thr.Go(func() error {
                        log.Printf("reading %s", filename)
-                       buf, err := ioutil.ReadFile(filename)
+                       f, err := open(filename)
+                       if err != nil {
+                               return err
+                       }
+                       defer f.Close()
+                       buf, err := io.ReadAll(f)
                        if err != nil {
                                return fmt.Errorf("%s: %s", filename, err)
                        }
+                       f.Close()
                        lines := bytes.Split(buf, []byte{'\n'})
                        calls := map[string][]*call{}
                        for lineIdx, line := range lines {
                                if len(line) == 0 {
                                        continue
                                }
-                               if lineIdx & ^0xfff == 0 && thr.Err() != nil {
+                               if lineIdx&0xff == 0 && thr.Err() != nil {
                                        return nil
                                }
                                fields := bytes.Split(line, []byte{','})
                                if len(fields) < 8 {
                                        return fmt.Errorf("%s line %d: wrong number of fields (%d < %d): %q", fi.Name(), lineIdx+1, len(fields), 8, line)
                                }
+                               hgvsID := fields[3]
+                               if len(hgvsID) < 2 {
+                                       // "=" reference or ""
+                                       // non-diffable tile variant
+                                       continue
+                               }
                                tile, _ := strconv.ParseInt(string(fields[0]), 10, 64)
                                variant, _ := strconv.ParseInt(string(fields[2]), 10, 64)
                                position, _ := strconv.ParseInt(string(fields[5]), 10, 64)
@@ -143,15 +155,18 @@ func (cmd *anno2vcf) RunCommand(prog string, args []string, stdin io.Reader, std
                                }
                                del := fields[6]
                                ins := fields[7]
-                               if len(del) == 0 && len(fields) >= 9 {
+                               if (len(del) == 0 || len(ins) == 0) && len(fields) >= 9 {
                                        // "123,,AA,T" means 123insAA
                                        // preceded by T. We record it
-                                       // here as 122TdelinsTAA to
+                                       // here as "122 T TAA" to
                                        // avoid writing an empty
-                                       // "ref" field in our VCF.
-                                       del = append([]byte(nil), fields[8]...)
-                                       ins = append(append([]byte(nil), del...), ins...)
-                                       position -= int64(len(del))
+                                       // "ref" field in our
+                                       // VCF. Similarly, we record
+                                       // deletions as "122 TAA T"
+                                       // rather than "123 AA .".
+                                       del = append(append(make([]byte, 0, len(fields[8])+len(del)), fields[8]...), del...)
+                                       ins = append(append(make([]byte, 0, len(fields[8])+len(ins)), fields[8]...), ins...)
+                                       position -= int64(len(fields[8]))
                                } else {
                                        del = append([]byte(nil), del...)
                                        ins = append([]byte(nil), ins...)
@@ -162,6 +177,7 @@ func (cmd *anno2vcf) RunCommand(prog string, args []string, stdin io.Reader, std
                                        position:  int(position),
                                        deletion:  del,
                                        insertion: ins,
+                                       hgvsID:    hgvsID,
                                })
                        }
                        mtx.Lock()
@@ -235,7 +251,7 @@ func (cmd *anno2vcf) RunCommand(prog string, args []string, stdin io.Reader, std
                                if len(insertion) == 0 {
                                        insertion = placeholder
                                }
-                               _, err = fmt.Fprintf(bufw, "%s\t%d\t.\t%s\t%s\t.\t.\t%s\n", seq, call.position, deletion, insertion, info)
+                               _, err = fmt.Fprintf(bufw, "%s\t%d\t%s\t%s\t%s\t.\t.\t%s\n", seq, call.position, call.hgvsID, deletion, insertion, info)
                                if err != nil {
                                        return err
                                }