From d5a86b8d482c8897627898110e2dfa6d43798228 Mon Sep 17 00:00:00 2001 From: Tom Clegg Date: Mon, 1 Nov 2021 10:00:38 -0400 Subject: [PATCH] Avoid empty "ref" field in anno2vcf output. refs #17763 Arvados-DCO-1.1-Signed-off-by: Tom Clegg --- anno2vcf.go | 23 +++++++++++++++++++---- slicenumpy.go | 2 +- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/anno2vcf.go b/anno2vcf.go index 47af7e5caf..3be030036d 100644 --- a/anno2vcf.go +++ b/anno2vcf.go @@ -131,8 +131,8 @@ func (cmd *anno2vcf) RunCommand(prog string, args []string, stdin io.Reader, std return nil } fields := bytes.Split(line, []byte{','}) - if len(fields) != 8 { - return fmt.Errorf("%s line %d: wrong number of fields (%d != %d): %q", fi.Name(), lineIdx+1, len(fields), 8, line) + if len(fields) < 8 { + return fmt.Errorf("%s line %d: wrong number of fields (%d < %d): %q", fi.Name(), lineIdx+1, len(fields), 8, line) } tile, _ := strconv.ParseInt(string(fields[0]), 10, 64) variant, _ := strconv.ParseInt(string(fields[2]), 10, 64) @@ -141,12 +141,27 @@ func (cmd *anno2vcf) RunCommand(prog string, args []string, stdin io.Reader, std if calls[seq] == nil { calls[seq] = make([]*call, 0, len(lines)/50) } + del := fields[6] + ins := fields[7] + if len(del) == 0 && len(fields) >= 9 { + // "123,,AA,T" means 123insAA + // preceded by T. We record it + // here as 122TdelinsTAA to + // avoid writing an empty + // "ref" field in our VCF. + del = append([]byte(nil), fields[8]...) + ins = append(append([]byte(nil), del...), ins...) + position -= int64(len(del)) + } else { + del = append([]byte(nil), del...) + ins = append([]byte(nil), ins...) + } calls[seq] = append(calls[seq], &call{ tile: int(tile), variant: int(variant), position: int(position), - deletion: append([]byte(nil), fields[6]...), - insertion: append([]byte(nil), fields[7]...), + deletion: del, + insertion: ins, }) } mtx.Lock() diff --git a/slicenumpy.go b/slicenumpy.go index 5aa97ac5a7..fff70d7379 100644 --- a/slicenumpy.go +++ b/slicenumpy.go @@ -349,7 +349,7 @@ func (cmd *sliceNumpy) RunCommand(prog string, args []string, stdin io.Reader, s diffs, _ := hgvs.Diff(reftilestr, strings.ToUpper(string(tv.Sequence)), 0) for _, diff := range diffs { diff.Position += rt.pos - fmt.Fprintf(annow, "%d,%d,%d,%s:g.%s,%s,%d,%s,%s\n", tag, outcol, remap[v], rt.seqname, diff.String(), rt.seqname, diff.Position, diff.Ref, diff.New) + fmt.Fprintf(annow, "%d,%d,%d,%s:g.%s,%s,%d,%s,%s,%s\n", tag, outcol, remap[v], rt.seqname, diff.String(), rt.seqname, diff.Position, diff.Ref, diff.New, diff.Left) } } } -- 2.30.2