1 // Copyright (C) The Lightning Authors. All rights reserved.
3 // SPDX-License-Identifier: AGPL-3.0
23 "git.arvados.org/arvados.git/sdk/go/arvados"
24 log "github.com/sirupsen/logrus"
27 type anno2vcf struct {
30 func (cmd *anno2vcf) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int {
34 fmt.Fprintf(stderr, "%s\n", err)
37 flags := flag.NewFlagSet("", flag.ContinueOnError)
38 flags.SetOutput(stderr)
39 pprof := flags.String("pprof", "", "serve Go profile data at http://`[addr]:port`")
40 runlocal := flags.Bool("local", false, "run on local host (default: run in an arvados container)")
41 projectUUID := flags.String("project", "", "project `UUID` for output data")
42 priority := flags.Int("priority", 500, "container request priority")
43 inputDir := flags.String("input-dir", "./in", "input `directory`")
44 outputDir := flags.String("output-dir", "./out", "output `directory`")
45 err = flags.Parse(args)
46 if err == flag.ErrHelp {
49 } else if err != nil {
55 log.Println(http.ListenAndServe(*pprof, nil))
60 runner := arvadosContainerRunner{
61 Name: "lightning anno2vcf",
62 Client: arvados.NewClientFromEnv(),
63 ProjectUUID: *projectUUID,
70 err = runner.TranslatePaths(inputDir)
74 runner.Args = []string{"anno2vcf", "-local=true",
76 "-input-dir", *inputDir,
77 "-output-dir", "/mnt/output",
80 output, err = runner.Run()
84 fmt.Fprintln(stdout, output)
88 d, err := open(*inputDir)
94 fis, err := d.Readdir(-1)
100 sort.Slice(fis, func(i, j int) bool { return fis[i].Name() < fis[j].Name() })
112 throttle := throttle{Max: runtime.GOMAXPROCS(0)}
113 log.Print("reading input files")
114 for _, fi := range fis {
115 if !strings.HasSuffix(fi.Name(), "annotations.csv") {
118 filename := *inputDir + "/" + fi.Name()
121 defer throttle.Release()
122 log.Printf("reading %s", filename)
123 buf, err := ioutil.ReadFile(filename)
125 throttle.Report(fmt.Errorf("%s: %s", filename, err))
128 lines := bytes.Split(buf, []byte{'\n'})
129 calls := make([]*call, 0, len(lines))
130 for lineIdx, line := range lines {
134 if lineIdx & ^0xfff == 0 && throttle.Err() != nil {
137 fields := bytes.Split(line, []byte{','})
138 if len(fields) != 8 {
139 throttle.Report(fmt.Errorf("%s line %d: wrong number of fields (%d != %d): %q", fi.Name(), lineIdx+1, len(fields), 8, line))
142 tile, _ := strconv.ParseInt(string(fields[0]), 10, 64)
143 variant, _ := strconv.ParseInt(string(fields[2]), 10, 64)
144 position, _ := strconv.ParseInt(string(fields[5]), 10, 64)
145 calls = append(calls, &call{
147 variant: int(variant),
148 sequence: append([]byte(nil), fields[4]...),
149 position: int(position),
150 deletion: append([]byte(nil), fields[6]...),
151 insertion: append([]byte(nil), fields[7]...),
155 allcalls = append(allcalls, calls...)
160 if throttle.Err() != nil {
161 log.Print(throttle.Err())
165 sort.Slice(allcalls, func(i, j int) bool {
166 ii, jj := allcalls[i], allcalls[j]
167 if cmp := bytes.Compare(ii.sequence, jj.sequence); cmp != 0 {
170 if cmp := ii.position - jj.position; cmp != 0 {
173 if cmp := len(ii.deletion) - len(jj.deletion); cmp != 0 {
176 if cmp := bytes.Compare(ii.insertion, jj.insertion); cmp != 0 {
179 if cmp := ii.tile - jj.tile; cmp != 0 {
182 return ii.variant < jj.variant
185 vcfFilename := *outputDir + "/annotations.vcf"
186 log.Printf("writing %s", vcfFilename)
187 f, err := os.Create(vcfFilename)
192 bufw := bufio.NewWriterSize(f, 1<<20)
193 _, err = fmt.Fprintf(bufw, `##fileformat=VCFv4.0
194 ##INFO=<ID=TV,Number=.,Type=String,Description="tile-variant">
195 #CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO
200 placeholder := []byte{'.'}
201 for i := 0; i < len(allcalls); {
204 info := fmt.Sprintf("TV=,%d-%d,", call.tile, call.variant)
205 for i < len(allcalls) &&
206 bytes.Equal(call.sequence, allcalls[i].sequence) &&
207 call.position == allcalls[i].position &&
208 len(call.deletion) == len(allcalls[i].deletion) &&
209 bytes.Equal(call.insertion, allcalls[i].insertion) {
212 info += fmt.Sprintf("%d-%d,", call.tile, call.variant)
214 deletion := call.deletion
215 if len(deletion) == 0 {
216 deletion = placeholder
218 insertion := call.insertion
219 if len(insertion) == 0 {
220 insertion = placeholder
222 _, err = fmt.Fprintf(bufw, "%s\t%d\t.\t%s\t%s\t.\t.\t%s\n", call.sequence, call.position, deletion, insertion, info)