if err != nil {
return 1
}
- runner.Args = []string{"annotate", "-local=true", fmt.Sprintf("-variant-hash=%v", cmd.variantHash), "-max-tile-size", strconv.Itoa(cmd.maxTileSize), "-i", *inputFilename, "-o", "/mnt/output/tilevariants.tsv"}
+ runner.Args = []string{"annotate", "-local=true", fmt.Sprintf("-variant-hash=%v", cmd.variantHash), "-max-tile-size", strconv.Itoa(cmd.maxTileSize), "-i", *inputFilename, "-o", "/mnt/output/tilevariants.csv"}
var output string
output, err = runner.Run()
if err != nil {
return 1
}
- fmt.Fprintln(stdout, output+"/tilevariants.tsv")
+ fmt.Fprintln(stdout, output+"/tilevariants.csv")
return 0
}
}
refnamefield := ""
if refnamecol {
- refnamefield = "\t" + refname
+ refnamefield = "," + trimFilenameForLabel(refname)
}
- outch <- fmt.Sprintf("%d\t%s%s\t%s:g.%s\n", tag, varid, refnamefield, seqname, diff.String())
+ outch <- fmt.Sprintf("%d,%s%s,%s:g.%s\n", tag, varid, refnamefield, seqname, diff.String())
}
}()
}
priority := flags.Int("priority", 500, "container request priority")
inputFilename := flags.String("i", "-", "input `file`")
outputFilename := flags.String("o", "-", "output `file`")
- annotationsFilename := flags.String("output-annotations", "", "output `file` for tile variant annotations tsv")
- librefsFilename := flags.String("output-onehot2tilevar", "", "when using -one-hot, create tsv `file` mapping column# to tag# and variant#")
+ annotationsFilename := flags.String("output-annotations", "", "output `file` for tile variant annotations csv")
+ librefsFilename := flags.String("output-onehot2tilevar", "", "when using -one-hot, create csv `file` mapping column# to tag# and variant#")
+ labelsFilename := flags.String("output-labels", "", "output `file` for genome labels csv")
onehot := flags.Bool("one-hot", false, "recode tile variants as one-hot")
cmd.filter.Flags(flags)
err = flags.Parse(args)
fmt.Sprintf("-one-hot=%v", *onehot),
"-i", *inputFilename,
"-o", "/mnt/output/matrix.npy",
- "-output-annotations", "/mnt/output/annotations.tsv",
- "-output-onehot2tilevar", "/mnt/output/onehot2tilevar.tsv",
+ "-output-annotations", "/mnt/output/annotations.csv",
+ "-output-onehot2tilevar", "/mnt/output/onehot2tilevar.csv",
+ "-output-labels", "/mnt/output/labels.csv",
"-max-variants", fmt.Sprintf("%d", cmd.filter.MaxVariants),
"-min-coverage", fmt.Sprintf("%f", cmd.filter.MinCoverage),
"-max-tag", fmt.Sprintf("%d", cmd.filter.MaxTag),
}
log.Info("building numpy array")
- out, rows, cols := cgs2array(tilelib)
+ out, rows, cols, names := cgs2array(tilelib)
+
+ if *labelsFilename != "" {
+ log.Infof("writing labels to %s", *labelsFilename)
+ var f *os.File
+ f, err = os.OpenFile(*labelsFilename, os.O_CREATE|os.O_WRONLY, 0777)
+ if err != nil {
+ return 1
+ }
+ defer f.Close()
+ for i, name := range names {
+ _, err = fmt.Fprintf(f, "%d,%q\n", i, trimFilenameForLabel(name))
+ if err != nil {
+ err = fmt.Errorf("write %s: %w", *labelsFilename, err)
+ return 1
+ }
+ }
+ err = f.Close()
+ if err != nil {
+ err = fmt.Errorf("close %s: %w", *labelsFilename, err)
+ return 1
+ }
+ }
log.Info("writing numpy file")
var output io.WriteCloser
}
defer f.Close()
for i, libref := range librefs {
- _, err = fmt.Fprintf(f, "%d\t%d\t%d\n", i, libref.Tag, libref.Variant)
+ _, err = fmt.Fprintf(f, "%d,%d,%d\n", i, libref.Tag, libref.Variant)
if err != nil {
return err
}
return f.Close()
}
-func cgs2array(tilelib *tileLibrary) (data []int16, rows, cols int) {
- var cgnames []string
+func cgs2array(tilelib *tileLibrary) (data []int16, rows, cols int, cgnames []string) {
for name := range tilelib.compactGenomes {
cgnames = append(cgnames, name)
}
}
// Close does nothing and always reports success (nil), giving nopCloser
// a Close method with the io.Closer signature.
func (nopCloser) Close() error {
	return nil
}
+
+func trimFilenameForLabel(s string) string { // derive a short label: last path component of s minus known file suffixes
+ if i := strings.LastIndex(s, "/"); i >= 0 { // drop everything up to and including the final "/"
+ s = s[i+1:]
+ }
+ s = strings.TrimSuffix(s, ".gz") // each suffix is tried once, in this fixed order; an absent suffix is a no-op
+ s = strings.TrimSuffix(s, ".fa")
+ s = strings.TrimSuffix(s, ".fasta")
+ s = strings.TrimSuffix(s, ".1") // numeric tail left once the suffixes above are gone, e.g. "x.1.fasta.gz" -> "x"
+ s = strings.TrimSuffix(s, ".2")
+ s = strings.TrimSuffix(s, ".gz") // second pass: a ".gz" that only becomes the suffix after ".1"/".2" is removed, e.g. "x.vcf.gz.1"
+ s = strings.TrimSuffix(s, ".vcf") // so "x.vcf", "x.vcf.gz" and "x.vcf.gz.1" all become "x"
+ return s
+}
c.Check(annotateout.Len() > 0, check.Equals, true)
sorted := sortLines(annotateout.String())
c.Logf("%s", sorted)
- c.Check(sorted, check.Equals, sortLines(`0 8d4fe9a63921b chr1:g.161A>T
-0 8d4fe9a63921b chr1:g.178A>T
-0 8d4fe9a63921b chr1:g.1_3delinsGGC
-0 8d4fe9a63921b chr1:g.222_224del
-0 ba4263ca4199c chr1:g.1_3delinsGGC
-0 ba4263ca4199c chr1:g.222_224del
-0 ba4263ca4199c chr1:g.41_42delinsAA
-1 139890345dbb8 chr1:g.302_305delinsAAAA
-4 cbfca15d241d3 chr2:g.125_127delinsAAA
-4 cbfca15d241d3 chr2:g.1_3delinsAAA
-4 f5fafe9450b02 chr2:g.241_245delinsAAAAA
-4 f5fafe9450b02 chr2:g.291C>A
-4 fe9a71a42adb4 chr2:g.125_127delinsAAA
-6 e36dce85efbef chr2:g.471_472delinsAA
-6 f81388b184f4a chr2:g.470_472del
+ c.Check(sorted, check.Equals, sortLines(`0,8d4fe9a63921b,chr1:g.161A>T
+0,8d4fe9a63921b,chr1:g.178A>T
+0,8d4fe9a63921b,chr1:g.1_3delinsGGC
+0,8d4fe9a63921b,chr1:g.222_224del
+0,ba4263ca4199c,chr1:g.1_3delinsGGC
+0,ba4263ca4199c,chr1:g.222_224del
+0,ba4263ca4199c,chr1:g.41_42delinsAA
+1,139890345dbb8,chr1:g.302_305delinsAAAA
+4,cbfca15d241d3,chr2:g.125_127delinsAAA
+4,cbfca15d241d3,chr2:g.1_3delinsAAA
+4,f5fafe9450b02,chr2:g.241_245delinsAAAAA
+4,f5fafe9450b02,chr2:g.291C>A
+4,fe9a71a42adb4,chr2:g.125_127delinsAAA
+6,e36dce85efbef,chr2:g.471_472delinsAA
+6,f81388b184f4a,chr2:g.470_472del
`))
}