X-Git-Url: https://git.arvados.org/lightning.git/blobdiff_plain/00b2acd54dd1aa412f6f2bddc24b1bbb31c7ae3f..124f6eee3729446b0e095f788f81590ee1dc880d:/annotate.go diff --git a/annotate.go b/annotate.go index ac34ada40f..25651889aa 100644 --- a/annotate.go +++ b/annotate.go @@ -1,3 +1,7 @@ +// Copyright (C) The Lightning Authors. All rights reserved. +// +// SPDX-License-Identifier: AGPL-3.0 + package lightning import ( @@ -23,9 +27,11 @@ import ( ) type annotatecmd struct { - variantHash bool - maxTileSize int - tag2tagid map[string]tagID + dropTiles []bool + variantHash bool + maxTileSize int + tag2tagid map[string]tagID + reportAnnotation func(tag tagID, outcol int, variant tileVariantID, refname string, seqname string, pdi hgvs.Variant) } func (cmd *annotatecmd) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int { @@ -112,7 +118,7 @@ func (cmd *annotatecmd) RunCommand(prog string, args []string, stdin io.Reader, retainNoCalls: true, retainTileSequences: true, } - err = tilelib.LoadGob(context.Background(), input, strings.HasSuffix(*inputFilename, ".gz"), nil) + err = tilelib.LoadGob(context.Background(), input, strings.HasSuffix(*inputFilename, ".gz")) if err != nil { return 1 } @@ -183,6 +189,9 @@ func (cmd *annotatecmd) exportTileDiffs(outw io.Writer, tilelib *tileLibrary) er for _, seqname := range seqnames { seqname := seqname throttle.Acquire() + if throttle.Err() != nil { + break + } go func() { defer throttle.Release() throttle.Report(cmd.annotateSequence(throttle, outch, tilelib, taglen, refname, seqname, refcs[seqname], len(refs) > 1)) @@ -194,6 +203,10 @@ func (cmd *annotatecmd) exportTileDiffs(outw io.Writer, tilelib *tileLibrary) er } func (cmd *annotatecmd) annotateSequence(throttle *throttle, outch chan<- string, tilelib *tileLibrary, taglen int, refname, seqname string, reftiles []tileLibRef, refnamecol bool) error { + refnamefield := "" + if refnamecol { + refnamefield = "," + trimFilenameForLabel(refname) + } var refseq []byte // tilestart[123] is the index into refseq // where the tile for tag 123 was placed. @@ -216,17 +229,29 @@ func (cmd *annotatecmd) annotateSequence(throttle *throttle, outch chan<- string tileend[libref.Tag] = len(refseq) } log.Infof("seq %s len(refseq) %d len(tilestart) %d", seqname, len(refseq), len(tilestart)) + // outtag is tag's index in the subset of tags that aren't + // dropped. If there are 10M tags and half are dropped by + // dropTiles, tag ranges from 0 to 10M-1 and outtag ranges + // from 0 to 5M-1. + // + // IOW, in the matrix built by cgs2array(), {tag} is + // represented by columns {outtag}*2 and {outtag}*2+1. + outcol := -1 for tag, tvs := range tilelib.variant { + if len(cmd.dropTiles) > tag && cmd.dropTiles[tag] { + continue + } tag := tagID(tag) + outcol++ + // Must shadow outcol var to use safely in goroutine below. + outcol := outcol refstart, ok := tilestart[tag] if !ok { - // Tag didn't place on this - // reference sequence. (It - // might place on the same - // chromosome in a genome - // anyway, but we don't output - // the annotations that would - // result.) + // Tag didn't place on this reference + // sequence. (It might place on the same + // chromosome in a genome anyway, but we don't + // output the annotations that would result.) + // outch <- fmt.Sprintf("%d,%d,-1%s\n", tag, outcol, refnamefield) continue } for variant := 1; variant <= len(tvs); variant++ { @@ -275,11 +300,10 @@ func (cmd *annotatecmd) annotateSequence(throttle *throttle, outch chan<- string } else { varid = fmt.Sprintf("%d", variant) } - refnamefield := "" - if refnamecol { - refnamefield = "," + trimFilenameForLabel(refname) + outch <- fmt.Sprintf("%d,%d,%s%s,%s:g.%s\n", tag, outcol, varid, refnamefield, seqname, diff.String()) + if cmd.reportAnnotation != nil { + cmd.reportAnnotation(tag, outcol, variant, refname, seqname, diff) } - outch <- fmt.Sprintf("%d,%s%s,%s:g.%s\n", tag, varid, refnamefield, seqname, diff.String()) } }() }