Log # low quality tile variants during import.
author: Tom Clegg <tom@tomclegg.ca>
Fri, 29 Jan 2021 01:34:07 +0000 (20:34 -0500)
committer: Tom Clegg <tom@tomclegg.ca>
Fri, 29 Jan 2021 01:34:07 +0000 (20:34 -0500)
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom@curii.com>

tilelib.go

index 926c0324a2c74f3b59a04ea97302803012629541..8c88956b97685df9a24176a812999103543d5051 100644 (file)
@@ -324,6 +324,7 @@ func (tilelib *tileLibrary) TileFasta(filelabel string, rdr io.Reader, matchChro
                log.Infof("%s %s getting %d librefs", filelabel, job.label, len(found))
                throttle := &throttle{Max: runtime.NumCPU()}
                path = path[:len(found)]
+               var lowquality int64
                for i, f := range found {
                        i, f := i, f
                        throttle.Acquire()
@@ -341,6 +342,9 @@ func (tilelib *tileLibrary) TileFasta(filelabel string, rdr io.Reader, matchChro
                                        endpos = found[i+1].pos + taglen
                                }
                                path[i] = tilelib.getRef(f.tagid, job.fasta[startpos:endpos])
+                               if countBases(job.fasta[startpos:endpos]) != endpos-startpos {
+                                       atomic.AddInt64(&lowquality, 1)
+                               }
                        }()
                }
                throttle.Wait()
@@ -352,7 +356,7 @@ func (tilelib *tileLibrary) TileFasta(filelabel string, rdr io.Reader, matchChro
                ret[job.label] = pathcopy
 
                basesIn := countBases(job.fasta)
-               log.Infof("%s %s fasta in %d coverage in %d path len %d skipped-out-of-order %d", filelabel, job.label, len(job.fasta), basesIn, len(path), skipped)
+               log.Infof("%s %s fasta in %d coverage in %d path len %d low-quality %d skipped-out-of-order %d", filelabel, job.label, len(job.fasta), basesIn, len(path), lowquality, skipped)
                stats = append(stats, importStats{
                        InputFile:              filelabel,
                        InputLabel:             job.label,