+// Copyright (C) The Lightning Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
package lightning
import (
outputStats string
matchChromosome *regexp.Regexp
encoder *gob.Encoder
+ retainAfterEncoding bool // also keep imported genomes/refseqs in the tile library's in-memory maps (compactGenomes/refseqs) in addition to encoding them to the output
batchArgs
}
Client: arvadosClientFromEnv,
ProjectUUID: cmd.projectUUID,
APIAccess: true,
- RAM: 360000000000,
+ RAM: 700000000000,
VCPUs: 96,
Priority: cmd.priority,
KeepCache: 1,
return nil
}
-func (cmd *importer) tileFasta(tilelib *tileLibrary, infile string) (tileSeq, []importStats, error) {
+func (cmd *importer) tileFasta(tilelib *tileLibrary, infile string, isRef bool) (tileSeq, []importStats, error) {
var input io.ReadCloser
input, err := open(infile)
if err != nil {
}
defer input.Close()
}
- return tilelib.TileFasta(infile, input, cmd.matchChromosome)
+ return tilelib.TileFasta(infile, input, cmd.matchChromosome, isRef)
}
func (cmd *importer) loadTagLibrary() (*tagLibrary, error) {
defer phases.Done()
log.Printf("%s starting", infile)
defer log.Printf("%s done", infile)
- tseqs, stats, err := cmd.tileFasta(tilelib, infile)
+ tseqs, stats, err := cmd.tileFasta(tilelib, infile, false)
allstats[idx*2] = stats
var kept, dropped int
variants[0], kept, dropped = tseqs.Variants()
defer phases.Done()
log.Printf("%s starting", infile2)
defer log.Printf("%s done", infile2)
- tseqs, stats, err := cmd.tileFasta(tilelib, infile2)
+ tseqs, stats, err := cmd.tileFasta(tilelib, infile2, false)
allstats[idx*2+1] = stats
var kept, dropped int
variants[1], kept, dropped = tseqs.Variants()
log.Printf("%s found %d unique tags plus %d repeats", infile2, kept, dropped)
-
return err
}
} else if fastaFilenameRe.MatchString(infile) {
defer phases.Done()
log.Printf("%s starting", infile)
defer log.Printf("%s done", infile)
- tseqs, stats, err := cmd.tileFasta(tilelib, infile)
+ tseqs, stats, err := cmd.tileFasta(tilelib, infile, true)
allstats[idx*2] = stats
if err != nil {
return err
totlen += len(tseq)
}
log.Printf("%s tiled %d seqs, total len %d", infile, len(tseqs), totlen)
+
+ if cmd.retainAfterEncoding {
+ tilelib.mtx.Lock()
+ if tilelib.refseqs == nil {
+ tilelib.refseqs = map[string]map[string][]tileLibRef{}
+ }
+ tilelib.refseqs[infile] = tseqs
+ tilelib.mtx.Unlock()
+ }
+
return cmd.encoder.Encode(LibraryEntry{
CompactSequences: []CompactSequence{{Name: infile, TileSequences: tseqs}},
})
if len(errs) > 0 {
return
}
+ variants := flatten(variants)
err := cmd.encoder.Encode(LibraryEntry{
- CompactGenomes: []CompactGenome{{Name: infile, Variants: flatten(variants)}},
+ CompactGenomes: []CompactGenome{{Name: infile, Variants: variants}},
})
if err != nil {
select {
default:
}
}
+ if cmd.retainAfterEncoding {
+ tilelib.mtx.Lock()
+ if tilelib.compactGenomes == nil {
+ tilelib.compactGenomes = make(map[string][]tileVariantID)
+ }
+ tilelib.compactGenomes[infile] = variants
+ tilelib.mtx.Unlock()
+ }
}()
}
go close(todo)
return
}
defer consensus.Wait()
- tileseq, stats, err = tilelib.TileFasta(fmt.Sprintf("%s phase %d", infile, phase+1), stdout, cmd.matchChromosome)
+ tileseq, stats, err = tilelib.TileFasta(fmt.Sprintf("%s phase %d", infile, phase+1), stdout, cmd.matchChromosome, false)
if err != nil {
return
}