Client: arvadosClientFromEnv,
ProjectUUID: cmd.projectUUID,
APIAccess: true,
- RAM: 700000000000,
+ RAM: 350000000000,
VCPUs: 96,
Priority: cmd.priority,
KeepCache: 1,
return nil
}
-func (cmd *importer) tileFasta(tilelib *tileLibrary, infile string) (tileSeq, []importStats, error) {
+func (cmd *importer) tileFasta(tilelib *tileLibrary, infile string, isRef bool) (tileSeq, []importStats, error) {
var input io.ReadCloser
input, err := open(infile)
if err != nil {
}
defer input.Close()
}
- return tilelib.TileFasta(infile, input, cmd.matchChromosome)
+ return tilelib.TileFasta(infile, input, cmd.matchChromosome, isRef)
}
func (cmd *importer) loadTagLibrary() (*tagLibrary, error) {
var (
vcfFilenameRe = regexp.MustCompile(`\.vcf(\.gz)?$`)
- fasta1FilenameRe = regexp.MustCompile(`\.1\.fa(sta)?(\.gz)?$`)
- fasta2FilenameRe = regexp.MustCompile(`\.2\.fa(sta)?(\.gz)?$`)
+ fasta1FilenameRe = regexp.MustCompile(`\.1\.fa(sta)?(\.fa(sta)?)?(\.gz)?$`)
+ fasta2FilenameRe = regexp.MustCompile(`\.2\.fa(sta)?(\.fa(sta)?)?(\.gz)?$`)
fastaFilenameRe = regexp.MustCompile(`\.fa(sta)?(\.gz)?$`)
)
if fasta1FilenameRe.MatchString(infile) {
todo <- func() error {
defer phases.Done()
- log.Printf("%s starting", infile)
+ log.Printf("%s (sample.1) starting tiling", infile)
defer log.Printf("%s done", infile)
- tseqs, stats, err := cmd.tileFasta(tilelib, infile)
+ tseqs, stats, err := cmd.tileFasta(tilelib, infile, false)
allstats[idx*2] = stats
var kept, dropped int
variants[0], kept, dropped = tseqs.Variants()
- log.Printf("%s found %d unique tags plus %d repeats", infile, kept, dropped)
+ log.Printf("%s (sample.1) found %d unique tags plus %d repeats", infile, kept, dropped)
return err
}
- infile2 := fasta1FilenameRe.ReplaceAllString(infile, `.2.fa$1$2`)
+ infile2 := fasta1FilenameRe.ReplaceAllString(infile, `.2.fa$1$2$4`)
todo <- func() error {
defer phases.Done()
- log.Printf("%s starting", infile2)
+ log.Printf("%s (sample.2) starting tiling", infile2)
defer log.Printf("%s done", infile2)
- tseqs, stats, err := cmd.tileFasta(tilelib, infile2)
+ tseqs, stats, err := cmd.tileFasta(tilelib, infile2, false)
allstats[idx*2+1] = stats
var kept, dropped int
variants[1], kept, dropped = tseqs.Variants()
- log.Printf("%s found %d unique tags plus %d repeats", infile2, kept, dropped)
+ log.Printf("%s (sample.2) found %d unique tags plus %d repeats", infile2, kept, dropped)
return err
}
} else if fastaFilenameRe.MatchString(infile) {
todo <- func() error {
defer phases.Done()
defer phases.Done()
- log.Printf("%s starting", infile)
+ log.Printf("%s (reference) starting tiling", infile)
defer log.Printf("%s done", infile)
- tseqs, stats, err := cmd.tileFasta(tilelib, infile)
+ tseqs, stats, err := cmd.tileFasta(tilelib, infile, true)
allstats[idx*2] = stats
if err != nil {
return err
for _, tseq := range tseqs {
totlen += len(tseq)
}
- log.Printf("%s tiled %d seqs, total len %d", infile, len(tseqs), totlen)
+ log.Printf("%s (reference) tiled %d seqs, total len %d", infile, len(tseqs), totlen)
if cmd.retainAfterEncoding {
tilelib.mtx.Lock()
go close(todo)
var tileJobs sync.WaitGroup
var running int64
- for i := 0; i < runtime.GOMAXPROCS(-1)*2; i++ {
+ for i := 0; i < runtime.GOMAXPROCS(-1); i++ {
tileJobs.Add(1)
atomic.AddInt64(&running, 1)
go func() {
return
}
defer consensus.Wait()
- tileseq, stats, err = tilelib.TileFasta(fmt.Sprintf("%s phase %d", infile, phase+1), stdout, cmd.matchChromosome)
+ tileseq, stats, err = tilelib.TileFasta(fmt.Sprintf("%s phase %d", infile, phase+1), stdout, cmd.matchChromosome, false)
if err != nil {
return
}