projects
/
lightning.git
/ blobdiff
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Export hgvs-onehot.
[lightning.git]
/
import.go
diff --git a/import.go b/import.go
index f4e0cd0008c218cf23553e12be9d174031e06d2c..d229160a8bff9ad4d7f7f37f7c10f3481f3a2aa9 100644
(file)
--- a/import.go
+++ b/import.go
@@ -1,4 +1,4 @@
-package main
+package lightning
import (
"bufio"
import (
"bufio"
@@ -42,6 +42,7 @@ type importer struct {
outputStats string
matchChromosome *regexp.Regexp
encoder *gob.Encoder
outputStats string
matchChromosome *regexp.Regexp
encoder *gob.Encoder
+ retainAfterEncoding bool // keep imported genomes/refseqs in memory after writing to disk
batchArgs
}
batchArgs
}
@@ -180,8 +181,8 @@ func (cmd *importer) runBatches(stdout io.Writer, inputs []string) error {
Client: arvadosClientFromEnv,
ProjectUUID: cmd.projectUUID,
APIAccess: true,
Client: arvadosClientFromEnv,
ProjectUUID: cmd.projectUUID,
APIAccess: true,
- RAM: 300000000000,
- VCPUs: 64,
+ RAM: 700000000000,
+ VCPUs: 96,
Priority: cmd.priority,
KeepCache: 1,
}
Priority: cmd.priority,
KeepCache: 1,
}
@@ -361,7 +362,6 @@ func (cmd *importer) tileInputs(tilelib *tileLibrary, infiles []string) error {
var kept, dropped int
variants[1], kept, dropped = tseqs.Variants()
log.Printf("%s found %d unique tags plus %d repeats", infile2, kept, dropped)
var kept, dropped int
variants[1], kept, dropped = tseqs.Variants()
log.Printf("%s found %d unique tags plus %d repeats", infile2, kept, dropped)
-
return err
}
} else if fastaFilenameRe.MatchString(infile) {
return err
}
} else if fastaFilenameRe.MatchString(infile) {
@@ -380,6 +380,16 @@ func (cmd *importer) tileInputs(tilelib *tileLibrary, infiles []string) error {
totlen += len(tseq)
}
log.Printf("%s tiled %d seqs, total len %d", infile, len(tseqs), totlen)
totlen += len(tseq)
}
log.Printf("%s tiled %d seqs, total len %d", infile, len(tseqs), totlen)
+
+ if cmd.retainAfterEncoding {
+ tilelib.mtx.Lock()
+ if tilelib.refseqs == nil {
+ tilelib.refseqs = map[string]map[string][]tileLibRef{}
+ }
+ tilelib.refseqs[infile] = tseqs
+ tilelib.mtx.Unlock()
+ }
+
return cmd.encoder.Encode(LibraryEntry{
CompactSequences: []CompactSequence{{Name: infile, TileSequences: tseqs}},
})
return cmd.encoder.Encode(LibraryEntry{
CompactSequences: []CompactSequence{{Name: infile, TileSequences: tseqs}},
})
@@ -411,8 +421,9 @@ func (cmd *importer) tileInputs(tilelib *tileLibrary, infiles []string) error {
if len(errs) > 0 {
return
}
if len(errs) > 0 {
return
}
+ variants := flatten(variants)
err := cmd.encoder.Encode(LibraryEntry{
err := cmd.encoder.Encode(LibraryEntry{
- CompactGenomes: []CompactGenome{{Name: infile, Variants: flatten(variants)}},
+ CompactGenomes: []CompactGenome{{Name: infile, Variants: variants}},
})
if err != nil {
select {
})
if err != nil {
select {
@@ -420,6 +431,14 @@ func (cmd *importer) tileInputs(tilelib *tileLibrary, infiles []string) error {
default:
}
}
default:
}
}
+ if cmd.retainAfterEncoding {
+ tilelib.mtx.Lock()
+ if tilelib.compactGenomes == nil {
+ tilelib.compactGenomes = make(map[string][]tileVariantID)
+ }
+ tilelib.compactGenomes[infile] = variants
+ tilelib.mtx.Unlock()
+ }
}()
}
go close(todo)
}()
}
go close(todo)
@@ -452,6 +471,13 @@ func (cmd *importer) tileInputs(tilelib *tileLibrary, infiles []string) error {
}()
}
tileJobs.Wait()
}()
}
tileJobs.Wait()
+ if len(errs) > 0 {
+ // Must not wait on encodeJobs in this case. If the
+ // tileJobs goroutines exited early, some funcs in
+ // todo haven't been called, so the corresponding
+ // encodeJobs will wait forever.
+ return <-errs
+ }
encodeJobs.Wait()
go close(errs)
encodeJobs.Wait()
go close(errs)