Distribute genomes across output files.
author Tom Clegg <tom@tomclegg.ca>
Thu, 17 Jun 2021 14:22:15 +0000 (10:22 -0400)
committer Tom Clegg <tom@tomclegg.ca>
Thu, 17 Jun 2021 14:22:15 +0000 (10:22 -0400)
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom@curii.com>

tilelib.go

index e0d0e1bc4f4a603b7ec8e3801a26d71031e32d9d..3852b4d093d6175294477c724f764d65dba5b3e8 100644 (file)
@@ -338,6 +338,12 @@ func (tilelib *tileLibrary) WriteDir(dir string) error {
                encoders[i] = gob.NewEncoder(zws[i])
        }
 
+       cgnames := make([]string, 0, len(tilelib.compactGenomes))
+       for name := range tilelib.compactGenomes {
+               cgnames = append(cgnames, name)
+       }
+       sort.Strings(cgnames)
+
        log.Infof("WriteDir: writing %d files", nfiles)
        ctx, cancel := context.WithCancel(context.Background())
        defer cancel()
@@ -351,18 +357,8 @@ func (tilelib *tileLibrary) WriteDir(dir string) error {
                                return
                        }
                        if start == 0 {
-                               // For now, just write all the genomes and refs
-                               // to the first file
-                               for name, cg := range tilelib.compactGenomes {
-                                       err := encoders[start].Encode(LibraryEntry{CompactGenomes: []CompactGenome{{
-                                               Name:     name,
-                                               Variants: cg,
-                                       }}})
-                                       if err != nil {
-                                               errs <- err
-                                               return
-                                       }
-                               }
+                               // For now, just write all the refs to
+                               // the first file
                                for name, tseqs := range tilelib.refseqs {
                                        err := encoders[start].Encode(LibraryEntry{CompactSequences: []CompactSequence{{
                                                Name:          name,
@@ -374,6 +370,16 @@ func (tilelib *tileLibrary) WriteDir(dir string) error {
                                        }
                                }
                        }
+                       for i := start; i < len(cgnames); i += nfiles {
+                               err := encoders[start].Encode(LibraryEntry{CompactGenomes: []CompactGenome{{
+                                       Name:     cgnames[i],
+                                       Variants: tilelib.compactGenomes[cgnames[i]],
+                               }}})
+                               if err != nil {
+                                       errs <- err
+                                       return
+                               }
+                       }
                        tvs := []TileVariant{}
                        for tag := start; tag < len(tilelib.variant) && ctx.Err() == nil; tag += nfiles {
                                tvs = tvs[:0]