Distribute genomes across output files.
[lightning.git] / tilelib.go
index b3c43f181122e42a55eacf8a0dcca1bfb4a0e676..3852b4d093d6175294477c724f764d65dba5b3e8 100644 (file)
@@ -260,25 +260,35 @@ func (tilelib *tileLibrary) LoadDir(ctx context.Context, path string, onLoadGeno
                                return
                        }
                        defer f.Close()
+                       defer log.Infof("LoadDir: finished reading %s", path)
                        errs <- DecodeLibrary(f, strings.HasSuffix(path, ".gz"), func(ent *LibraryEntry) error {
                                if ctx.Err() != nil {
                                        return ctx.Err()
                                }
-                               mtx.Lock()
-                               defer mtx.Unlock()
-                               if tilelib.taglib == nil || tilelib.taglib.Len() != len(ent.TagSet) {
-                                       // load first set of tags, or
-                                       // report mismatch if 2 sets
-                                       // have different #tags.
-                                       if err := tilelib.loadTagSet(ent.TagSet); err != nil {
-                                               return err
+                               if len(ent.TagSet) > 0 {
+                                       mtx.Lock()
+                                       if tilelib.taglib == nil || tilelib.taglib.Len() != len(ent.TagSet) {
+                                               // load first set of tags, or
+                                               // report mismatch if 2 sets
+                                               // have different #tags.
+                                               if err := tilelib.loadTagSet(ent.TagSet); err != nil {
+                                                       mtx.Unlock()
+                                                       return err
+                                               }
                                        }
+                                       mtx.Unlock()
                                }
-                               if err := tilelib.loadTileVariants(ent.TileVariants, variantmap); err != nil {
-                                       return err
+                               variantmapadd := map[tileLibRef]tileVariantID{}
+                               for _, tv := range ent.TileVariants {
+                                       variantmapadd[tileLibRef{Tag: tv.Tag, Variant: tv.Variant}] = tilelib.getRef(tv.Tag, tv.Sequence).Variant
                                }
+                               mtx.Lock()
                                cgs = append(cgs, ent.CompactGenomes...)
                                cseqs = append(cseqs, ent.CompactSequences...)
+                               for k, v := range variantmapadd {
+                                       variantmap[k] = v
+                               }
+                               mtx.Unlock()
                                return nil
                        })
                }()
@@ -327,6 +337,14 @@ func (tilelib *tileLibrary) WriteDir(dir string) error {
        for i := range encoders {
                encoders[i] = gob.NewEncoder(zws[i])
        }
+
+       cgnames := make([]string, 0, len(tilelib.compactGenomes))
+       for name := range tilelib.compactGenomes {
+               cgnames = append(cgnames, name)
+       }
+       sort.Strings(cgnames)
+
+       log.Infof("WriteDir: writing %d files", nfiles)
        ctx, cancel := context.WithCancel(context.Background())
        defer cancel()
        errs := make(chan error, nfiles)
@@ -339,18 +357,8 @@ func (tilelib *tileLibrary) WriteDir(dir string) error {
                                return
                        }
                        if start == 0 {
-                               // For now, just write all the genomes and refs
-                               // to the first file
-                               for name, cg := range tilelib.compactGenomes {
-                                       err := encoders[start].Encode(LibraryEntry{CompactGenomes: []CompactGenome{{
-                                               Name:     name,
-                                               Variants: cg,
-                                       }}})
-                                       if err != nil {
-                                               errs <- err
-                                               return
-                                       }
-                               }
+                               // For now, just write all the refs to
+                               // the first file
                                for name, tseqs := range tilelib.refseqs {
                                        err := encoders[start].Encode(LibraryEntry{CompactSequences: []CompactSequence{{
                                                Name:          name,
@@ -362,6 +370,16 @@ func (tilelib *tileLibrary) WriteDir(dir string) error {
                                        }
                                }
                        }
+                       for i := start; i < len(cgnames); i += nfiles {
+                               err := encoders[start].Encode(LibraryEntry{CompactGenomes: []CompactGenome{{
+                                       Name:     cgnames[i],
+                                       Variants: tilelib.compactGenomes[cgnames[i]],
+                               }}})
+                               if err != nil {
+                                       errs <- err
+                                       return
+                               }
+                       }
                        tvs := []TileVariant{}
                        for tag := start; tag < len(tilelib.variant) && ctx.Err() == nil; tag += nfiles {
                                tvs = tvs[:0]
@@ -388,6 +406,7 @@ func (tilelib *tileLibrary) WriteDir(dir string) error {
                        return err
                }
        }
+       log.Info("WriteDir: flushing")
        for i := range zws {
                err := zws[i].Close()
                if err != nil {
@@ -402,6 +421,7 @@ func (tilelib *tileLibrary) WriteDir(dir string) error {
                        return err
                }
        }
+       log.Info("WriteDir: done")
        return nil
 }