X-Git-Url: https://git.arvados.org/lightning.git/blobdiff_plain/f88fe63a53a05bdaccd142abe34d117fde34c5b7..5435c35e1d163adf5a79597721016c253e7c7e1d:/slice.go diff --git a/slice.go b/slice.go index 0ee3feaedf..4dbe6ab652 100644 --- a/slice.go +++ b/slice.go @@ -66,10 +66,10 @@ func (cmd *slicecmd) RunCommand(prog string, args []string, stdin io.Reader, std Name: "lightning slice", Client: arvados.NewClientFromEnv(), ProjectUUID: *projectUUID, - RAM: 200000000000, - VCPUs: 32, + RAM: 500000000000, + VCPUs: 64, Priority: *priority, - KeepCache: 50, + KeepCache: 2, APIAccess: true, } for i := range inputDirs { @@ -111,14 +111,14 @@ func Slice(tagsPerFile int, dstdir string, srcdirs []string) error { } // dirNamespace[dir] is an int in [0,len(dirNamespace)), used below to // namespace variant numbers from different dirs. - dirNamespace := map[string]int{} + dirNamespace := map[string]tileVariantID{} for _, path := range infiles { dir, _ := filepath.Split(path) if _, ok := dirNamespace[dir]; !ok { - dirNamespace[dir] = len(dirNamespace) + dirNamespace[dir] = tileVariantID(len(dirNamespace)) } } - namespaces := len(dirNamespace) + namespaces := tileVariantID(len(dirNamespace)) var ( tagset [][]byte @@ -136,19 +136,16 @@ func Slice(tagsPerFile int, dstdir string, srcdirs []string) error { throttle := throttle{Max: runtime.GOMAXPROCS(0)} for _, infile := range infiles { infile := infile - throttle.Acquire() - go func() { - defer throttle.Release() + throttle.Go(func() error { f, err := open(infile) if err != nil { - throttle.Report(err) - return + return err } defer f.Close() dir, _ := filepath.Split(infile) namespace := dirNamespace[dir] log.Printf("reading %s (namespace %d)", infile, namespace) - err = DecodeLibrary(f, strings.HasSuffix(infile, ".gz"), func(ent *LibraryEntry) error { + return DecodeLibrary(f, strings.HasSuffix(infile, ".gz"), func(ent *LibraryEntry) error { if err := throttle.Err(); err != nil { return err } @@ -175,8 +172,12 @@ func Slice(tagsPerFile int, dstdir string, srcdirs []string) error { } atomic.AddInt64(&countTileVariants, int64(len(ent.TileVariants))) for _, tv := range ent.TileVariants { - tv.Variant = tileVariantID(int(tv.Variant)*namespaces + namespace) - err := encs[int(tv.Tag)/tagsPerFile].Encode(LibraryEntry{ + tv.Variant = tv.Variant*namespaces + namespace + fileno := 0 + if !tv.Ref { + fileno = int(tv.Tag) / tagsPerFile + } + err := encs[fileno].Encode(LibraryEntry{ TileVariants: []TileVariant{tv}, }) if err != nil { @@ -192,7 +193,7 @@ func Slice(tagsPerFile int, dstdir string, srcdirs []string) error { for _, cg := range ent.CompactGenomes { for i, v := range cg.Variants { if v > 0 { - cg.Variants[i] = tileVariantID(int(v)*namespaces + namespace) + cg.Variants[i] = v*namespaces + namespace } } for i, enc := range encs { @@ -226,7 +227,7 @@ func Slice(tagsPerFile int, dstdir string, srcdirs []string) error { for _, cs := range ent.CompactSequences { for _, tseq := range cs.TileSequences { for i, libref := range tseq { - tseq[i].Variant = tileVariantID(int(libref.Variant)*namespaces + namespace) + tseq[i].Variant = libref.Variant*namespaces + namespace } } } @@ -237,8 +238,7 @@ func Slice(tagsPerFile int, dstdir string, srcdirs []string) error { } return nil }) - throttle.Report(err) - }() + }) } throttle.Wait() if throttle.Err() != nil {