X-Git-Url: https://git.arvados.org/lightning.git/blobdiff_plain/30299451e290f999306373ff6a7ddfe58f6cfd28..ceee3803f9b1ad3eef8b7abd45dc1327c062738c:/slice.go diff --git a/slice.go b/slice.go index be5bb87904..d2073ad9c0 100644 --- a/slice.go +++ b/slice.go @@ -66,10 +66,10 @@ func (cmd *slicecmd) RunCommand(prog string, args []string, stdin io.Reader, std Name: "lightning slice", Client: arvados.NewClientFromEnv(), ProjectUUID: *projectUUID, - RAM: 200000000000, - VCPUs: 32, + RAM: 500000000000, + VCPUs: 64, Priority: *priority, - KeepCache: 50, + KeepCache: 2, APIAccess: true, } for i := range inputDirs { @@ -111,14 +111,14 @@ func Slice(tagsPerFile int, dstdir string, srcdirs []string) error { } // dirNamespace[dir] is an int in [0,len(dirNamespace)), used below to // namespace variant numbers from different dirs. - dirNamespace := map[string]int{} + dirNamespace := map[string]tileVariantID{} for _, path := range infiles { dir, _ := filepath.Split(path) if _, ok := dirNamespace[dir]; !ok { - dirNamespace[dir] = len(dirNamespace) + dirNamespace[dir] = tileVariantID(len(dirNamespace)) } } - namespaces := len(dirNamespace) + namespaces := tileVariantID(len(dirNamespace)) var ( tagset [][]byte @@ -134,21 +134,21 @@ func Slice(tagsPerFile int, dstdir string, srcdirs []string) error { ) throttle := throttle{Max: runtime.GOMAXPROCS(0)} - for _, path := range infiles { - path := path + for _, infile := range infiles { + infile := infile throttle.Acquire() go func() { defer throttle.Release() - f, err := open(path) + f, err := open(infile) if err != nil { throttle.Report(err) return } defer f.Close() - dir, _ := filepath.Split(path) + dir, _ := filepath.Split(infile) namespace := dirNamespace[dir] - log.Printf("reading %s (namespace %d)", path, namespace) - err = DecodeLibrary(f, strings.HasSuffix(path, ".gz"), func(ent *LibraryEntry) error { + log.Printf("reading %s (namespace %d)", infile, namespace) + err = DecodeLibrary(f, strings.HasSuffix(infile, ".gz"), func(ent *LibraryEntry) error { if err := throttle.Err(); err != nil { return err } @@ -175,8 +175,12 @@ func Slice(tagsPerFile int, dstdir string, srcdirs []string) error { } atomic.AddInt64(&countTileVariants, int64(len(ent.TileVariants))) for _, tv := range ent.TileVariants { - tv.Variant = tileVariantID(int(tv.Variant)*namespaces + namespace) - err := encs[int(tv.Tag)/tagsPerFile].Encode(LibraryEntry{ + tv.Variant = tv.Variant*namespaces + namespace + fileno := 0 + if !tv.Ref { + fileno = int(tv.Tag) / tagsPerFile + } + err := encs[fileno].Encode(LibraryEntry{ TileVariants: []TileVariant{tv}, }) if err != nil { @@ -192,7 +196,7 @@ func Slice(tagsPerFile int, dstdir string, srcdirs []string) error { for _, cg := range ent.CompactGenomes { for i, v := range cg.Variants { if v > 0 { - cg.Variants[i] = tileVariantID(int(v)*namespaces + namespace) + cg.Variants[i] = v*namespaces + namespace } } for i, enc := range encs { @@ -226,7 +230,7 @@ func Slice(tagsPerFile int, dstdir string, srcdirs []string) error { for _, cs := range ent.CompactSequences { for _, tseq := range cs.TileSequences { for i, libref := range tseq { - tseq[i].Variant = tileVariantID(int(libref.Variant)*namespaces + namespace) + tseq[i].Variant = libref.Variant*namespaces + namespace } } }