18 "git.arvados.org/arvados.git/sdk/go/arvados"
19 "github.com/klauspost/pgzip"
20 log "github.com/sirupsen/logrus"
29 mapped map[string]map[tileLibRef]tileVariantID
// RunCommand is the entry point for the "merge" subcommand. From the visible
// statements it parses flags from args, then either submits the merge to an
// arvados container (the default, per the -local flag's help text) or sets up
// a local output writer.
//
// NOTE(review): this listing omits lines; return statements and several
// branch conditions are not visible, so the int result is presumably a
// process exit status -- confirm against the full file.
34 func (cmd *merger) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int {
// Errors are reported to the caller-supplied stderr writer.
38 fmt.Fprintf(stderr, "%s\n", err)
// ContinueOnError makes Parse return an error instead of exiting the process.
41 flags := flag.NewFlagSet("", flag.ContinueOnError)
42 flags.SetOutput(stderr)
43 pprof := flags.String("pprof", "", "serve Go profile data at http://`[addr]:port`")
44 runlocal := flags.Bool("local", false, "run on local host (default: run in an arvados container)")
45 projectUUID := flags.String("project", "", "project `UUID` for output data")
46 priority := flags.Int("priority", 500, "container request priority")
47 outputFilename := flags.String("o", "-", "output `file`")
48 err = flags.Parse(args)
// flag.ErrHelp (-h/-help requested) is distinguished from real parse errors.
49 if err == flag.ErrHelp {
52 } else if err != nil {
// Everything left over after flag parsing is treated as an input.
56 cmd.inputs = flags.Args()
// Serves on the -pprof address using the default mux (nil handler) and logs
// the error ListenAndServe returns.
60 log.Println(http.ListenAndServe(*pprof, nil))
// Container mode: a custom -o value is rejected as not implemented.
65 if *outputFilename != "-" {
66 err = errors.New("cannot specify output file in container mode: not implemented")
69 runner := arvadosContainerRunner{
70 Name: "lightning merge",
71 Client: arvados.NewClientFromEnv(),
72 ProjectUUID: *projectUUID,
// Each input is passed by pointer, so TranslatePaths can rewrite it in place
// (presumably local path -> container mount path; confirm in the runner).
79 for i := range cmd.inputs {
80 err = runner.TranslatePaths(&cmd.inputs[i])
// The container re-invokes this command with -local=true and a fixed output
// path under /mnt/output.
85 runner.Args = append([]string{"merge", "-local=true",
86 "-o", "/mnt/output/library.gob.gz",
89 output, err = runner.Run()
// Print where the container's output file can be found.
93 fmt.Fprintln(stdout, output+"/library.gob.gz")
// Local mode: outf is the opened file (if any), outw is what actually gets
// written to (stdout, the file, or a gzip writer on top of the file).
97 var outf, outw io.WriteCloser
98 if *outputFilename == "-" {
// nopCloser presumably keeps a later Close from closing stdout -- confirm.
99 outw = nopCloser{stdout}
// NOTE(review): the flags lack os.O_TRUNC, so writing over an existing,
// longer file would leave stale trailing bytes; mode 0777 also marks a data
// file executable (before umask). Consider O_TRUNC and 0666.
101 outf, err = os.OpenFile(*outputFilename, os.O_CREATE|os.O_WRONLY, 0777)
// A ".gz" output name selects parallel-gzip compression.
106 if strings.HasSuffix(*outputFilename, ".gz") {
107 outw = pgzip.NewWriter(outf)
109 outw = nopCloser{outf}
// 64 MiB write buffer in front of outw.
112 bufw := bufio.NewWriterSize(outw, 64*1024*1024)
// setError offers err to the cmd.errs channel from inside a select statement.
// NOTE(review): the other select cases are not visible in this listing;
// presumably a default case drops err when the channel cannot accept it, so
// only the first error is kept -- confirm.
135 func (cmd *merger) setError(err error) {
137 case cmd.errs <- err:
// doMerge prepares a gob encoder on cmd.output, creates a fresh tileLibrary,
// and starts one goroutine per input that loads the input into that library
// with LoadGob. It then reads an error from cmd.errs before flushing.
//
// NOTE(review): this listing omits lines and ends before the function does;
// what is encoded and how the WaitGroup is used are not visible here.
142 func (cmd *merger) doMerge() error {
// NOTE(review): RunCommand builds a 64 MiB bufio.Writer of its own; if
// cmd.output is that writer, this is a second buffering layer -- confirm.
143 w := bufio.NewWriter(cmd.output)
144 encoder := gob.NewEncoder(w)
// ctx is handed to every LoadGob call below; cancel can stop them.
146 ctx, cancel := context.WithCancel(context.Background())
// Capacity 1: a single error can be queued without a receiver waiting.
149 cmd.errs = make(chan error, 1)
150 cmd.tilelib = &tileLibrary{
// Give every input its own (initially empty) tileLibRef -> tileVariantID map.
155 cmd.mapped = map[string]map[tileLibRef]tileVariantID{}
156 for _, input := range cmd.inputs {
157 cmd.mapped[input] = map[tileLibRef]tileVariantID{}
160 var wg sync.WaitGroup
161 for _, input := range cmd.inputs {
// Start from stdin; replaced by open(input) below under a condition that is
// not visible in this listing.
162 rdr := ioutil.NopCloser(cmd.stdin)
165 rdr, err = open(input)
// 8 MiB read buffer.
// NOTE(review): wrapping in NopCloser hides the Close method of the reader
// returned by open(input); unless that reader is closed elsewhere (not
// visible), calling rdr.Close() no longer releases it.
171 rdr = ioutil.NopCloser(bufio.NewReaderSize(rdr, 8*1024*1024))
// input is passed as an argument so each goroutine has its own copy.
173 go func(input string) {
175 log.Printf("%s: reading", input)
// The third argument is true for ".gz" inputs (presumably enables gunzip in
// LoadGob -- confirm).
176 err := cmd.tilelib.LoadGob(ctx, rdr, strings.HasSuffix(input, ".gz"), nil)
// Wrap with the input name; %w keeps the original error inspectable.
178 cmd.setError(fmt.Errorf("%s: load failed: %w", input, err))
182 log.Printf("%s: done", input)
// Blocks until something is sent on (or closes) cmd.errs; the code that does
// so on success is not visible in this listing.
187 if err := <-cmd.errs; err != nil {
190 log.Print("flushing")