X-Git-Url: https://git.arvados.org/lightning.git/blobdiff_plain/fd2049ccfde932448f570eeda6a1ecc1b663f8e0..f1cff36f8beba7ef4b494120f1577224417c125b:/import.go diff --git a/import.go b/import.go index 2f9888ce39..2c502a47cf 100644 --- a/import.go +++ b/import.go @@ -4,6 +4,7 @@ import ( "bufio" "compress/gzip" "encoding/gob" + "errors" "flag" "fmt" "io" @@ -20,11 +21,16 @@ import ( "sync" "sync/atomic" "time" + + "git.arvados.org/arvados.git/sdk/go/arvados" ) type importer struct { tagLibraryFile string refFile string + outputFile string + projectUUID string + runLocal bool encoder *gob.Encoder } @@ -39,6 +45,9 @@ func (cmd *importer) RunCommand(prog string, args []string, stdin io.Reader, std flags.SetOutput(stderr) flags.StringVar(&cmd.tagLibraryFile, "tag-library", "", "tag library fasta `file`") flags.StringVar(&cmd.refFile, "ref", "", "reference fasta `file`") + flags.StringVar(&cmd.outputFile, "o", "-", "output `file`") + flags.StringVar(&cmd.projectUUID, "project", "", "project `UUID` for output data") + flags.BoolVar(&cmd.runLocal, "local", false, "run on local host (default: run in an arvados container)") pprof := flags.String("pprof", "", "serve Go profile data at http://`[addr]:port`") err = flags.Parse(args) if err == flag.ErrHelp { @@ -46,8 +55,8 @@ func (cmd *importer) RunCommand(prog string, args []string, stdin io.Reader, std return 0 } else if err != nil { return 2 - } else if cmd.refFile == "" || cmd.tagLibraryFile == "" { - fmt.Fprintln(os.Stderr, "cannot run without -tag-library and -ref arguments") + } else if cmd.tagLibraryFile == "" { + fmt.Fprintln(os.Stderr, "cannot import without -tag-library argument") return 2 } else if flags.NArg() == 0 { flags.Usage() @@ -60,6 +69,44 @@ func (cmd *importer) RunCommand(prog string, args []string, stdin io.Reader, std }() } + if !cmd.runLocal { + runner := arvadosContainerRunner{ + Name: "lightning import", + Client: arvados.NewClientFromEnv(), + ProjectUUID: cmd.projectUUID, + RAM: 30000000000, + VCPUs: 16, + } + err = runner.TranslatePaths(&cmd.tagLibraryFile, &cmd.refFile, &cmd.outputFile) + if err != nil { + return 1 + } + inputs := flags.Args() + for i := range inputs { + err = runner.TranslatePaths(&inputs[i]) + if err != nil { + return 1 + } + } + if cmd.outputFile == "-" { + cmd.outputFile = "/mnt/output/library.gob" + } else { + // Not yet implemented, but this should write + // the collection to an existing collection, + // possibly even an in-place update. + err = errors.New("cannot specify output file in container mode: not implemented") + return 1 + } + runner.Args = append([]string{"import", "-local=true", "-tag-library", cmd.tagLibraryFile, "-ref", cmd.refFile, "-o", cmd.outputFile}, inputs...) + var output string + output, err = runner.Run() + if err != nil { + return 1 + } + fmt.Fprintln(stdout, output+"/library.gob") + return 0 + } + infiles, err := listInputFiles(flags.Args()) if err != nil { return 1 @@ -74,13 +121,29 @@ func (cmd *importer) RunCommand(prog string, args []string, stdin io.Reader, std log.Printf("tilelib.Len() == %d", tilelib.Len()) } }() - w := bufio.NewWriter(stdout) - cmd.encoder = gob.NewEncoder(w) - err = cmd.tileGVCFs(tilelib, infiles) + + var output io.WriteCloser + if cmd.outputFile == "-" { + output = nopCloser{stdout} + } else { + output, err = os.OpenFile(cmd.outputFile, os.O_CREATE|os.O_WRONLY, 0777) + if err != nil { + return 1 + } + defer output.Close() + } + bufw := bufio.NewWriter(output) + cmd.encoder = gob.NewEncoder(bufw) + + err = cmd.tileInputs(tilelib, infiles) + if err != nil { + return 1 + } + err = bufw.Flush() if err != nil { return 1 } - err = w.Flush() + err = output.Close() if err != nil { return 1 } @@ -174,7 +237,7 @@ func listInputFiles(paths []string) (files []string, err error) { return } -func (cmd *importer) tileGVCFs(tilelib *tileLibrary, infiles []string) error { +func (cmd *importer) tileInputs(tilelib *tileLibrary, infiles []string) error { starttime := time.Now() errs := make(chan error, 1) todo := make(chan func() error, len(infiles)*2) @@ -275,6 +338,10 @@ func (cmd *importer) tileGVCFs(tilelib *tileLibrary, infiles []string) error { } func (cmd *importer) tileGVCF(tilelib *tileLibrary, infile string, phase int) (tileseq tileSeq, err error) { + if cmd.refFile == "" { + err = errors.New("cannot import vcf: reference data (-ref) not specified") + return + } args := []string{"bcftools", "consensus", "--fasta-ref", cmd.refFile, "-H", fmt.Sprint(phase + 1), infile} indexsuffix := ".tbi" if _, err := os.Stat(infile + ".csi"); err == nil {