15 "git.arvados.org/arvados.git/sdk/go/arvados"
16 "github.com/kshedden/gonpy"
17 log "github.com/sirupsen/logrus"
// exportNumpy is the receiver type for the "export-numpy" subcommand; it
// carries no state, and all behavior lives in its RunCommand method.
20 type exportNumpy struct{}
// RunCommand implements the export-numpy subcommand. It parses its own flag
// set from args and then works in one of two modes, selected by -local:
//
//   - container mode (the default, per the -local help text): the same
//     subcommand is re-run with -local=true through an
//     arvadosContainerRunner, and the resulting output location is printed
//     to stdout;
//   - local mode: compact genomes are read from the input, sorted by Name,
//     flattened into a rows x cols uint16 array (optionally recoded as
//     one-hot) and written in numpy format to the output.
//
// Flag-parsing output and errors go to stderr. The int result is presumably
// a process exit status — confirm against the callers.
22 func (cmd *exportNumpy) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int {
// Writes err, followed by a newline, to stderr.
26 fmt.Fprintf(stderr, "%s\n", err)
// An unnamed FlagSet with ContinueOnError makes Parse return the error
// instead of exiting the process, so it can be handled below.
29 flags := flag.NewFlagSet("", flag.ContinueOnError)
30 flags.SetOutput(stderr)
31 pprof := flags.String("pprof", "", "serve Go profile data at http://`[addr]:port`")
32 runlocal := flags.Bool("local", false, "run on local host (default: run in an arvados container)")
33 projectUUID := flags.String("project", "", "project `UUID` for output data")
34 priority := flags.Int("priority", 500, "container request priority")
// "-" is the default for both -i and -o and selects stdin / stdout.
35 inputFilename := flags.String("i", "-", "input `file`")
36 outputFilename := flags.String("o", "-", "output `file`")
37 onehot := flags.Bool("one-hot", false, "recode tile variants as one-hot")
38 err = flags.Parse(args)
// flag.ErrHelp is what Parse returns when -h/-help was requested; it is
// distinguished here from genuine parse failures.
39 if err == flag.ErrHelp {
42 } else if err != nil {
// A nil handler makes ListenAndServe use http.DefaultServeMux. The call only
// returns when the server stops, and the returned error is logged.
48 log.Println(http.ListenAndServe(*pprof, nil))
// Container mode always writes to a fixed path (see runner.Args below), so a
// caller-supplied -o is rejected rather than silently ignored.
53 if *outputFilename != "-" {
54 err = errors.New("cannot specify output file in container mode: not implemented")
57 runner := arvadosContainerRunner{
58 Name: "lightning export-numpy",
59 Client: arvados.NewClientFromEnv(),
60 ProjectUUID: *projectUUID,
// inputFilename is a *string, so TranslatePaths can rewrite it in place —
// presumably to a path that is valid inside the container; confirm against
// arvadosContainerRunner.
65 err = runner.TranslatePaths(inputFilename)
// Re-invoke this subcommand with -local=true, forwarding the -one-hot
// setting and the input path, and directing output to
// /mnt/output/library.npy.
69 runner.Args = []string{"export-numpy", "-local=true", fmt.Sprintf("-one-hot=%v", *onehot), "-i", *inputFilename, "-o", "/mnt/output/library.npy"}
71 output, err = runner.Run()
// Print the string returned by runner.Run with the fixed file name appended.
75 fmt.Fprintln(stdout, output+"/library.npy")
// Local mode: choose the input stream. stdin is wrapped in a NopCloser so
// both branches yield an io.ReadCloser.
79 var input io.ReadCloser
80 if *inputFilename == "-" {
81 input = ioutil.NopCloser(stdin)
83 input, err = os.Open(*inputFilename)
89 cgs, err := ReadCompactGenomes(input)
// Order the genomes by Name before they are laid out as rows.
97 sort.Slice(cgs, func(i, j int) bool { return cgs[i].Name < cgs[j].Name })
99 out, rows, cols := cgs2array(cgs)
// Choose the output stream. stdout is wrapped in nopCloser so both branches
// yield an io.WriteCloser.
101 var output io.WriteCloser
102 if *outputFilename == "-" {
103 output = nopCloser{stdout}
// NOTE(review): the flags omit os.O_TRUNC, so writing over an existing,
// longer file leaves its old trailing bytes after the new data. Mode 0777
// (before umask) also marks a data file as executable; 0666 is the usual
// choice. Confirm whether either is intentional.
105 output, err = os.OpenFile(*outputFilename, os.O_CREATE|os.O_WRONLY, 0777)
// gonpy.NewWriter is handed the buffered writer wrapped in nopCloser, whose
// Close is a no-op, so closing npw neither flushes bufw nor closes output.
// NOTE(review): confirm bufw.Flush and output.Close are called (and their
// errors checked) after the data is written.
111 bufw := bufio.NewWriter(output)
112 npw, err := gonpy.NewWriter(nopCloser{bufw})
// recodeOnehot returns a new data slice and a new column count —
// presumably only invoked when -one-hot is set; confirm the guard.
117 out, cols = recodeOnehot(out, cols)
// Shape tells the numpy writer the 2-D dimensions of the flat data slice.
119 npw.Shape = []int{rows, cols}
// cgs2array flattens the Variants of the given genomes into one row-major
// []uint16 of rows*cols elements, where genome number row occupies
// data[row*cols : row*cols+len(Variants)].
//
// cols is the length of the longest Variants slice. Genomes with fewer
// variants leave the rest of their row at zero, the value make fills in.
// rows is presumably len(cgs) — confirm.
132 func cgs2array(cgs []CompactGenome) (data []uint16, rows, cols int) {
// First pass: find the widest genome to determine the column count.
134 for _, cg := range cgs {
135 if cols < len(cg.Variants) {
136 cols = len(cg.Variants)
139 data = make([]uint16, rows*cols)
// Second pass: copy each genome's variants into its row.
140 for row, cg := range cgs {
141 for i, v := range cg.Variants {
// NOTE(review): the conversion to uint16 silently wraps if v can exceed
// 65535 or be negative — confirm the element type and range of Variants.
142 data[row*cols+i] = uint16(v)
// recodeOnehot converts a row-major matrix with incols columns into a one-hot
// encoding. Each input column is assigned a starting output column
// (outcol[col]); a nonzero value v in that input column sets the output cell
// at outcol[col]+v-1 to 1, and a zero value sets nothing, leaving the cells
// at their zero initial value.
//
// It returns the newly allocated output slice and, presumably, the output
// column count (outcols) — confirm.
//
// NOTE(review): incols == 0 makes len(in) / incols panic with an integer
// divide by zero; confirm callers never pass an empty matrix.
148 func recodeOnehot(in []uint16, incols int) ([]uint16, int) {
149 rows := len(in) / incols
// Scan every cell to find the largest value seen in each input column.
150 maxvalue := make([]uint16, incols)
151 for row := 0; row < rows; row++ {
152 for col := 0; col < incols; col++ {
153 if v := in[row*incols+col]; maxvalue[col] < v {
// outcol[i] is the index of the first output column belonging to input
// column i.
158 outcol := make([]int, incols)
161 for incol, v := range maxvalue {
162 outcol[incol] = outcols
// As the message states, input columns whose maxvalue is zero are counted
// as dropped.
169 log.Printf("recodeOnehot: dropped %d input cols with zero maxvalue", dropped)
171 out := make([]uint16, rows*outcols)
// inidx indexes the flat input — presumably advanced once per cell so that
// it tracks row*incols+col; confirm.
172 for inidx, row := 0, 0; row < rows; row++ {
// outrow starts at this row's first output cell, so offsets below are
// relative to the row.
173 outrow := out[row*outcols:]
174 for col := 0; col < incols; col++ {
175 if v := in[inidx]; v > 0 {
// Values are 1-based within a column's group: v == 1 maps to the group's
// first output column.
176 outrow[outcol[col]+int(v)-1] = 1
// nopCloser wraps a writer so it can be used where an io.WriteCloser is
// required (as in nopCloser{stdout} and nopCloser{bufw}) without closing
// the underlying writer.
184 type nopCloser struct {
// Close does nothing and always returns nil.
188 func (nopCloser) Close() error { return nil }