"fmt"
"io"
_ "net/http/pprof"
+ "os/exec"
+ "strings"
"git.arvados.org/arvados.git/sdk/go/arvados"
)
flags.SetOutput(stderr)
projectUUID := flags.String("project", "", "project `UUID` for output data")
inputFilename := flags.String("i", "-", "input `file`")
+ outputFilename := flags.String("o", "", "output `filename` (e.g., './plot.png')")
sampleListFilename := flags.String("samples", "", "use second column of `samples.csv` as complete list of sample IDs")
phenotypeFilename := flags.String("phenotype", "", "use `phenotype.csv` as id->phenotype mapping (column 0 is sample id)")
+ phenotypeCategoryColumn := flags.Int("phenotype-category-column", -1, "0-based column `index` of 2nd category in phenotype.csv file")
phenotypeColumn := flags.Int("phenotype-column", 1, "0-based column `index` of phenotype in phenotype.csv file")
priority := flags.Int("priority", 500, "container request priority")
+ runlocal := flags.Bool("local", false, "run on local host (default: run in an arvados container)")
err = flags.Parse(args)
if err == flag.ErrHelp {
err = nil
},
},
}
- err = runner.TranslatePaths(inputFilename, sampleListFilename, phenotypeFilename)
- if err != nil {
- return 1
+ if !*runlocal {
+ err = runner.TranslatePaths(inputFilename, sampleListFilename, phenotypeFilename)
+ if err != nil {
+ return 1
+ }
+ *outputFilename = "/mnt/output/plot.png"
+ }
+ args = []string{*inputFilename, *sampleListFilename, *phenotypeFilename, fmt.Sprintf("%d", *phenotypeCategoryColumn), fmt.Sprintf("%d", *phenotypeColumn), *outputFilename}
+ if *runlocal {
+ if *outputFilename == "" {
+ fmt.Fprintln(stderr, "error: must specify -o filename.png in local mode (or try -help)")
+ return 1
+ }
+ cmd := exec.Command("python3", append([]string{"-"}, args...)...)
+ cmd.Stdin = strings.NewReader(plotscript)
+ cmd.Stdout = stdout
+ cmd.Stderr = stderr
+ err = cmd.Run()
+ if err != nil {
+ return 1
+ }
+ return 0
}
runner.Prog = "python3"
- runner.Args = []string{"/plot.py", *inputFilename, *sampleListFilename, *phenotypeFilename, fmt.Sprintf("%d", *phenotypeColumn), "/mnt/output/plot.png"}
+ runner.Args = append([]string{"/plot.py"}, args...)
var output string
output, err = runner.Run()
if err != nil {
X = numpy.load(infile)
colors = None
+category = {}
+samples = []
if sys.argv[2]:
- samples = []
labels = {}
with open(sys.argv[2], 'rt', newline='') as samplelist:
for row in csv.reader(samplelist):
sampleid = row[1]
samples.append(sampleid)
- phenotype_column = int(sys.argv[4])
+ phenotype_category_column = int(sys.argv[4])
+ phenotype_column = int(sys.argv[5])
if os.path.isdir(sys.argv[3]):
phenotype_files = os.scandir(sys.argv[3])
else:
for sampleid in samples:
if tag in sampleid:
labels[sampleid] = label
+ if phenotype_category_column >= 0 and row[phenotype_category_column] != '0':
+ category[sampleid] = True
colors = []
labelcolors = {
'PUR': 'firebrick',
'GIH': 'blueviolet',
'PJL': 'blueviolet',
'5': 'blueviolet',
- '6': 'navy',
+ '6': 'black', # unknown?
}
for sampleid in samples:
if (sampleid in labels) and (labels[sampleid] in labelcolors):
from matplotlib.backends.backend_agg import FigureCanvasAgg
fig = Figure()
ax = fig.add_subplot(111)
-ax.scatter(X[:,0], X[:,1], c=colors, s=60, marker='o', alpha=0.5)
+for marker in ['o', 'x']:
+ x = []
+ y = []
+ if samples:
+ c = []
+ for i, sampleid in enumerate(samples):
+ if category.get(sampleid, False) == (marker == 'x'):
+ x.append(X[i,0])
+ y.append(X[i,1])
+ c.append(colors[i])
+ elif marker == 'x':
+ continue
+ else:
+ x = X[:,0]
+ y = X[:,1]
+ c = None
+ ax.scatter(x, y, c=c, s=60, marker=marker, alpha=0.5)
canvas = FigureCanvasAgg(fig)
-canvas.print_figure(sys.argv[5], dpi=80)
+canvas.print_figure(sys.argv[6], dpi=80)