// propagated to crunch-run via SLURM.
os.Setenv("ARVADOS_API_HOST", theConfig.Client.APIHost)
os.Setenv("ARVADOS_API_TOKEN", theConfig.Client.AuthToken)
- os.Setenv("ARVADOS_API_INSECURE", "")
+ os.Setenv("ARVADOS_API_HOST_INSECURE", "")
if theConfig.Client.Insecure {
- os.Setenv("ARVADOS_API_INSECURE", "1")
+ os.Setenv("ARVADOS_API_HOST_INSECURE", "1")
}
- os.Setenv("ARVADOS_KEEP_SERVICES", "")
+ os.Setenv("ARVADOS_KEEP_SERVICES", strings.Join(theConfig.Client.KeepServiceURIs, " "))
os.Setenv("ARVADOS_EXTERNAL_CLIENT", "")
} else {
log.Printf("warning: Client credentials missing from config, so falling back on environment variables (deprecated).")
PollInterval: time.Duration(theConfig.PollPeriod),
DoneProcessing: make(chan struct{})}
- if _, err := daemon.SdNotify("READY=1"); err != nil {
+ if _, err := daemon.SdNotify(false, "READY=1"); err != nil {
log.Printf("Error notifying init daemon: %v", err)
}
sbatchArgs = append(sbatchArgs, fmt.Sprintf("--job-name=%s", container.UUID))
sbatchArgs = append(sbatchArgs, fmt.Sprintf("--mem-per-cpu=%d", int(memPerCPU)))
sbatchArgs = append(sbatchArgs, fmt.Sprintf("--cpus-per-task=%d", container.RuntimeConstraints.VCPUs))
- if container.RuntimeConstraints.Partition != nil {
- sbatchArgs = append(sbatchArgs, fmt.Sprintf("--partition=%s", strings.Join(container.RuntimeConstraints.Partition, ",")))
+ if container.SchedulingParameters.Partitions != nil {
+ sbatchArgs = append(sbatchArgs, fmt.Sprintf("--partition=%s", strings.Join(container.SchedulingParameters.Partitions, ",")))
}
return exec.Command("sbatch", sbatchArgs...)
squeueUpdater.SlurmLock.Lock()
defer squeueUpdater.SlurmLock.Unlock()
+ log.Printf("sbatch starting: %+q", cmd.Args)
err := cmd.Start()
if err != nil {
- submitErr = fmt.Errorf("Error starting %v: %v", cmd.Args, err)
+ submitErr = fmt.Errorf("Error starting sbatch: %v", err)
return
}
b, _ := ioutil.ReadAll(stdoutReader)
stdoutReader.Close()
stdoutChan <- b
+ close(stdoutChan)
}()
stderrChan := make(chan []byte)
b, _ := ioutil.ReadAll(stderrReader)
stderrReader.Close()
stderrChan <- b
+ close(stderrChan)
}()
// Send a tiny script on stdin to execute the crunch-run command
io.WriteString(stdinWriter, execScript(append(crunchRunCommand, container.UUID)))
stdinWriter.Close()
- err = cmd.Wait()
-
stdoutMsg := <-stdoutChan
stderrmsg := <-stderrChan
- close(stdoutChan)
- close(stderrChan)
+ err = cmd.Wait()
if err != nil {
submitErr = fmt.Errorf("Container submission failed: %v: %v (stderr: %q)", cmd.Args, err, stderrmsg)
// Mutex between squeue sync and running sbatch or scancel.
squeueUpdater.SlurmLock.Lock()
- err := scancelCmd(container).Run()
+ cmd := scancelCmd(container)
+ msg, err := cmd.CombinedOutput()
squeueUpdater.SlurmLock.Unlock()
if err != nil {
- log.Printf("Error stopping container %s with scancel: %v",
- container.UUID, err)
+ log.Printf("Error stopping container %s with %v %v: %v %v",
+ container.UUID, cmd.Path, cmd.Args, err, string(msg))
if squeueUpdater.CheckSqueue(container.UUID) {
log.Printf("Container %s is still in squeue after scancel.",
container.UUID)