"os"
"os/exec"
"os/signal"
+ "os/user"
"path"
"path/filepath"
"regexp"
"git.arvados.org/arvados.git/sdk/go/arvadosclient"
"git.arvados.org/arvados.git/sdk/go/keepclient"
"git.arvados.org/arvados.git/sdk/go/manifest"
+ "golang.org/x/sys/unix"
)
// command is a field-less type. NOTE(review): its methods are not
// visible in this excerpt -- presumably it carries the crunch-run
// command entry point; confirm against the rest of the file.
type command struct{}
sort.Strings(binds)
for _, bind := range binds {
- mnt, ok := runner.Container.Mounts[bind]
- if !ok {
+ mnt, notSecret := runner.Container.Mounts[bind]
+ if !notSecret {
mnt = runner.SecretMounts[bind]
}
if bind == "stdout" || bind == "stderr" {
}
} else {
src = fmt.Sprintf("%s/tmp%d", runner.ArvMountPoint, tmpcount)
- arvMountCmd = append(arvMountCmd, "--mount-tmp")
- arvMountCmd = append(arvMountCmd, fmt.Sprintf("tmp%d", tmpcount))
+ arvMountCmd = append(arvMountCmd, "--mount-tmp", fmt.Sprintf("tmp%d", tmpcount))
tmpcount++
}
if mnt.Writable {
if err != nil {
return nil, fmt.Errorf("writing temp file: %v", err)
}
- if strings.HasPrefix(bind, runner.Container.OutputPath+"/") {
+ if strings.HasPrefix(bind, runner.Container.OutputPath+"/") && (notSecret || runner.Container.Mounts[runner.Container.OutputPath].Kind != "collection") {
+ // In most cases, if the container
+ // specifies a literal file inside the
+ // output path, we copy it into the
+ // output directory (either a mounted
+ // collection or a staging area on the
+ // host fs). If it's a secret, it will
+ // be skipped when copying output from
+ // staging to Keep later.
copyFiles = append(copyFiles, copyFile{tmpfn, runner.HostOutputDir + bind[len(runner.Container.OutputPath):]})
} else {
+ // If a secret is outside OutputPath,
+ // we bind mount the secret file
+ // directly just like other mounts. We
+ // also use this strategy when a
+ // secret is inside OutputPath but
+ // OutputPath is a live collection, to
+ // avoid writing the secret to
+ // Keep. Attempting to remove a
+ // bind-mounted secret file from
+ // inside the container will return a
+ // "Device or resource busy" error
+ // that might not be handled well by
+ // the container, which is why we
+ // don't use this strategy when
+ // OutputPath is a staging directory.
bindmounts[bind] = bindmount{HostPath: tmpfn, ReadOnly: true}
}
runner.executorStdin = stdin
runner.executorStdout = stdout
runner.executorStderr = stderr
+
+ if runner.Container.RuntimeConstraints.CUDA.DeviceCount > 0 {
+ nvidiaModprobe(runner.CrunchLog)
+ }
+
return runner.executor.Create(containerSpec{
- Image: imageID,
- VCPUs: runner.Container.RuntimeConstraints.VCPUs,
- RAM: ram,
- WorkingDir: workdir,
- Env: env,
- BindMounts: bindmounts,
- Command: runner.Container.Command,
- EnableNetwork: enableNetwork,
- NetworkMode: runner.networkMode,
- CgroupParent: runner.setCgroupParent,
- Stdin: stdin,
- Stdout: stdout,
- Stderr: stderr,
+ Image: imageID,
+ VCPUs: runner.Container.RuntimeConstraints.VCPUs,
+ RAM: ram,
+ WorkingDir: workdir,
+ Env: env,
+ BindMounts: bindmounts,
+ Command: runner.Container.Command,
+ EnableNetwork: enableNetwork,
+ CUDADeviceCount: runner.Container.RuntimeConstraints.CUDA.DeviceCount,
+ NetworkMode: runner.networkMode,
+ CgroupParent: runner.setCgroupParent,
+ Stdin: stdin,
+ Stdout: stdout,
+ Stderr: stderr,
})
}
}
runner.ExitCode = &exitcode
+ extra := ""
+ if exitcode&0x80 != 0 {
+ // Convert raw exit status (0x80 + signal number) to a
+ // string to log after the code, like " (signal 101)"
+ // or " (signal 9, SIGKILL)"
+ sig := syscall.WaitStatus(exitcode).Signal()
+ if name := unix.SignalName(sig); name != "" {
+ extra = fmt.Sprintf(" (signal %d, %s)", sig, name)
+ } else {
+ extra = fmt.Sprintf(" (signal %d)", sig)
+ }
+ }
+ runner.CrunchLog.Printf("Container exited with status code %d%s", exitcode, extra)
+
var returnErr error
if err = runner.executorStdin.Close(); err != nil {
err = fmt.Errorf("error closing container stdin: %s", err)
// Run the full container lifecycle.
func (runner *ContainerRunner) Run() (err error) {
runner.CrunchLog.Printf("crunch-run %s started", cmd.Version.String())
+ runner.CrunchLog.Printf("%s", currentUserAndGroups())
runner.CrunchLog.Printf("Executing container '%s' using %s runtime", runner.Container.UUID, runner.executor.Runtime())
hostname, hosterr := os.Hostname()
if ok, code := cmd.ParseFlags(flags, prog, args, "container-uuid", stderr); !ok {
return code
- } else if flags.NArg() != 1 {
+ } else if !*list && flags.NArg() != 1 {
fmt.Fprintf(stderr, "missing required argument: container-uuid (try -help)\n")
return 2
}
os.Setenv("ARVADOS_KEEP_SERVICES", url)
return cmd, nil
}
+
+ // currentUserAndGroups returns the current uid, gid, and group list in a format suitable for logging,
+ // e.g. "crunch-run process has uid=1234(arvados) gid=1234(arvados) groups=1234(arvados),114(fuse)".
+ // Lookups are best-effort: unresolved names are omitted, and a user.Current failure yields an error message.
+ func currentUserAndGroups() string {
+ u, err := user.Current()
+ if err != nil {
+ return fmt.Sprintf("error getting current user ID: %s", err) // the failure is reported in the returned text, not as an error value
+ }
+ s := fmt.Sprintf("crunch-run process has uid=%s(%s) gid=%s", u.Uid, u.Username, u.Gid)
+ if g, err := user.LookupGroupId(u.Gid); err == nil { // append "(name)" only when the primary group resolves
+ s += fmt.Sprintf("(%s)", g.Name)
+ }
+ s += " groups="
+ if gids, err := u.GroupIds(); err == nil { // on failure nothing is appended after "groups="
+ for i, gid := range gids {
+ if i > 0 {
+ s += "," // comma-separate every entry after the first
+ }
+ s += gid
+ if g, err := user.LookupGroupId(gid); err == nil { // same best-effort name lookup for each listed group
+ s += fmt.Sprintf("(%s)", g.Name)
+ }
+ }
+ }
+ return s
+ }