X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/68b9c7d30c7f47f8a9f9cff8a327fa9a3812d4da..844ff7cc1dc1c93a29b7ad8eca2987b987cf89e6:/lib/crunchrun/singularity.go diff --git a/lib/crunchrun/singularity.go b/lib/crunchrun/singularity.go index 741f542454..1af0d420e4 100644 --- a/lib/crunchrun/singularity.go +++ b/lib/crunchrun/singularity.go @@ -10,6 +10,7 @@ import ( "os" "os/exec" "sort" + "strings" "syscall" "time" @@ -36,6 +37,14 @@ func newSingularityExecutor(logf func(string, ...interface{})) (*singularityExec }, nil } +func (e *singularityExecutor) Runtime() string { + buf, err := exec.Command("singularity", "--version").CombinedOutput() + if err != nil { + return "singularity (unknown version)" + } + return strings.TrimSuffix(string(buf), "\n") +} + func (e *singularityExecutor) getOrCreateProject(ownerUuid string, name string, containerClient *arvados.Client) (*arvados.Group, error) { var gp arvados.GroupList err := containerClient.RequestAndDecode(&gp, @@ -101,7 +110,7 @@ func (e *singularityExecutor) checkImageCache(dockerImageID string, container ar if len(cl.Items) == 1 { imageCollection = cl.Items[0] } else { - collectionName := collectionName + " " + time.Now().UTC().Format(time.RFC3339) + collectionName := "converting " + collectionName exp := time.Now().Add(24 * 7 * 2 * time.Hour) err = containerClient.RequestAndDecode(&imageCollection, arvados.EndpointCollectionCreate.Method, @@ -112,6 +121,7 @@ func (e *singularityExecutor) checkImageCache(dockerImageID string, container ar "name": collectionName, "trash_at": exp.UTC().Format(time.RFC3339), }, + "ensure_unique_name": true, }) if err != nil { return nil, fmt.Errorf("error creating '%v' collection: %s", collectionName, err) @@ -141,6 +151,12 @@ func (e *singularityExecutor) LoadImage(dockerImageID string, imageTarballPath s } if _, err := os.Stat(imageFilename); os.IsNotExist(err) { + // Make sure the docker image is readable, and error + // out if not. + if _, err := os.Stat(imageTarballPath); err != nil { + return err + } + e.logf("building singularity image") // "singularity build" does not accept a // docker-archive://... filename containing a ":" character, @@ -151,7 +167,22 @@ func (e *singularityExecutor) LoadImage(dockerImageID string, imageTarballPath s return err } + // Set up a cache and tmp dir for singularity build + err = os.Mkdir(e.tmpdir+"/cache", 0700) + if err != nil { + return err + } + defer os.RemoveAll(e.tmpdir + "/cache") + err = os.Mkdir(e.tmpdir+"/tmp", 0700) + if err != nil { + return err + } + defer os.RemoveAll(e.tmpdir + "/tmp") + build := exec.Command("singularity", "build", imageFilename, "docker-archive://"+e.tmpdir+"/image.tar") + build.Env = os.Environ() + build.Env = append(build.Env, "SINGULARITY_CACHEDIR="+e.tmpdir+"/cache") + build.Env = append(build.Env, "SINGULARITY_TMPDIR="+e.tmpdir+"/tmp") e.logf("%v", build.Args) out, err := build.CombinedOutput() // INFO: Starting build... @@ -217,11 +248,16 @@ func (e *singularityExecutor) Create(spec containerSpec) error { return nil } -func (e *singularityExecutor) Start() error { - args := []string{"singularity", "exec", "--containall", "--no-home", "--cleanenv", "--pwd", e.spec.WorkingDir} +func (e *singularityExecutor) execCmd(path string) *exec.Cmd { + args := []string{path, "exec", "--containall", "--cleanenv", "--pwd", e.spec.WorkingDir} if !e.spec.EnableNetwork { args = append(args, "--net", "--network=none") } + + if e.spec.CUDADeviceCount != 0 { + args = append(args, "--nv") + } + readonlyflag := map[bool]string{ false: "rw", true: "ro", @@ -233,7 +269,12 @@ func (e *singularityExecutor) Start() error { sort.Strings(binds) for _, path := range binds { mount := e.spec.BindMounts[path] - args = append(args, "--bind", mount.HostPath+":"+path+":"+readonlyflag[mount.ReadOnly]) + if path == e.spec.Env["HOME"] { + // Singularity treates $HOME as special case + args = append(args, "--home", mount.HostPath+":"+path) + } else { + args = append(args, "--bind", mount.HostPath+":"+path+":"+readonlyflag[mount.ReadOnly]) + } } // This is for singularity 3.5.2. There are some behaviors @@ -243,21 +284,34 @@ func (e *singularityExecutor) Start() error { env := make([]string, 0, len(e.spec.Env)) for k, v := range e.spec.Env { if k == "HOME" { - // $HOME is a special case - args = append(args, "--home="+v) - } else { - env = append(env, "SINGULARITYENV_"+k+"="+v) + // Singularity treates $HOME as special case, this is handled + // with --home above + continue } + env = append(env, "SINGULARITYENV_"+k+"="+v) + } + + // Singularity always makes all nvidia devices visible to the + // container. If a resource manager such as slurm or LSF told + // us to select specific devices we need to propagate that. + if cudaVisibleDevices := os.Getenv("CUDA_VISIBLE_DEVICES"); cudaVisibleDevices != "" { + // If a resource manager such as slurm or LSF told + // us to select specific devices we need to propagate that. + env = append(env, "SINGULARITYENV_CUDA_VISIBLE_DEVICES="+cudaVisibleDevices) } + // Singularity's default behavior is to evaluate each + // SINGULARITYENV_* env var with a shell as a double-quoted + // string and pass the result to the contained + // process. Singularity 3.10+ has an option to pass env vars + // through literally without evaluating, which is what we + // want. See https://github.com/sylabs/singularity/pull/704 + // and https://dev.arvados.org/issues/19081 + env = append(env, "SINGULARITY_NO_EVAL=1") args = append(args, e.imageFilename) args = append(args, e.spec.Command...) - path, err := exec.LookPath(args[0]) - if err != nil { - return err - } - child := &exec.Cmd{ + return &exec.Cmd{ Path: path, Args: args, Env: env, @@ -265,6 +319,14 @@ func (e *singularityExecutor) Start() error { Stdout: e.spec.Stdout, Stderr: e.spec.Stderr, } +} + +func (e *singularityExecutor) Start() error { + path, err := exec.LookPath("singularity") + if err != nil { + return err + } + child := e.execCmd(path) err = child.Start() if err != nil { return err