X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/6766d1b04bd238d05890f3ec221c65e84920dde6..d59079ca2a8e1afb3cf02bd9908c2cfe4c025d4f:/lib/crunchrun/singularity.go diff --git a/lib/crunchrun/singularity.go b/lib/crunchrun/singularity.go index 45926b065f..61fecad0a1 100644 --- a/lib/crunchrun/singularity.go +++ b/lib/crunchrun/singularity.go @@ -10,21 +10,19 @@ import ( "os" "os/exec" "sort" - "strings" "syscall" + "time" "git.arvados.org/arvados.git/sdk/go/arvados" "golang.org/x/net/context" ) type singularityExecutor struct { - logf func(string, ...interface{}) - spec containerSpec - tmpdir string - child *exec.Cmd - imageFilename string // "sif" image - containerClient *arvados.Client - container arvados.Container + logf func(string, ...interface{}) + spec containerSpec + tmpdir string + child *exec.Cmd + imageFilename string // "sif" image } func newSingularityExecutor(logf func(string, ...interface{})) (*singularityExecutor, error) { @@ -38,27 +36,26 @@ func newSingularityExecutor(logf func(string, ...interface{})) (*singularityExec }, nil } -func (e *singularityExecutor) getOrCreateProject(ownerUuid string, name string, create bool) (*arvados.Group, error) { +func (e *singularityExecutor) getOrCreateProject(ownerUuid string, name string, containerClient *arvados.Client) (*arvados.Group, error) { var gp arvados.GroupList - err := e.containerClient.RequestAndDecode(&gp, + err := containerClient.RequestAndDecode(&gp, arvados.EndpointGroupList.Method, arvados.EndpointGroupList.Path, nil, arvados.ListOptions{Filters: []arvados.Filter{ arvados.Filter{"owner_uuid", "=", ownerUuid}, arvados.Filter{"name", "=", name}, arvados.Filter{"group_class", "=", "project"}, - }}) + }, + Limit: 1}) if err != nil { return nil, err } - if len(gp.Items) > 0 { + if len(gp.Items) == 1 { return &gp.Items[0], nil } - if !create { - return nil, nil - } + var rgroup arvados.Group - err = e.containerClient.RequestAndDecode(&rgroup, + err = containerClient.RequestAndDecode(&rgroup, arvados.EndpointGroupCreate.Method, arvados.EndpointGroupCreate.Path, nil, map[string]interface{}{ @@ -74,75 +71,150 @@ func (e *singularityExecutor) getOrCreateProject(ownerUuid string, name string, return &rgroup, nil } -func (e *singularityExecutor) ImageLoaded(string) bool { - // Check if docker image is cached in keep & if so set imageFilename +func (e *singularityExecutor) checkImageCache(dockerImageID string, container arvados.Container, arvMountPoint string, + containerClient *arvados.Client) (collection *arvados.Collection, err error) { - return false + // Cache the image to keep + cacheGroup, err := e.getOrCreateProject(container.RuntimeUserUUID, ".cache", containerClient) + if err != nil { + return nil, fmt.Errorf("error getting '.cache' project: %v", err) + } + imageGroup, err := e.getOrCreateProject(cacheGroup.UUID, "auto-generated singularity images", containerClient) + if err != nil { + return nil, fmt.Errorf("error getting 'auto-generated singularity images' project: %s", err) + } + + collectionName := fmt.Sprintf("singularity image for %v", dockerImageID) + var cl arvados.CollectionList + err = containerClient.RequestAndDecode(&cl, + arvados.EndpointCollectionList.Method, + arvados.EndpointCollectionList.Path, + nil, arvados.ListOptions{Filters: []arvados.Filter{ + arvados.Filter{"owner_uuid", "=", imageGroup.UUID}, + arvados.Filter{"name", "=", collectionName}, + }, + Limit: 1}) + if err != nil { + return nil, fmt.Errorf("error querying for collection '%v': %v", collectionName, err) + } + var imageCollection arvados.Collection + if len(cl.Items) == 1 { + imageCollection = cl.Items[0] + } else { + collectionName := "converting " + collectionName + exp := time.Now().Add(24 * 7 * 2 * time.Hour) + err = containerClient.RequestAndDecode(&imageCollection, + arvados.EndpointCollectionCreate.Method, + arvados.EndpointCollectionCreate.Path, + nil, map[string]interface{}{ + "collection": map[string]string{ + "owner_uuid": imageGroup.UUID, + "name": collectionName, + "trash_at": exp.UTC().Format(time.RFC3339), + }, + "ensure_unique_name": true, + }) + if err != nil { + return nil, fmt.Errorf("error creating '%v' collection: %s", collectionName, err) + } + + } + + return &imageCollection, nil } // LoadImage will satisfy ContainerExecuter interface transforming // containerImage into a sif file for later use. -func (e *singularityExecutor) LoadImage(imageTarballPath string) error { - if e.imageFilename != "" { - // was set by ImageLoaded - return nil - } +func (e *singularityExecutor) LoadImage(dockerImageID string, imageTarballPath string, container arvados.Container, arvMountPoint string, + containerClient *arvados.Client) error { - e.logf("building singularity image") - // "singularity build" does not accept a - // docker-archive://... filename containing a ":" character, - // as in "/path/to/sha256:abcd...1234.tar". Workaround: make a - // symlink that doesn't have ":" chars. - err := os.Symlink(imageTarballPath, e.tmpdir+"/image.tar") - if err != nil { - return err + var imageFilename string + var sifCollection *arvados.Collection + var err error + if containerClient != nil { + sifCollection, err = e.checkImageCache(dockerImageID, container, arvMountPoint, containerClient) + if err != nil { + return err + } + imageFilename = fmt.Sprintf("%s/by_uuid/%s/image.sif", arvMountPoint, sifCollection.UUID) + } else { + imageFilename = e.tmpdir + "/image.sif" } - e.imageFilename = e.tmpdir + "/image.sif" - build := exec.Command("singularity", "build", e.imageFilename, "docker-archive://"+e.tmpdir+"/image.tar") - e.logf("%v", build.Args) - out, err := build.CombinedOutput() - // INFO: Starting build... - // Getting image source signatures - // Copying blob ab15617702de done - // Copying config 651e02b8a2 done - // Writing manifest to image destination - // Storing signatures - // 2021/04/22 14:42:14 info unpack layer: sha256:21cbfd3a344c52b197b9fa36091e66d9cbe52232703ff78d44734f85abb7ccd3 - // INFO: Creating SIF file... - // INFO: Build complete: arvados-jobs.latest.sif - e.logf("%s", out) - if err != nil { - return err + + if _, err := os.Stat(imageFilename); os.IsNotExist(err) { + // Make sure the docker image is readable, and error + // out if not. + if _, err := os.Stat(imageTarballPath); err != nil { + return err + } + + e.logf("building singularity image") + // "singularity build" does not accept a + // docker-archive://... filename containing a ":" character, + // as in "/path/to/sha256:abcd...1234.tar". Workaround: make a + // symlink that doesn't have ":" chars. + err := os.Symlink(imageTarballPath, e.tmpdir+"/image.tar") + if err != nil { + return err + } + + build := exec.Command("singularity", "build", imageFilename, "docker-archive://"+e.tmpdir+"/image.tar") + e.logf("%v", build.Args) + out, err := build.CombinedOutput() + // INFO: Starting build... + // Getting image source signatures + // Copying blob ab15617702de done + // Copying config 651e02b8a2 done + // Writing manifest to image destination + // Storing signatures + // 2021/04/22 14:42:14 info unpack layer: sha256:21cbfd3a344c52b197b9fa36091e66d9cbe52232703ff78d44734f85abb7ccd3 + // INFO: Creating SIF file... + // INFO: Build complete: arvados-jobs.latest.sif + e.logf("%s", out) + if err != nil { + return err + } } - // Cache the image to keep - cacheGroup, err := e.getOrCreateProject(e.container.RuntimeUserUUID, ".cache", true) - if err != nil { - e.logf("error getting '.cache' project: %s", err) + if containerClient == nil { + e.imageFilename = imageFilename return nil } - imageGroup, err := e.getOrCreateProject(cacheGroup.UUID, "auto-generated singularity images", true) + + // update TTL to now + two weeks + exp := time.Now().Add(24 * 7 * 2 * time.Hour) + + uuidPath, err := containerClient.PathForUUID("update", sifCollection.UUID) if err != nil { - e.logf("error getting 'auto-generated singularity images' project: %s", err) + e.logf("error PathForUUID: %v", err) return nil } - - parts := strings.Split(imageTarballPath, "/") - imageId := parts[len(parts)-1] - var imageCollection arvados.Collection - err = e.containerClient.RequestAndDecode(&imageCollection, - arvados.EndpointCollectionCreate.Method, - arvados.EndpointCollectionCreate.Path, + err = containerClient.RequestAndDecode(&imageCollection, + arvados.EndpointCollectionUpdate.Method, + uuidPath, nil, map[string]interface{}{ "collection": map[string]string{ - "owner_uuid": imageGroup.UUID, - "name": fmt.Sprintf("singularity image for %s", imageId), - } + "name": fmt.Sprintf("singularity image for %v", dockerImageID), + "trash_at": exp.UTC().Format(time.RFC3339), + }, }) + if err == nil { + // If we just wrote the image to the cache, the + // response also returns the updated PDH + e.imageFilename = fmt.Sprintf("%s/by_id/%s/image.sif", arvMountPoint, imageCollection.PortableDataHash) + return nil + } + + e.logf("error updating/renaming collection for cached sif image: %v", err) + // Failed to update but maybe it lost a race and there is + // another cached collection in the same place, so check the cache + // again + sifCollection, err = e.checkImageCache(dockerImageID, container, arvMountPoint, containerClient) if err != nil { - e.logf("error creating 'auto-generated singularity images' collection: %s", err) + return err } + e.imageFilename = fmt.Sprintf("%s/by_id/%s/image.sif", arvMountPoint, sifCollection.PortableDataHash) return nil } @@ -170,8 +242,6 @@ func (e *singularityExecutor) Start() error { mount := e.spec.BindMounts[path] args = append(args, "--bind", mount.HostPath+":"+path+":"+readonlyflag[mount.ReadOnly]) } - args = append(args, e.imageFilename) - args = append(args, e.spec.Command...) // This is for singularity 3.5.2. There are some behaviors // that will change in singularity 3.6, please see: @@ -179,9 +249,17 @@ func (e *singularityExecutor) Start() error { // https://sylabs.io/guides/3.5/user-guide/environment_and_metadata.html env := make([]string, 0, len(e.spec.Env)) for k, v := range e.spec.Env { - env = append(env, "SINGULARITYENV_"+k+"="+v) + if k == "HOME" { + // $HOME is a special case + args = append(args, "--home="+v) + } else { + env = append(env, "SINGULARITYENV_"+k+"="+v) + } } + args = append(args, e.imageFilename) + args = append(args, e.spec.Command...) + path, err := exec.LookPath(args[0]) if err != nil { return err @@ -231,8 +309,3 @@ func (e *singularityExecutor) Close() { e.logf("error removing temp dir: %s", err) } } - -func (e *singularityExecutor) SetArvadoClient(containerClient *arvados.Client, container arvados.Container) { - e.containerClient = containerClient - e.container = container -}