From: Peter Amstutz Date: Thu, 22 Jul 2021 20:22:39 +0000 (-0400) Subject: 17813: Refactor singularity image loading / caching / conversion X-Git-Tag: 2.3.0~131^2~4 X-Git-Url: https://git.arvados.org/arvados.git/commitdiff_plain/5ef89a24d5fa8bc6926a433e22360a09fdb3154d 17813: Refactor singularity image loading / caching / conversion Arvados-DCO-1.1-Signed-off-by: Peter Amstutz --- diff --git a/lib/crunchrun/crunchrun.go b/lib/crunchrun/crunchrun.go index 1937b8815a..d4a8650726 100644 --- a/lib/crunchrun/crunchrun.go +++ b/lib/crunchrun/crunchrun.go @@ -260,18 +260,15 @@ func (runner *ContainerRunner) LoadImage() (string, error) { return "", fmt.Errorf("cannot choose from multiple tar files in image collection: %v", tarfiles) } imageID := tarfiles[0][:len(tarfiles[0])-4] - imageFile := runner.ArvMountPoint + "/by_id/" + runner.Container.ContainerImage + "/" + tarfiles[0] runner.CrunchLog.Printf("Using Docker image id %q", imageID) - if !runner.executor.ImageLoaded(imageID) { - runner.CrunchLog.Print("Loading Docker image from keep") - err = runner.executor.LoadImage(imageFile) - if err != nil { - return "", err - } - } else { - runner.CrunchLog.Print("Container image is available") + runner.CrunchLog.Print("Loading Docker image from keep") + err = runner.executor.LoadImage(imageID, runner.Container, runner.ArvMountPoint, + runner.containerClient, runner.ContainerKeepClient) + if err != nil { + return "", err } + return imageID, nil } @@ -599,6 +596,7 @@ func (runner *ContainerRunner) SetupMounts() (map[string]bindmount, error) { } else { arvMountCmd = append(arvMountCmd, "--mount-by-id", "by_id") } + arvMountCmd = append(arvMountCmd, "--mount-by-id", "by_uuid") arvMountCmd = append(arvMountCmd, runner.ArvMountPoint) runner.ArvMount, err = runner.RunArvMount(arvMountCmd, token) @@ -1201,12 +1199,14 @@ func (runner *ContainerRunner) CleanupDirs() { } } } + runner.ArvMount = nil } if runner.ArvMountPoint != "" { if rmerr := os.Remove(runner.ArvMountPoint); rmerr != nil { runner.CrunchLog.Printf("While cleaning up arv-mount directory %s: %v", runner.ArvMountPoint, rmerr) } + runner.ArvMountPoint = "" } if rmerr := os.RemoveAll(runner.parentTemp); rmerr != nil { @@ -1441,6 +1441,7 @@ func (runner *ContainerRunner) Run() (err error) { } checkErr("stopHoststat", runner.stopHoststat()) checkErr("CommitLogs", runner.CommitLogs()) + runner.CleanupDirs() checkErr("UpdateContainerFinal", runner.UpdateContainerFinal()) }() @@ -1458,10 +1459,6 @@ func (runner *ContainerRunner) Run() (err error) { return } - // Communicate some additional configuration to the executor - runner.executor.SetArvadoClient(runner.containerClient, runner.ContainerKeepClient, - runner.Container, runner.ArvMountPoint) - // check for and/or load image imageID, err := runner.LoadImage() if err != nil { diff --git a/lib/crunchrun/docker.go b/lib/crunchrun/docker.go index 4d3f7caf9a..10a2dd38e8 100644 --- a/lib/crunchrun/docker.go +++ b/lib/crunchrun/docker.go @@ -46,12 +46,16 @@ func newDockerExecutor(containerUUID string, logf func(string, ...interface{}), }, err } -func (e *dockerExecutor) ImageLoaded(imageID string) bool { +func (e *dockerExecutor) LoadImage(imageID string, container arvados.Container, arvMountPoint string, + containerClient *arvados.Client, keepClient IKeepClient) error { _, _, err := e.dockerclient.ImageInspectWithRaw(context.TODO(), imageID) - return err == nil -} + if err == nil { + // already loaded + return nil + } + + filename := arvMountPoint + "/by_id/" + container.ContainerImage + "/" + imageID + ".tar" -func (e *dockerExecutor) LoadImage(filename string) error { f, err := os.Open(filename) if err != nil { return err @@ -253,6 +257,3 @@ func (e *dockerExecutor) handleStdoutStderr(stdout, stderr io.Writer, reader io. func (e *dockerExecutor) Close() { e.dockerclient.ContainerRemove(context.TODO(), e.containerID, dockertypes.ContainerRemoveOptions{Force: true}) } - -func (e *dockerExecutor) SetArvadoClient(containerClient *arvados.Client, keepClient IKeepClient, container arvados.Container, keepMount string) { -} diff --git a/lib/crunchrun/executor.go b/lib/crunchrun/executor.go index 05e9adefe3..787edda010 100644 --- a/lib/crunchrun/executor.go +++ b/lib/crunchrun/executor.go @@ -34,13 +34,10 @@ type containerSpec struct { // containerExecutor is an interface to a container runtime // (docker/singularity). type containerExecutor interface { - // ImageLoaded determines whether the given image is already - // available to use without calling ImageLoad. - ImageLoaded(imageID string) bool - // ImageLoad loads the image from the given tarball such that // it can be used to create/start a container. - LoadImage(filename string) error + LoadImage(imageID string, container arvados.Container, keepMount string, + containerClient *arvados.Client, keepClient IKeepClient) error // Wait for the container process to finish, and return its // exit code. If applicable, also remove the stopped container @@ -61,7 +58,4 @@ type containerExecutor interface { // Release resources (temp dirs, stopped containers) Close() - - // Give the executor access to arvados client & container info - SetArvadoClient(containerClient *arvados.Client, keepClient IKeepClient, container arvados.Container, keepMount string) } diff --git a/lib/crunchrun/singularity.go b/lib/crunchrun/singularity.go index 1a1567451d..5d930626a0 100644 --- a/lib/crunchrun/singularity.go +++ b/lib/crunchrun/singularity.go @@ -6,28 +6,23 @@ package crunchrun import ( "fmt" - "io" "io/ioutil" "os" "os/exec" "sort" - "strings" "syscall" + "time" "git.arvados.org/arvados.git/sdk/go/arvados" "golang.org/x/net/context" ) type singularityExecutor struct { - logf func(string, ...interface{}) - spec containerSpec - tmpdir string - child *exec.Cmd - imageFilename string // "sif" image - containerClient *arvados.Client - container arvados.Container - keepClient IKeepClient - keepMount string + logf func(string, ...interface{}) + spec containerSpec + tmpdir string + child *exec.Cmd + imageFilename string // "sif" image } func newSingularityExecutor(logf func(string, ...interface{})) (*singularityExecutor, error) { @@ -41,9 +36,9 @@ func newSingularityExecutor(logf func(string, ...interface{})) (*singularityExec }, nil } -func (e *singularityExecutor) getOrCreateProject(ownerUuid string, name string, create bool) (*arvados.Group, error) { +func (e *singularityExecutor) getOrCreateProject(ownerUuid string, name string, containerClient *arvados.Client) (*arvados.Group, error) { var gp arvados.GroupList - err := e.containerClient.RequestAndDecode(&gp, + err := containerClient.RequestAndDecode(&gp, arvados.EndpointGroupList.Method, arvados.EndpointGroupList.Path, nil, arvados.ListOptions{Filters: []arvados.Filter{ @@ -58,11 +53,9 @@ func (e *singularityExecutor) getOrCreateProject(ownerUuid string, name string, if len(gp.Items) == 1 { return &gp.Items[0], nil } - if !create { - return nil, nil - } + var rgroup arvados.Group - err = e.containerClient.RequestAndDecode(&rgroup, + err = containerClient.RequestAndDecode(&rgroup, arvados.EndpointGroupCreate.Method, arvados.EndpointGroupCreate.Path, nil, map[string]interface{}{ @@ -78,28 +71,22 @@ func (e *singularityExecutor) getOrCreateProject(ownerUuid string, name string, return &rgroup, nil } -func (e *singularityExecutor) ImageLoaded(imageId string) bool { - if e.containerClient == nil { - return false - } - - // Check if docker image is cached in keep & if so set imageFilename +func (e *singularityExecutor) checkImageCache(dockerImageID string, container arvados.Container, arvMountPoint string, + containerClient *arvados.Client, keepClient IKeepClient) (collectionUuid string, err error) { // Cache the image to keep - cacheGroup, err := e.getOrCreateProject(e.container.RuntimeUserUUID, ".cache", false) + cacheGroup, err := e.getOrCreateProject(container.RuntimeUserUUID, ".cache", containerClient) if err != nil { - e.logf("error getting '.cache' project: %v", err) - return false + return "", fmt.Errorf("error getting '.cache' project: %v", err) } - imageGroup, err := e.getOrCreateProject(cacheGroup.UUID, "auto-generated singularity images", false) + imageGroup, err := e.getOrCreateProject(cacheGroup.UUID, "auto-generated singularity images", containerClient) if err != nil { - e.logf("error getting 'auto-generated singularity images' project: %s", err) - return false + return "", fmt.Errorf("error getting 'auto-generated singularity images' project: %s", err) } - collectionName := fmt.Sprintf("singularity image for %v", imageId) + collectionName := fmt.Sprintf("singularity image for %v", dockerImageID) var cl arvados.CollectionList - err = e.containerClient.RequestAndDecode(&cl, + err = containerClient.RequestAndDecode(&cl, arvados.EndpointCollectionList.Method, arvados.EndpointCollectionList.Path, nil, arvados.ListOptions{Filters: []arvados.Filter{ @@ -108,125 +95,99 @@ func (e *singularityExecutor) ImageLoaded(imageId string) bool { }, Limit: 1}) if err != nil { - e.logf("error getting collection '%v' project: %v", err) - return false - } - if len(cl.Items) == 0 { - e.logf("no cached image '%v' found", collectionName) - return false + return "", fmt.Errorf("error querying for collection '%v': %v", err) } + var imageCollection arvados.Collection + if len(cl.Items) == 1 { + imageCollection = cl.Items[0] + } else { + collectionName := collectionName + " " + time.Now().UTC().Format(time.RFC3339) + + err = containerClient.RequestAndDecode(&imageCollection, + arvados.EndpointCollectionCreate.Method, + arvados.EndpointCollectionCreate.Path, + nil, map[string]interface{}{ + "collection": map[string]string{ + "owner_uuid": imageGroup.UUID, + "name": collectionName, + }, + }) + if err != nil { + e.logf("error creating '%v' collection: %s", collectionName, err) + } - path := fmt.Sprintf("%s/by_id/%s/image.sif", e.keepMount, cl.Items[0].PortableDataHash) - e.logf("Looking for %v", path) - if _, err = os.Stat(path); os.IsNotExist(err) { - return false } - e.imageFilename = path - return true + return imageCollection.UUID, nil } // LoadImage will satisfy ContainerExecuter interface transforming // containerImage into a sif file for later use. -func (e *singularityExecutor) LoadImage(imageTarballPath string) error { - if e.imageFilename != "" { - e.logf("using singularity image %v", e.imageFilename) - - // was set by ImageLoaded - return nil - } +func (e *singularityExecutor) LoadImage(dockerImageID string, container arvados.Container, arvMountPoint string, + containerClient *arvados.Client, keepClient IKeepClient) error { - e.logf("building singularity image") - // "singularity build" does not accept a - // docker-archive://... filename containing a ":" character, - // as in "/path/to/sha256:abcd...1234.tar". Workaround: make a - // symlink that doesn't have ":" chars. - err := os.Symlink(imageTarballPath, e.tmpdir+"/image.tar") - if err != nil { - return err - } - e.imageFilename = e.tmpdir + "/image.sif" - build := exec.Command("singularity", "build", e.imageFilename, "docker-archive://"+e.tmpdir+"/image.tar") - e.logf("%v", build.Args) - out, err := build.CombinedOutput() - // INFO: Starting build... - // Getting image source signatures - // Copying blob ab15617702de done - // Copying config 651e02b8a2 done - // Writing manifest to image destination - // Storing signatures - // 2021/04/22 14:42:14 info unpack layer: sha256:21cbfd3a344c52b197b9fa36091e66d9cbe52232703ff78d44734f85abb7ccd3 - // INFO: Creating SIF file... - // INFO: Build complete: arvados-jobs.latest.sif - e.logf("%s", out) + sifCollectionUUID, err := e.checkImageCache(dockerImageID, container, arvMountPoint, containerClient, keepClient) if err != nil { return err } - if e.containerClient == nil { - return nil - } + imageTarballPath := arvMountPoint + "/by_id/" + container.ContainerImage + "/" + dockerImageID + ".tar" - // Cache the image to keep - cacheGroup, err := e.getOrCreateProject(e.container.RuntimeUserUUID, ".cache", true) - if err != nil { - e.logf("error getting '.cache' project: %v", err) - return nil - } - imageGroup, err := e.getOrCreateProject(cacheGroup.UUID, "auto-generated singularity images", true) - if err != nil { - e.logf("error getting 'auto-generated singularity images' project: %v", err) - return nil - } + imageFilename := fmt.Sprintf("%s/by_uuid/%s/image.sif", arvMountPoint, sifCollectionUUID) - parts := strings.Split(imageTarballPath, "/") - imageId := parts[len(parts)-1] - if strings.HasSuffix(imageId, ".tar") { - imageId = imageId[0 : len(imageId)-4] - } + if _, err = os.Stat(imageFilename); os.IsNotExist(err) { + exec.Command("find", arvMountPoint+"/by_id/").Run() - fs, err := (&arvados.Collection{ManifestText: ""}).FileSystem(e.containerClient, e.keepClient) - if err != nil { - e.logf("error creating FileSystem: %s", err) - } + e.logf("building singularity image") + // "singularity build" does not accept a + // docker-archive://... filename containing a ":" character, + // as in "/path/to/sha256:abcd...1234.tar". Workaround: make a + // symlink that doesn't have ":" chars. + err := os.Symlink(imageTarballPath, e.tmpdir+"/image.tar") + if err != nil { + return err + } - dst, err := fs.OpenFile("image.sif", os.O_CREATE|os.O_WRONLY, 0666) - if err != nil { - e.logf("error creating opening collection file for writing: %s", err) + build := exec.Command("singularity", "build", imageFilename, "docker-archive://"+e.tmpdir+"/image.tar") + e.logf("%v", build.Args) + out, err := build.CombinedOutput() + // INFO: Starting build... + // Getting image source signatures + // Copying blob ab15617702de done + // Copying config 651e02b8a2 done + // Writing manifest to image destination + // Storing signatures + // 2021/04/22 14:42:14 info unpack layer: sha256:21cbfd3a344c52b197b9fa36091e66d9cbe52232703ff78d44734f85abb7ccd3 + // INFO: Creating SIF file... + // INFO: Build complete: arvados-jobs.latest.sif + e.logf("%s", out) + if err != nil { + return err + } } - src, err := os.Open(e.imageFilename) - if err != nil { - dst.Close() - return nil - } - defer src.Close() - _, err = io.Copy(dst, src) - if err != nil { - dst.Close() - return nil - } + // update TTL to now + two weeks + exp := time.Now().Add(24 * 7 * 2 * time.Hour) - manifestText, err := fs.MarshalManifest(".") + uuidPath, err := containerClient.PathForUUID("update", sifCollectionUUID) if err != nil { - e.logf("error creating manifest text: %s", err) + e.logf("error PathForUUID: %v", err) + return nil } - var imageCollection arvados.Collection - collectionName := fmt.Sprintf("singularity image for %s", imageId) - err = e.containerClient.RequestAndDecode(&imageCollection, - arvados.EndpointCollectionCreate.Method, - arvados.EndpointCollectionCreate.Path, + err = containerClient.RequestAndDecode(&imageCollection, + arvados.EndpointCollectionUpdate.Method, + uuidPath, nil, map[string]interface{}{ "collection": map[string]string{ - "owner_uuid": imageGroup.UUID, - "name": collectionName, - "manifest_text": manifestText, + "name": fmt.Sprintf("singularity image for %v", dockerImageID), + "trash_at": exp.UTC().Format(time.RFC3339), }, }) if err != nil { - e.logf("error creating '%v' collection: %s", collectionName, err) + e.logf("error updating collection trash_at: %v", err) } + e.imageFilename = fmt.Sprintf("%s/by_id/%s/image.sif", arvMountPoint, imageCollection.PortableDataHash) return nil } @@ -321,10 +282,3 @@ func (e *singularityExecutor) Close() { e.logf("error removing temp dir: %s", err) } } - -func (e *singularityExecutor) SetArvadoClient(containerClient *arvados.Client, keepClient IKeepClient, container arvados.Container, keepMount string) { - e.containerClient = containerClient - e.container = container - e.keepClient = keepClient - e.keepMount = keepMount -}