table(table table-bordered table-condensed).
|_. Field |_. Type |_. Description |
|outputTTL|int|If the value is greater than zero, intermediate output collections are considered temporary and will be automatically trashed. Temporary collections will be trashed @outputTTL@ seconds after creation. A value of zero means intermediate output should be retained indefinitely (this is the default behavior).
-Note: arvados-cwl-runner currently does not take workflow dependencies into account when setting the TTL on an intermediate output collection. If the TTL is too short, it is possible for a collection to be trashed before downstream steps that consume it are started. The recommended minimum value for TTL is the expected duration of the entire the workflow.|
+Note: arvados-cwl-runner currently does not take workflow dependencies into account when setting the TTL on an intermediate output collection. If the TTL is too short, it is possible for a collection to be trashed before downstream steps that consume it are started. The recommended minimum value for TTL is the expected duration of the entire workflow.|
h2. cwltool:Secrets
The output of arv-copy displays the uuid of the collection generated in the destination cluster. By default, the output is placed in your home project in the destination cluster. If you want to place your collection in an existing project, you can specify the destination project using the @--project-uuid@ option followed by the project uuid.
-For example, this will copy the collection to project dstcl-j7d0g-a894213ukjhal12 in the destination cluster.
+For example, this will copy the collection to project @dstcl-j7d0g-a894213ukjhal12@ in the destination cluster.
<notextile> <pre><code>~$ <span class="userinput">arv-copy --src pirca --dst dstcl --project-uuid dstcl-j7d0g-a894213ukjhal12 jutro-4zz18-tv416l321i4r01e
</code></pre>
</notextile>
+Additionally, if you need to specify the storage classes that should be used to store the copied data on the destination cluster, you can do so with the @--storage-classes LIST@ argument, where @LIST@ is a comma-separated list of storage class names.
+
h3. How to copy a workflow
We will use the uuid @jutro-7fd4e-mkmmq53m1ze6apx@ as an example workflow.
return "", fmt.Errorf("cannot choose from multiple tar files in image collection: %v", tarfiles)
}
imageID := tarfiles[0][:len(tarfiles[0])-4]
- imageFile := runner.ArvMountPoint + "/by_id/" + runner.Container.ContainerImage + "/" + tarfiles[0]
+ imageTarballPath := runner.ArvMountPoint + "/by_id/" + runner.Container.ContainerImage + "/" + imageID + ".tar"
runner.CrunchLog.Printf("Using Docker image id %q", imageID)
- if !runner.executor.ImageLoaded(imageID) {
- runner.CrunchLog.Print("Loading Docker image from keep")
- err = runner.executor.LoadImage(imageFile)
- if err != nil {
- return "", err
- }
- } else {
- runner.CrunchLog.Print("Docker image is available")
+ runner.CrunchLog.Print("Loading Docker image from keep")
+ err = runner.executor.LoadImage(imageID, imageTarballPath, runner.Container, runner.ArvMountPoint,
+ runner.containerClient)
+ if err != nil {
+ return "", err
}
+
return imageID, nil
}
} else {
arvMountCmd = append(arvMountCmd, "--mount-by-id", "by_id")
}
+ arvMountCmd = append(arvMountCmd, "--mount-by-id", "by_uuid")
arvMountCmd = append(arvMountCmd, runner.ArvMountPoint)
runner.ArvMount, err = runner.RunArvMount(arvMountCmd, token)
}
}
}
+ runner.ArvMount = nil
}
if runner.ArvMountPoint != "" {
if rmerr := os.Remove(runner.ArvMountPoint); rmerr != nil {
runner.CrunchLog.Printf("While cleaning up arv-mount directory %s: %v", runner.ArvMountPoint, rmerr)
}
+ runner.ArvMountPoint = ""
}
if rmerr := os.RemoveAll(runner.parentTemp); rmerr != nil {
}
checkErr("stopHoststat", runner.stopHoststat())
checkErr("CommitLogs", runner.CommitLogs())
+ runner.CleanupDirs()
checkErr("UpdateContainerFinal", runner.UpdateContainerFinal())
}()
exit chan int
}
-func (e *stubExecutor) ImageLoaded(imageID string) bool { return e.imageLoaded }
-func (e *stubExecutor) LoadImage(filename string) error { e.loaded = filename; return e.loadErr }
+func (e *stubExecutor) LoadImage(imageId string, tarball string, container arvados.Container, keepMount string,
+ containerClient *arvados.Client) error {
+ e.loaded = tarball
+ return e.loadErr
+}
func (e *stubExecutor) Create(spec containerSpec) error { e.created = spec; return e.createErr }
func (e *stubExecutor) Start() error { e.exit = make(chan int, 1); go e.runFunc(); return e.startErr }
func (e *stubExecutor) CgroupID() string { return "cgroupid" }
imageID, err = s.runner.LoadImage()
c.Check(err, ErrorMatches, "image collection does not include a \\.tar image file")
c.Check(s.executor.loaded, Equals, "")
-
- // if executor reports image is already loaded, LoadImage should not be called
- s.runner.Container.ContainerImage = arvadostest.DockerImage112PDH
- s.executor.imageLoaded = true
- s.executor.loaded = ""
- s.executor.loadErr = nil
- imageID, err = s.runner.LoadImage()
- c.Check(err, IsNil)
- c.Check(s.executor.loaded, Equals, "")
- c.Check(imageID, Equals, strings.TrimSuffix(arvadostest.DockerImage112Filename, ".tar"))
}
type ArvErrorTestClient struct{}
c.Check(err, IsNil)
c.Check(am.Cmd, DeepEquals, []string{"--foreground", "--allow-other",
"--read-write", "--storage-classes", "default", "--crunchstat-interval=5",
- "--mount-by-pdh", "by_id", realTemp + "/keep1"})
+ "--mount-by-pdh", "by_id", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
c.Check(bindmounts, DeepEquals, map[string]bindmount{"/tmp": {realTemp + "/tmp2", false}})
os.RemoveAll(cr.ArvMountPoint)
cr.CleanupDirs()
c.Check(err, IsNil)
c.Check(am.Cmd, DeepEquals, []string{"--foreground", "--allow-other",
"--read-write", "--storage-classes", "foo,bar", "--crunchstat-interval=5",
- "--mount-by-pdh", "by_id", realTemp + "/keep1"})
+ "--mount-by-pdh", "by_id", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
c.Check(bindmounts, DeepEquals, map[string]bindmount{"/out": {realTemp + "/tmp2", false}, "/tmp": {realTemp + "/tmp3", false}})
os.RemoveAll(cr.ArvMountPoint)
cr.CleanupDirs()
c.Check(err, IsNil)
c.Check(am.Cmd, DeepEquals, []string{"--foreground", "--allow-other",
"--read-write", "--storage-classes", "default", "--crunchstat-interval=5",
- "--mount-by-pdh", "by_id", realTemp + "/keep1"})
+ "--mount-by-pdh", "by_id", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
c.Check(bindmounts, DeepEquals, map[string]bindmount{"/tmp": {realTemp + "/tmp2", false}, "/etc/arvados/ca-certificates.crt": {stubCertPath, true}})
os.RemoveAll(cr.ArvMountPoint)
cr.CleanupDirs()
c.Check(err, IsNil)
c.Check(am.Cmd, DeepEquals, []string{"--foreground", "--allow-other",
"--read-write", "--storage-classes", "default", "--crunchstat-interval=5",
- "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", realTemp + "/keep1"})
+ "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
c.Check(bindmounts, DeepEquals, map[string]bindmount{"/keeptmp": {realTemp + "/keep1/tmp0", false}})
os.RemoveAll(cr.ArvMountPoint)
cr.CleanupDirs()
c.Check(err, IsNil)
c.Check(am.Cmd, DeepEquals, []string{"--foreground", "--allow-other",
"--read-write", "--storage-classes", "default", "--crunchstat-interval=5",
- "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", realTemp + "/keep1"})
+ "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
c.Check(bindmounts, DeepEquals, map[string]bindmount{
"/keepinp": {realTemp + "/keep1/by_id/59389a8f9ee9d399be35462a0f92541c+53", true},
"/keepout": {realTemp + "/keep1/tmp0", false},
c.Check(err, IsNil)
c.Check(am.Cmd, DeepEquals, []string{"--foreground", "--allow-other",
"--read-write", "--storage-classes", "default", "--crunchstat-interval=5",
- "--file-cache", "512", "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", realTemp + "/keep1"})
+ "--file-cache", "512", "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
c.Check(bindmounts, DeepEquals, map[string]bindmount{
"/keepinp": {realTemp + "/keep1/by_id/59389a8f9ee9d399be35462a0f92541c+53", true},
"/keepout": {realTemp + "/keep1/tmp0", false},
c.Check(err, IsNil)
c.Check(am.Cmd, DeepEquals, []string{"--foreground", "--allow-other",
"--read-write", "--storage-classes", "default", "--crunchstat-interval=5",
- "--file-cache", "512", "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", realTemp + "/keep1"})
+ "--file-cache", "512", "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
c.Check(bindmounts, DeepEquals, map[string]bindmount{
"/tmp": {realTemp + "/tmp2", false},
"/tmp/foo": {realTemp + "/keep1/tmp0", true},
"strings"
"time"
+ "git.arvados.org/arvados.git/sdk/go/arvados"
dockertypes "github.com/docker/docker/api/types"
dockercontainer "github.com/docker/docker/api/types/container"
dockerclient "github.com/docker/docker/client"
}, err
}
-func (e *dockerExecutor) ImageLoaded(imageID string) bool {
+func (e *dockerExecutor) LoadImage(imageID string, imageTarballPath string, container arvados.Container, arvMountPoint string,
+ containerClient *arvados.Client) error {
_, _, err := e.dockerclient.ImageInspectWithRaw(context.TODO(), imageID)
- return err == nil
-}
+ if err == nil {
+ // already loaded
+ return nil
+ }
-func (e *dockerExecutor) LoadImage(filename string) error {
- f, err := os.Open(filename)
+ f, err := os.Open(imageTarballPath)
if err != nil {
return err
}
import (
"io"
+ "git.arvados.org/arvados.git/sdk/go/arvados"
"golang.org/x/net/context"
)
// containerExecutor is an interface to a container runtime
// (docker/singularity).
type containerExecutor interface {
- // ImageLoaded determines whether the given image is already
- // available to use without calling ImageLoad.
- ImageLoaded(imageID string) bool
-
// ImageLoad loads the image from the given tarball such that
// it can be used to create/start a container.
- LoadImage(filename string) error
+ LoadImage(imageID string, imageTarballPath string, container arvados.Container, keepMount string,
+ containerClient *arvados.Client) error
// Wait for the container process to finish, and return its
// exit code. If applicable, also remove the stopped container
"strings"
"time"
+ "git.arvados.org/arvados.git/sdk/go/arvados"
"golang.org/x/net/context"
. "gopkg.in/check.v1"
)
Stdout: nopWriteCloser{&s.stdout},
Stderr: nopWriteCloser{&s.stderr},
}
- err := s.executor.LoadImage(busyboxDockerImage(c))
+ err := s.executor.LoadImage("", busyboxDockerImage(c), arvados.Container{}, "", nil)
c.Assert(err, IsNil)
}
package crunchrun
import (
+ "fmt"
"io/ioutil"
"os"
"os/exec"
"sort"
"syscall"
+ "time"
+ "git.arvados.org/arvados.git/sdk/go/arvados"
"golang.org/x/net/context"
)
}, nil
}
-func (e *singularityExecutor) ImageLoaded(string) bool {
- return false
+// getOrCreateProject returns the group (project) named `name` owned
+// by `ownerUuid`, creating it if no matching project exists yet.
+func (e *singularityExecutor) getOrCreateProject(ownerUuid string, name string, containerClient *arvados.Client) (*arvados.Group, error) {
+	var gp arvados.GroupList
+	// Look for an existing project with this exact owner and name.
+	err := containerClient.RequestAndDecode(&gp,
+		arvados.EndpointGroupList.Method,
+		arvados.EndpointGroupList.Path,
+		nil, arvados.ListOptions{Filters: []arvados.Filter{
+			arvados.Filter{"owner_uuid", "=", ownerUuid},
+			arvados.Filter{"name", "=", name},
+			arvados.Filter{"group_class", "=", "project"},
+		},
+			Limit: 1})
+	if err != nil {
+		return nil, err
+	}
+	if len(gp.Items) == 1 {
+		return &gp.Items[0], nil
+	}
+
+	// Not found: create the project.
+	var rgroup arvados.Group
+	err = containerClient.RequestAndDecode(&rgroup,
+		arvados.EndpointGroupCreate.Method,
+		arvados.EndpointGroupCreate.Path,
+		nil, map[string]interface{}{
+			"group": map[string]string{
+				"owner_uuid": ownerUuid,
+				"name": name,
+				"group_class": "project",
+			},
+		})
+	if err != nil {
+		return nil, err
+	}
+	return &rgroup, nil
+}
+
+// checkImageCache returns the collection used to cache the
+// singularity (SIF) image built from dockerImageID, creating it if
+// necessary. Cached images live in an "auto-generated singularity
+// images" project inside a ".cache" project owned by the container's
+// runtime user. Newly created collections get a trash_at two weeks
+// in the future (renewed later when the image is reused).
+func (e *singularityExecutor) checkImageCache(dockerImageID string, container arvados.Container, arvMountPoint string,
+	containerClient *arvados.Client) (collection *arvados.Collection, err error) {
+
+	// Cache the image to keep
+	cacheGroup, err := e.getOrCreateProject(container.RuntimeUserUUID, ".cache", containerClient)
+	if err != nil {
+		return nil, fmt.Errorf("error getting '.cache' project: %v", err)
+	}
+	imageGroup, err := e.getOrCreateProject(cacheGroup.UUID, "auto-generated singularity images", containerClient)
+	if err != nil {
+		return nil, fmt.Errorf("error getting 'auto-generated singularity images' project: %s", err)
+	}
+
+	collectionName := fmt.Sprintf("singularity image for %v", dockerImageID)
+	var cl arvados.CollectionList
+	err = containerClient.RequestAndDecode(&cl,
+		arvados.EndpointCollectionList.Method,
+		arvados.EndpointCollectionList.Path,
+		nil, arvados.ListOptions{Filters: []arvados.Filter{
+			arvados.Filter{"owner_uuid", "=", imageGroup.UUID},
+			arvados.Filter{"name", "=", collectionName},
+		},
+			Limit: 1})
+	if err != nil {
+		return nil, fmt.Errorf("error querying for collection '%v': %v", collectionName, err)
+	}
+	var imageCollection arvados.Collection
+	if len(cl.Items) == 1 {
+		imageCollection = cl.Items[0]
+	} else {
+		// No cached image yet: create a placeholder collection. The
+		// timestamp suffix reduces the chance of a name collision if
+		// two containers race to create the cache entry.
+		collectionName := collectionName + " " + time.Now().UTC().Format(time.RFC3339)
+		exp := time.Now().Add(24 * 7 * 2 * time.Hour)
+		err = containerClient.RequestAndDecode(&imageCollection,
+			arvados.EndpointCollectionCreate.Method,
+			arvados.EndpointCollectionCreate.Path,
+			nil, map[string]interface{}{
+				"collection": map[string]string{
+					"owner_uuid": imageGroup.UUID,
+					"name": collectionName,
+					"trash_at": exp.UTC().Format(time.RFC3339),
+				},
+			})
+		if err != nil {
+			return nil, fmt.Errorf("error creating '%v' collection: %s", collectionName, err)
+		}
+
+	}
+
+	return &imageCollection, nil
}
// LoadImage will satisfy ContainerExecuter interface transforming
// containerImage into a sif file for later use.
-func (e *singularityExecutor) LoadImage(imageTarballPath string) error {
-	e.logf("building singularity image")
-	// "singularity build" does not accept a
-	// docker-archive://... filename containing a ":" character,
-	// as in "/path/to/sha256:abcd...1234.tar". Workaround: make a
-	// symlink that doesn't have ":" chars.
-	err := os.Symlink(imageTarballPath, e.tmpdir+"/image.tar")
+func (e *singularityExecutor) LoadImage(dockerImageID string, imageTarballPath string, container arvados.Container, arvMountPoint string,
+	containerClient *arvados.Client) error {
+
+	// With an API client available, build/reuse the SIF in the
+	// Keep-backed image cache collection (see checkImageCache);
+	// otherwise fall back to building into the executor's tmpdir.
+	var imageFilename string
+	var sifCollection *arvados.Collection
+	var err error
+	if containerClient != nil {
+		sifCollection, err = e.checkImageCache(dockerImageID, container, arvMountPoint, containerClient)
+		if err != nil {
+			return err
+		}
+		imageFilename = fmt.Sprintf("%s/by_uuid/%s/image.sif", arvMountPoint, sifCollection.UUID)
+	} else {
+		imageFilename = e.tmpdir + "/image.sif"
+	}
+
+	// Only build the SIF if it does not already exist at the target path.
+	if _, err := os.Stat(imageFilename); os.IsNotExist(err) {
+		e.logf("building singularity image")
+		// "singularity build" does not accept a
+		// docker-archive://... filename containing a ":" character,
+		// as in "/path/to/sha256:abcd...1234.tar". Workaround: make a
+		// symlink that doesn't have ":" chars.
+		err := os.Symlink(imageTarballPath, e.tmpdir+"/image.tar")
+		if err != nil {
+			return err
+		}
+
+		build := exec.Command("singularity", "build", imageFilename, "docker-archive://"+e.tmpdir+"/image.tar")
+		e.logf("%v", build.Args)
+		out, err := build.CombinedOutput()
+		// INFO:    Starting build...
+		// Getting image source signatures
+		// Copying blob ab15617702de done
+		// Copying config 651e02b8a2 done
+		// Writing manifest to image destination
+		// Storing signatures
+		// 2021/04/22 14:42:14  info unpack layer: sha256:21cbfd3a344c52b197b9fa36091e66d9cbe52232703ff78d44734f85abb7ccd3
+		// INFO:    Creating SIF file...
+		// INFO:    Build complete: arvados-jobs.latest.sif
+		e.logf("%s", out)
+		if err != nil {
+			return err
+		}
+	}
+
+	if containerClient == nil {
+		e.imageFilename = imageFilename
+		return nil
+	}
+
+	// update TTL to now + two weeks
+	exp := time.Now().Add(24 * 7 * 2 * time.Hour)
+
+	uuidPath, err := containerClient.PathForUUID("update", sifCollection.UUID)
	if err != nil {
-		return err
+		e.logf("error PathForUUID: %v", err)
+		return nil
+	}
+	var imageCollection arvados.Collection
+	err = containerClient.RequestAndDecode(&imageCollection,
+		arvados.EndpointCollectionUpdate.Method,
+		uuidPath,
+		nil, map[string]interface{}{
+			"collection": map[string]string{
+				"name": fmt.Sprintf("singularity image for %v", dockerImageID),
+				"trash_at": exp.UTC().Format(time.RFC3339),
+			},
+		})
+	if err == nil {
+		// If we just wrote the image to the cache, the
+		// response also returns the updated PDH
+		e.imageFilename = fmt.Sprintf("%s/by_id/%s/image.sif", arvMountPoint, imageCollection.PortableDataHash)
+		return nil
	}
-	e.imageFilename = e.tmpdir + "/image.sif"
-	build := exec.Command("singularity", "build", e.imageFilename, "docker-archive://"+e.tmpdir+"/image.tar")
-	e.logf("%v", build.Args)
-	out, err := build.CombinedOutput()
-	// INFO:    Starting build...
-	// Getting image source signatures
-	// Copying blob ab15617702de done
-	// Copying config 651e02b8a2 done
-	// Writing manifest to image destination
-	// Storing signatures
-	// 2021/04/22 14:42:14  info unpack layer: sha256:21cbfd3a344c52b197b9fa36091e66d9cbe52232703ff78d44734f85abb7ccd3
-	// INFO:    Creating SIF file...
-	// INFO:    Build complete: arvados-jobs.latest.sif
-	e.logf("%s", out)
+
+	e.logf("error updating/renaming collection for cached sif image: %v", err)
+	// Failed to update but maybe it lost a race and there is
+	// another cached collection in the same place, so check the cache
+	// again
+	sifCollection, err = e.checkImageCache(dockerImageID, container, arvMountPoint, containerClient)
	if err != nil {
		return err
	}
+	e.imageFilename = fmt.Sprintf("%s/by_id/%s/image.sif", arvMountPoint, sifCollection.PortableDataHash)
+
	return nil
}
mount := e.spec.BindMounts[path]
args = append(args, "--bind", mount.HostPath+":"+path+":"+readonlyflag[mount.ReadOnly])
}
- args = append(args, e.imageFilename)
- args = append(args, e.spec.Command...)
// This is for singularity 3.5.2. There are some behaviors
// that will change in singularity 3.6, please see:
// https://sylabs.io/guides/3.5/user-guide/environment_and_metadata.html
env := make([]string, 0, len(e.spec.Env))
for k, v := range e.spec.Env {
- env = append(env, "SINGULARITYENV_"+k+"="+v)
+ if k == "HOME" {
+ // $HOME is a special case
+ args = append(args, "--home="+v)
+ } else {
+ env = append(env, "SINGULARITYENV_"+k+"="+v)
+ }
}
+ args = append(args, e.imageFilename)
+ args = append(args, e.spec.Command...)
+
path, err := exec.LookPath(args[0])
if err != nil {
return err
GatewayAddress string `json:"gateway_address"`
InteractiveSessionStarted bool `json:"interactive_session_started"`
OutputStorageClasses []string `json:"output_storage_classes"`
+ RuntimeUserUUID string `json:"runtime_user_uuid"`
+ RuntimeAuthScopes []string `json:"runtime_auth_scopes"`
+ RuntimeToken string `json:"runtime_token"`
}
// ContainerRequest is an arvados#container_request resource.
copy_opts.add_argument(
'--project-uuid', dest='project_uuid',
help='The UUID of the project at the destination to which the collection or workflow should be copied.')
+ copy_opts.add_argument(
+ '--storage-classes', dest='storage_classes',
+        help='Comma separated list of storage classes to be used when saving data to the destination Arvados instance.')
copy_opts.add_argument(
'object_uuid',
parents=[copy_opts, arv_cmd.retry_opt])
args = parser.parse_args()
+ if args.storage_classes:
+ args.storage_classes = [x for x in args.storage_classes.strip().replace(' ', '').split(',') if x]
+
if args.verbose:
logger.setLevel(logging.DEBUG)
else:
if not body["name"]:
body['name'] = "copied from " + collection_uuid
+ if args.storage_classes:
+ body['storage_classes_desired'] = args.storage_classes
+
body['owner_uuid'] = args.project_uuid
dst_collection = dst.collections().create(body=body, ensure_unique_name=True).execute(num_retries=args.retries)
if progress_writer:
progress_writer.report(obj_uuid, bytes_written, bytes_expected)
data = src_keep.get(word)
- dst_locator = dst_keep.put(data)
+ dst_locator = dst_keep.put(data, classes=(args.storage_classes or []))
dst_locators[blockhash] = dst_locator
bytes_written += loc.size
dst_manifest.write(' ')
with c.open('foo', 'wt') as f:
f.write('foo')
c.save_new("arv-copy foo collection", owner_uuid=src_proj)
+ coll_record = api.collections().get(uuid=c.manifest_locator()).execute()
+ assert coll_record['storage_classes_desired'] == ['default']
dest_proj = api.groups().create(body={"group": {"name": "arv-copy dest project", "group_class": "project"}}).execute()["uuid"]
assert len(contents["items"]) == 0
try:
- self.run_copy(["--project-uuid", dest_proj, src_proj])
+ self.run_copy(["--project-uuid", dest_proj, "--storage-classes", "foo", src_proj])
except SystemExit as e:
assert e.code == 0
assert contents["items"][0]["uuid"] != c.manifest_locator()
assert contents["items"][0]["name"] == "arv-copy foo collection"
assert contents["items"][0]["portable_data_hash"] == c.portable_data_hash()
+ assert contents["items"][0]["storage_classes_desired"] == ["foo"]
finally:
os.environ['HOME'] = home_was
# already know how to properly treat them.
attribute :secret_mounts, :jsonbHash, default: {}
attribute :runtime_status, :jsonbHash, default: {}
- attribute :runtime_auth_scopes, :jsonbHash, default: {}
+ attribute :runtime_auth_scopes, :jsonbArray, default: []
attribute :output_storage_classes, :jsonbArray, default: ["default"]
serialize :environment, Hash