From be6d658dd95d3a0d63bb3441e50877a4916a5b8e Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Thu, 28 Apr 2022 12:53:55 -0400 Subject: [PATCH] 19070: Always make copy of Docker images with --project-uuid Arvados-DCO-1.1-Signed-off-by: Peter Amstutz --- sdk/cwl/arvados_cwl/arvdocker.py | 39 ++++++++++++++++------- sdk/cwl/arvados_cwl/executor.py | 9 ++++-- sdk/cwl/arvados_cwl/runner.py | 2 +- sdk/python/arvados/commands/keepdocker.py | 17 ++++++---- 4 files changed, 46 insertions(+), 21 deletions(-) diff --git a/sdk/cwl/arvados_cwl/arvdocker.py b/sdk/cwl/arvados_cwl/arvdocker.py index 04e2a4cffc..d5295afc23 100644 --- a/sdk/cwl/arvados_cwl/arvdocker.py +++ b/sdk/cwl/arvados_cwl/arvdocker.py @@ -80,11 +80,17 @@ def arv_docker_get_image(api_client, dockerRequirement, pull_image, project_uuid image_name = sp[0] image_tag = sp[1] if len(sp) > 1 else "latest" + out_of_project_images = arvados.commands.keepdocker.list_images_in_arv(api_client, 3, + image_name=image_name, + image_tag=image_tag, + project_uuid=None) + images = arvados.commands.keepdocker.list_images_in_arv(api_client, 3, image_name=image_name, - image_tag=image_tag) + image_tag=image_tag, + project_uuid=project_uuid) - if images and match_local_docker: + if match_local_docker: local_image_id = determine_image_id(dockerRequirement["dockerImageId"]) if local_image_id: # find it in the list @@ -98,15 +104,25 @@ def arv_docker_get_image(api_client, dockerRequirement, pull_image, project_uuid # force re-upload. images = [] + for i in out_of_project_images: + if i[1]["dockerhash"] == local_image_id: + found = True + out_of_project_images = [i] + break + if not found: + # force re-upload. + out_of_project_images = [] + if not images: - # Fetch Docker image if necessary. - try: - result = cwltool.docker.DockerCommandLineJob.get_image(dockerRequirement, pull_image, - force_pull, tmp_outdir_prefix) - if not result: - raise WorkflowException("Docker image '%s' not available" % dockerRequirement["dockerImageId"]) - except OSError as e: - raise WorkflowException("While trying to get Docker image '%s', failed to execute 'docker': %s" % (dockerRequirement["dockerImageId"], e)) + if not out_of_project_images: + # Fetch Docker image if necessary. + try: + result = cwltool.docker.DockerCommandLineJob.get_image(dockerRequirement, pull_image, + force_pull, tmp_outdir_prefix) + if not result: + raise WorkflowException("Docker image '%s' not available" % dockerRequirement["dockerImageId"]) + except OSError as e: + raise WorkflowException("While trying to get Docker image '%s', failed to execute 'docker': %s" % (dockerRequirement["dockerImageId"], e)) # Upload image to Arvados args = [] @@ -125,7 +141,8 @@ def arv_docker_get_image(api_client, dockerRequirement, pull_image, project_uuid images = arvados.commands.keepdocker.list_images_in_arv(api_client, 3, image_name=image_name, - image_tag=image_tag) + image_tag=image_tag, + project_uuid=project_uuid) if not images: raise WorkflowException("Could not find Docker image %s:%s" % (image_name, image_tag)) diff --git a/sdk/cwl/arvados_cwl/executor.py b/sdk/cwl/arvados_cwl/executor.py index 680ca0b7b2..ef371b43df 100644 --- a/sdk/cwl/arvados_cwl/executor.py +++ b/sdk/cwl/arvados_cwl/executor.py @@ -571,6 +571,10 @@ The 'jobs' API is no longer supported. else: tool = updated_tool + if runtimeContext.update_workflow and self.project_uuid is None: + existing_wf = self.api.workflows().get(uuid=runtimeContext.update_workflow).execute() + self.project_uuid = existing_wf["owner_uuid"] + # Upload direct dependencies of workflow steps, get back mapping of files to keep references. # Also uploads docker images. merged_map = upload_workflow_deps(self, tool) @@ -584,13 +588,12 @@ The 'jobs' API is no longer supported. loadingContext.metadata = tool.metadata tool = load_tool(tool.tool, loadingContext) - existing_uuid = runtimeContext.update_workflow - if existing_uuid or runtimeContext.create_workflow: + if runtimeContext.update_workflow or runtimeContext.create_workflow: # Create a pipeline template or workflow record and exit. if self.work_api == "containers": uuid = upload_workflow(self, tool, job_order, self.project_uuid, - uuid=existing_uuid, + uuid=runtimeContext.update_workflow, submit_runner_ram=runtimeContext.submit_runner_ram, name=runtimeContext.name, merged_map=merged_map, diff --git a/sdk/cwl/arvados_cwl/runner.py b/sdk/cwl/arvados_cwl/runner.py index 7d4310b0e0..dae0541bb4 100644 --- a/sdk/cwl/arvados_cwl/runner.py +++ b/sdk/cwl/arvados_cwl/runner.py @@ -456,9 +456,9 @@ def upload_docker(arvrunner, tool): (docker_req, docker_is_req) = tool.get_requirement("DockerRequirement") if docker_req: if docker_req.get("dockerOutputDirectory") and arvrunner.work_api != "containers": - # TODO: can be supported by containers API, but not jobs API. raise SourceLine(docker_req, "dockerOutputDirectory", UnsupportedRequirement).makeError( "Option 'dockerOutputDirectory' of DockerRequirement not supported.") + arvados_cwl.arvdocker.arv_docker_get_image(arvrunner.api, docker_req, True, arvrunner.project_uuid, arvrunner.runtimeContext.force_docker_pull, arvrunner.runtimeContext.tmp_outdir_prefix, diff --git a/sdk/python/arvados/commands/keepdocker.py b/sdk/python/arvados/commands/keepdocker.py index c8f6e78087..db4edd2dfa 100644 --- a/sdk/python/arvados/commands/keepdocker.py +++ b/sdk/python/arvados/commands/keepdocker.py @@ -258,7 +258,7 @@ def _new_image_listing(link, dockerhash, repo='', tag=''): 'tag': tag, } -def list_images_in_arv(api_client, num_retries, image_name=None, image_tag=None): +def list_images_in_arv(api_client, num_retries, image_name=None, image_tag=None, project_uuid=None): """List all Docker images known to the api_client with image_name and image_tag. If no image_name is given, defaults to listing all Docker images. @@ -273,13 +273,18 @@ def list_images_in_arv(api_client, num_retries, image_name=None, image_tag=None) search_filters = [] repo_links = None hash_links = None + + project_filter = [] + if project_uuid is not None: + project_filter = [["owner_uuid", "=", project_uuid]] + if image_name: # Find images with the name the user specified. search_links = _get_docker_links( api_client, num_retries, filters=[['link_class', '=', 'docker_image_repo+tag'], ['name', '=', - '{}:{}'.format(image_name, image_tag or 'latest')]]) + '{}:{}'.format(image_name, image_tag or 'latest')]]+project_filter) if search_links: repo_links = search_links else: @@ -287,7 +292,7 @@ def list_images_in_arv(api_client, num_retries, image_name=None, image_tag=None) search_links = _get_docker_links( api_client, num_retries, filters=[['link_class', '=', 'docker_image_hash'], - ['name', 'ilike', image_name + '%']]) + ['name', 'ilike', image_name + '%']]+project_filter) hash_links = search_links # Only list information about images that were found in the search. search_filters.append(['head_uuid', 'in', @@ -299,7 +304,7 @@ def list_images_in_arv(api_client, num_retries, image_name=None, image_tag=None) if hash_links is None: hash_links = _get_docker_links( api_client, num_retries, - filters=search_filters + [['link_class', '=', 'docker_image_hash']]) + filters=search_filters + [['link_class', '=', 'docker_image_hash']]+project_filter) hash_link_map = {link['head_uuid']: link for link in reversed(hash_links)} # Each collection may have more than one name (though again, one name @@ -309,7 +314,7 @@ def list_images_in_arv(api_client, num_retries, image_name=None, image_tag=None) repo_links = _get_docker_links( api_client, num_retries, filters=search_filters + [['link_class', '=', - 'docker_image_repo+tag']]) + 'docker_image_repo+tag']]+project_filter) seen_image_names = collections.defaultdict(set) images = [] for link in repo_links: @@ -337,7 +342,7 @@ def list_images_in_arv(api_client, num_retries, image_name=None, image_tag=None) # Remove any image listings that refer to unknown collections. existing_coll_uuids = {coll['uuid'] for coll in arvados.util.list_all( api_client.collections().list, num_retries, - filters=[['uuid', 'in', [im['collection'] for im in images]]], + filters=[['uuid', 'in', [im['collection'] for im in images]]]+project_filter, select=['uuid'])} return [(image['collection'], image) for image in images if image['collection'] in existing_coll_uuids] -- 2.30.2