19070: Always make copy of Docker images with --project-uuid
author Peter Amstutz <peter.amstutz@curii.com>
Thu, 28 Apr 2022 16:53:55 +0000 (12:53 -0400)
committer Peter Amstutz <peter.amstutz@curii.com>
Wed, 11 May 2022 21:27:13 +0000 (17:27 -0400)
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz@curii.com>

sdk/cwl/arvados_cwl/arvdocker.py
sdk/cwl/arvados_cwl/executor.py
sdk/cwl/arvados_cwl/runner.py
sdk/python/arvados/commands/keepdocker.py

index 04e2a4cffcfb5e674aa471555aa5e0c7fac2033e..d5295afc23a4d8511569091ef96e94a8c7a6268c 100644 (file)
@@ -80,11 +80,17 @@ def arv_docker_get_image(api_client, dockerRequirement, pull_image, project_uuid
         image_name = sp[0]
         image_tag = sp[1] if len(sp) > 1 else "latest"
 
+        out_of_project_images = arvados.commands.keepdocker.list_images_in_arv(api_client, 3,
+                                                                image_name=image_name,
+                                                                image_tag=image_tag,
+                                                                project_uuid=None)
+
         images = arvados.commands.keepdocker.list_images_in_arv(api_client, 3,
                                                                 image_name=image_name,
-                                                                image_tag=image_tag)
+                                                                image_tag=image_tag,
+                                                                project_uuid=project_uuid)
 
-        if images and match_local_docker:
+        if match_local_docker:
             local_image_id = determine_image_id(dockerRequirement["dockerImageId"])
             if local_image_id:
                 # find it in the list
@@ -98,15 +104,25 @@ def arv_docker_get_image(api_client, dockerRequirement, pull_image, project_uuid
                     # force re-upload.
                     images = []
 
+                for i in out_of_project_images:
+                    if i[1]["dockerhash"] == local_image_id:
+                        found = True
+                        out_of_project_images = [i]
+                        break
+                if not found:
+                    # force re-upload.
+                    out_of_project_images = []
+
         if not images:
-            # Fetch Docker image if necessary.
-            try:
-                result = cwltool.docker.DockerCommandLineJob.get_image(dockerRequirement, pull_image,
-                                                              force_pull, tmp_outdir_prefix)
-                if not result:
-                    raise WorkflowException("Docker image '%s' not available" % dockerRequirement["dockerImageId"])
-            except OSError as e:
-                raise WorkflowException("While trying to get Docker image '%s', failed to execute 'docker': %s" % (dockerRequirement["dockerImageId"], e))
+            if not out_of_project_images:
+                # Fetch Docker image if necessary.
+                try:
+                    result = cwltool.docker.DockerCommandLineJob.get_image(dockerRequirement, pull_image,
+                                                                  force_pull, tmp_outdir_prefix)
+                    if not result:
+                        raise WorkflowException("Docker image '%s' not available" % dockerRequirement["dockerImageId"])
+                except OSError as e:
+                    raise WorkflowException("While trying to get Docker image '%s', failed to execute 'docker': %s" % (dockerRequirement["dockerImageId"], e))
 
             # Upload image to Arvados
             args = []
@@ -125,7 +141,8 @@ def arv_docker_get_image(api_client, dockerRequirement, pull_image, project_uuid
 
             images = arvados.commands.keepdocker.list_images_in_arv(api_client, 3,
                                                                     image_name=image_name,
-                                                                    image_tag=image_tag)
+                                                                    image_tag=image_tag,
+                                                                    project_uuid=project_uuid)
 
         if not images:
             raise WorkflowException("Could not find Docker image %s:%s" % (image_name, image_tag))
index 680ca0b7b2c85df6b2f7d55709205b47ad591ef7..ef371b43dfd9068fa5d7c94ad412e26a6833eae4 100644 (file)
@@ -571,6 +571,10 @@ The 'jobs' API is no longer supported.
         else:
             tool = updated_tool
 
+        if runtimeContext.update_workflow and self.project_uuid is None:
+            existing_wf = self.api.workflows().get(uuid=runtimeContext.update_workflow).execute()
+            self.project_uuid = existing_wf["owner_uuid"]
+
         # Upload direct dependencies of workflow steps, get back mapping of files to keep references.
         # Also uploads docker images.
         merged_map = upload_workflow_deps(self, tool)
@@ -584,13 +588,12 @@ The 'jobs' API is no longer supported.
         loadingContext.metadata = tool.metadata
         tool = load_tool(tool.tool, loadingContext)
 
-        existing_uuid = runtimeContext.update_workflow
-        if existing_uuid or runtimeContext.create_workflow:
+        if runtimeContext.update_workflow or runtimeContext.create_workflow:
             # Create a pipeline template or workflow record and exit.
             if self.work_api == "containers":
                 uuid = upload_workflow(self, tool, job_order,
                                         self.project_uuid,
-                                        uuid=existing_uuid,
+                                        uuid=runtimeContext.update_workflow,
                                         submit_runner_ram=runtimeContext.submit_runner_ram,
                                         name=runtimeContext.name,
                                         merged_map=merged_map,
index 7d4310b0e0ce94b9430ded7f60ca04416e2964b9..dae0541bb4098bbc0bdc43432e673c8028d6d5ea 100644 (file)
@@ -456,9 +456,9 @@ def upload_docker(arvrunner, tool):
         (docker_req, docker_is_req) = tool.get_requirement("DockerRequirement")
         if docker_req:
             if docker_req.get("dockerOutputDirectory") and arvrunner.work_api != "containers":
-                # TODO: can be supported by containers API, but not jobs API.
                 raise SourceLine(docker_req, "dockerOutputDirectory", UnsupportedRequirement).makeError(
                     "Option 'dockerOutputDirectory' of DockerRequirement not supported.")
+
             arvados_cwl.arvdocker.arv_docker_get_image(arvrunner.api, docker_req, True, arvrunner.project_uuid,
                                                        arvrunner.runtimeContext.force_docker_pull,
                                                        arvrunner.runtimeContext.tmp_outdir_prefix,
index c8f6e78087acb056bd558c3d520cdc4b62940a9c..db4edd2dfa6f1e089979c56bbb6751afd84b2c3c 100644 (file)
@@ -258,7 +258,7 @@ def _new_image_listing(link, dockerhash, repo='<none>', tag='<none>'):
         'tag': tag,
         }
 
-def list_images_in_arv(api_client, num_retries, image_name=None, image_tag=None):
+def list_images_in_arv(api_client, num_retries, image_name=None, image_tag=None, project_uuid=None):
     """List all Docker images known to the api_client with image_name and
     image_tag.  If no image_name is given, defaults to listing all
     Docker images.
@@ -273,13 +273,18 @@ def list_images_in_arv(api_client, num_retries, image_name=None, image_tag=None)
     search_filters = []
     repo_links = None
     hash_links = None
+
+    project_filter = []
+    if project_uuid is not None:
+        project_filter = [["owner_uuid", "=", project_uuid]]
+
     if image_name:
         # Find images with the name the user specified.
         search_links = _get_docker_links(
             api_client, num_retries,
             filters=[['link_class', '=', 'docker_image_repo+tag'],
                      ['name', '=',
-                      '{}:{}'.format(image_name, image_tag or 'latest')]])
+                      '{}:{}'.format(image_name, image_tag or 'latest')]]+project_filter)
         if search_links:
             repo_links = search_links
         else:
@@ -287,7 +292,7 @@ def list_images_in_arv(api_client, num_retries, image_name=None, image_tag=None)
             search_links = _get_docker_links(
                 api_client, num_retries,
                 filters=[['link_class', '=', 'docker_image_hash'],
-                         ['name', 'ilike', image_name + '%']])
+                         ['name', 'ilike', image_name + '%']]+project_filter)
             hash_links = search_links
         # Only list information about images that were found in the search.
         search_filters.append(['head_uuid', 'in',
@@ -299,7 +304,7 @@ def list_images_in_arv(api_client, num_retries, image_name=None, image_tag=None)
     if hash_links is None:
         hash_links = _get_docker_links(
             api_client, num_retries,
-            filters=search_filters + [['link_class', '=', 'docker_image_hash']])
+            filters=search_filters + [['link_class', '=', 'docker_image_hash']]+project_filter)
     hash_link_map = {link['head_uuid']: link for link in reversed(hash_links)}
 
     # Each collection may have more than one name (though again, one name
@@ -309,7 +314,7 @@ def list_images_in_arv(api_client, num_retries, image_name=None, image_tag=None)
         repo_links = _get_docker_links(
             api_client, num_retries,
             filters=search_filters + [['link_class', '=',
-                                       'docker_image_repo+tag']])
+                                       'docker_image_repo+tag']]+project_filter)
     seen_image_names = collections.defaultdict(set)
     images = []
     for link in repo_links:
@@ -337,7 +342,7 @@ def list_images_in_arv(api_client, num_retries, image_name=None, image_tag=None)
     # Remove any image listings that refer to unknown collections.
     existing_coll_uuids = {coll['uuid'] for coll in arvados.util.list_all(
             api_client.collections().list, num_retries,
-            filters=[['uuid', 'in', [im['collection'] for im in images]]],
+            filters=[['uuid', 'in', [im['collection'] for im in images]]]+project_filter,
             select=['uuid'])}
     return [(image['collection'], image) for image in images
             if image['collection'] in existing_coll_uuids]