Merge branch 'main' into 15397-remove-obsolete-apis
[arvados.git] / sdk / cwl / arvados_cwl / arvdocker.py
1 # Copyright (C) The Arvados Authors. All rights reserved.
2 #
3 # SPDX-License-Identifier: Apache-2.0
4
5 import logging
6 import sys
7 import threading
8 import copy
9 import re
10 import subprocess
11
12 from schema_salad.sourceline import SourceLine
13
14 import cwltool.docker
15 from cwltool.errors import WorkflowException
16 import arvados.commands.keepdocker
17
18 logger = logging.getLogger('arvados.cwl-runner')
19
# A Docker tag is one word character followed by up to 127 word characters,
# dots or dashes.  The pattern must be applied with fullmatch(): a prefix
# match would accept "5000/image" (the port and path of a registry) as a tag.
_DOCKER_TAG_RE = re.compile(r"[\w][\w.-]{0,127}")


def _split_repository_and_tag(dockerImageId):
    """Split an image reference into the (repository, tag) pair to look up.

    A colon-separated piece only counts as a tag when the whole piece is a
    valid tag name; otherwise it belongs to the repository (for example the
    port of a ``host:port/name`` registry prefix) and the tag defaults to
    ``latest``.
    """
    parts = dockerImageId.split(":")
    if len(parts) == 1:
        parts.append("latest")
    elif len(parts) == 2:
        # If parts[1] is not a valid tag name, it is part of the repository.
        if not _DOCKER_TAG_RE.fullmatch(parts[1]):
            parts = [parts[0] + ":" + parts[1], "latest"]
    elif len(parts) == 3:
        # "host:port/name:tag": glue the first two pieces back together.
        if _DOCKER_TAG_RE.fullmatch(parts[2]):
            parts = [parts[0] + ":" + parts[1], parts[2]]
    # References with any other shape are compared on their first two pieces.
    return parts[0], parts[1]


def determine_image_id(dockerImageId):
    """Return the local Docker image id for ``dockerImageId``, or None.

    Runs ``docker images --no-trunc --all`` and scans its output for a row
    whose repository and tag columns match the reference, or whose image id
    column equals ``dockerImageId`` itself.

    :param dockerImageId: image reference (``name``, ``name:tag``,
        ``host:port/name[:tag]``) or a full image id.
    :returns: the third column (image id) of the first matching row, or
        None when no row matches.
    :raises OSError: if the ``docker`` executable cannot be run.
    :raises subprocess.CalledProcessError: if ``docker images`` exits
        with a non-zero status.
    """
    listing = subprocess.check_output(  # nosec
        ["docker", "images", "--no-trunc", "--all"]
    ).decode("utf-8")

    # The reference does not change from row to row, so parse it once.
    repository, tag = _split_repository_and_tag(dockerImageId)

    for line in listing.splitlines():
        match = re.match(r"^([^ ]+)\s+([^ ]+)\s+([^ ]+)", line)
        if not match:
            continue
        listed_repository, listed_tag, listed_id = match.groups()
        # check for repository:tag match or image id match
        if (
            (repository == listed_repository and tag == listed_tag)
            or dockerImageId == listed_id
        ):
            return listed_id

    return None
53
54
def _split_image_name_and_tag(image_ref):
    """Split a Docker image reference into ``(name, tag)``.

    Only the last colon can introduce a tag, and only when no ``/`` follows
    it; a colon followed by a path component is the port of a registry host
    (``host:5000/name``) and belongs to the name.  The tag defaults to
    ``latest`` when the reference does not carry one.
    """
    name, sep, tag = image_ref.rpartition(":")
    if not sep or "/" in tag:
        return image_ref, "latest"
    return name, tag


def _select_by_dockerhash(images, dockerhash):
    """Return ``[entry]`` for the first entry whose dockerhash matches, else ``[]``."""
    for entry in images:
        if entry[1]["dockerhash"] == dockerhash:
            return [entry]
    return []


def arv_docker_get_image(api_client, dockerRequirement, pull_image, runtimeContext):
    """Check if a Docker image is available in Keep, if not, upload it using arv-keepdocker.

    :param api_client: Arvados API client used for the image listing, the
        upload and the final collection lookup.
    :param dockerRequirement: mapping describing the DockerRequirement.  An
        ``http://arvados.org/cwl#dockerCollectionPDH`` entry is returned as
        is; otherwise ``dockerImageId`` (falling back to ``dockerPull``)
        names the image.  The caller's mapping is never modified.
    :param pull_image: passed through to cwltool's ``get_image`` when the
        image has to be fetched with the local Docker.
    :param runtimeContext: object providing ``project_uuid``,
        ``force_docker_pull``, ``tmp_outdir_prefix``, ``match_local_docker``,
        ``copy_deps`` and ``cached_docker_lookups`` (a dict mapping image
        ids to previously resolved portable data hashes, updated here).
    :returns: the portable data hash of the collection holding the image.
    :raises WorkflowException: if the image cannot be obtained, the upload
        fails, or the image is still not listed after the upload.
    """

    project_uuid = runtimeContext.project_uuid
    force_pull = runtimeContext.force_docker_pull
    tmp_outdir_prefix = runtimeContext.tmp_outdir_prefix
    match_local_docker = runtimeContext.match_local_docker
    copy_deps = runtimeContext.copy_deps
    cached_lookups = runtimeContext.cached_docker_lookups

    if "http://arvados.org/cwl#dockerCollectionPDH" in dockerRequirement:
        return dockerRequirement["http://arvados.org/cwl#dockerCollectionPDH"]

    if "dockerImageId" not in dockerRequirement and "dockerPull" in dockerRequirement:
        # Work on a copy so the caller's requirement is left untouched.
        dockerRequirement = copy.deepcopy(dockerRequirement)
        dockerRequirement["dockerImageId"] = dockerRequirement["dockerPull"]
        if hasattr(dockerRequirement, 'lc'):
            # Keep source-line data in step so errors point at dockerPull.
            dockerRequirement.lc.data["dockerImageId"] = dockerRequirement.lc.data["dockerPull"]

    image_id = dockerRequirement["dockerImageId"]

    if image_id in cached_lookups:
        return cached_lookups[image_id]

    with SourceLine(dockerRequirement, "dockerImageId", WorkflowException, logger.isEnabledFor(logging.DEBUG)):
        # A plain split(":") would cut "host:5000/name:tag" at the registry
        # port and look up / upload a nonexistent "host" image.
        image_name, image_tag = _split_image_name_and_tag(image_id)

        out_of_project_images = arvados.commands.keepdocker.list_images_in_arv(api_client, 3,
                                                                image_name=image_name,
                                                                image_tag=image_tag,
                                                                project_uuid=None)

        if copy_deps:
            # Only images that are available in the destination project
            images = arvados.commands.keepdocker.list_images_in_arv(api_client, 3,
                                                                    image_name=image_name,
                                                                    image_tag=image_tag,
                                                                    project_uuid=project_uuid)
        else:
            images = out_of_project_images

        if match_local_docker:
            local_image_id = determine_image_id(image_id)
            if local_image_id:
                # Keep only the entry matching the local image.  An empty
                # result forces a re-upload below.  Each list is filtered
                # independently, so a hit in one cannot mask a miss in the
                # other.
                images = _select_by_dockerhash(images, local_image_id)
                out_of_project_images = _select_by_dockerhash(out_of_project_images, local_image_id)

        if not images:
            if not out_of_project_images:
                # Fetch Docker image if necessary.
                try:
                    dockerjob = cwltool.docker.DockerCommandLineJob(None, None, None, None, None, None)
                    result = dockerjob.get_image(dockerRequirement, pull_image,
                                                 force_pull, tmp_outdir_prefix)
                    if not result:
                        raise WorkflowException("Docker image '%s' not available" % image_id)
                except OSError as e:
                    raise WorkflowException(
                        "While trying to get Docker image '%s', failed to execute 'docker': %s" % (image_id, e)
                    ) from e

            # Upload image to Arvados
            args = []
            if project_uuid:
                args.append("--project-uuid="+project_uuid)
            args.append(image_name)
            args.append(image_tag)
            logger.info("Uploading Docker image %s:%s", image_name, image_tag)
            try:
                arvados.commands.put.api_client = api_client
                arvados.commands.keepdocker.main(args, stdout=sys.stderr, install_sig_handlers=False, api=api_client)
            except SystemExit as e:
                # If e.code is None or zero, then keepdocker exited normally and we can continue
                if e.code:
                    raise WorkflowException("keepdocker exited with code %s" % e.code)

            # List again so the freshly uploaded image is picked up.
            images = arvados.commands.keepdocker.list_images_in_arv(api_client, 3,
                                                                    image_name=image_name,
                                                                    image_tag=image_tag,
                                                                    project_uuid=project_uuid)

        if not images:
            raise WorkflowException("Could not find Docker image %s:%s" % (image_name, image_tag))

        # The first element of a listing entry is used as the collection UUID.
        pdh = api_client.collections().get(uuid=images[0][0]).execute()["portable_data_hash"]

        cached_lookups[image_id] = pdh

    return pdh