18947: Move arvados-dispatch-slurm into arvados-server binary.
[arvados.git] / sdk / cwl / arvados_cwl / arvdocker.py
1 # Copyright (C) The Arvados Authors. All rights reserved.
2 #
3 # SPDX-License-Identifier: Apache-2.0
4
5 import logging
6 import sys
7 import threading
8 import copy
9 import re
10 import subprocess
11
12 from schema_salad.sourceline import SourceLine
13
14 import cwltool.docker
15 from cwltool.errors import WorkflowException
16 import arvados.commands.keepdocker
17
# Logger registered under the 'arvados.cwl-runner' name.
logger = logging.getLogger('arvados.cwl-runner')

# Process-wide cache filled by arv_docker_get_image(): maps a requirement's
# "dockerImageId" value to the portable data hash of its Keep collection.
cached_lookups = {}
# Held around every read, write and reset of cached_lookups in this module.
cached_lookups_lock = threading.Lock()
22
def determine_image_id(dockerImageId):
    """Look up an image in the local Docker daemon and return its image id.

    Runs ``docker images --no-trunc --all`` and scans the listing for a line
    whose first two columns equal the repository and tag parsed from
    *dockerImageId*, or whose third column equals *dockerImageId* itself.

    :param dockerImageId: an image reference such as ``repo``, ``repo:tag``,
        ``host:port/repo``, ``host:port/repo:tag``, or a full image id.
    :returns: the third column of the first matching line of the listing
        (the image id), or ``None`` when no line matches.
    :raises OSError: if the ``docker`` executable cannot be run.
    :raises subprocess.CalledProcessError: if ``docker images`` exits non-zero.
    """
    # The reference does not change while scanning, so parse it once up front.
    valid_tag = re.compile(r"[\w][\w.-]{0,127}")
    parts = dockerImageId.split(":")
    if len(parts) == 1:
        # No tag given.
        parts.append("latest")
    elif len(parts) == 2:
        # "a:b" is either repo:tag or host:port/repo.  The whole of b must be
        # a valid tag name; a prefix-only match would accept "5000/image"
        # because of its leading "5000".
        if not valid_tag.fullmatch(parts[1]):
            parts = [parts[0] + ":" + parts[1], "latest"]
    elif len(parts) == 3:
        # "host:port/repo:tag": fold the port back into the repository name.
        if valid_tag.fullmatch(parts[2]):
            parts = [parts[0] + ":" + parts[1], parts[2]]
    repository, tag = parts[0], parts[1]

    listing = subprocess.check_output(  # nosec
        ["docker", "images", "--no-trunc", "--all"]
    ).decode("utf-8")

    row_pattern = re.compile(r"^([^ ]+)\s+([^ ]+)\s+([^ ]+)")
    for line in listing.splitlines():
        row = row_pattern.match(line)
        if not row:
            continue
        # Accept either a repository:tag match or a direct image id match.
        if (repository == row.group(1) and tag == row.group(2)) \
                or dockerImageId == row.group(3):
            return row.group(3)

    return None
57
58
def arv_docker_get_image(api_client, dockerRequirement, pull_image, project_uuid,
                         force_pull, tmp_outdir_prefix, match_local_docker):
    """Return the portable data hash of the Keep collection for a Docker image.

    If the requirement already names a collection PDH, that value is returned
    as-is.  Otherwise the image is looked up in Arvados; when it is missing
    (or, with *match_local_docker*, when none of the stored images has the
    same hash as the local one) it is fetched through cwltool and uploaded
    with arv-keepdocker.  Results are cached per "dockerImageId".

    :param api_client: Arvados API client used for lookups and the upload.
    :param dockerRequirement: the DockerRequirement mapping.
    :param pull_image: passed through to cwltool's ``get_image``.
    :param project_uuid: if truthy, the upload targets this project.
    :param force_pull: passed through to cwltool's ``get_image``.
    :param tmp_outdir_prefix: passed through to cwltool's ``get_image``.
    :param match_local_docker: if truthy, prefer the stored image whose
        "dockerhash" equals the local image id, re-uploading if none does.
    :returns: the collection's portable data hash.
    :raises WorkflowException: when the image cannot be obtained, uploaded,
        or found after the upload.
    """
    pdh_key = "http://arvados.org/cwl#dockerCollectionPDH"
    if pdh_key in dockerRequirement:
        return dockerRequirement[pdh_key]

    if "dockerImageId" not in dockerRequirement and "dockerPull" in dockerRequirement:
        # Fill in dockerImageId on a private copy, not on the caller's mapping.
        dockerRequirement = copy.deepcopy(dockerRequirement)
        dockerRequirement["dockerImageId"] = dockerRequirement["dockerPull"]
        if hasattr(dockerRequirement, 'lc'):
            dockerRequirement.lc.data["dockerImageId"] = dockerRequirement.lc.data["dockerPull"]

    with cached_lookups_lock:
        requested = dockerRequirement["dockerImageId"]
        if requested in cached_lookups:
            return cached_lookups[requested]

    debug_enabled = logger.isEnabledFor(logging.DEBUG)
    with SourceLine(dockerRequirement, "dockerImageId", WorkflowException, debug_enabled):
        name_and_tag = requested.split(":")
        image_name = name_and_tag[0]
        if len(name_and_tag) > 1:
            image_tag = name_and_tag[1]
        else:
            image_tag = "latest"

        def list_keep_images():
            # Same query before and after a possible upload.
            return arvados.commands.keepdocker.list_images_in_arv(
                api_client, 3, image_name=image_name, image_tag=image_tag)

        images = list_keep_images()

        if images and match_local_docker:
            local_image_id = determine_image_id(requested)
            if local_image_id:
                for entry in images:
                    if entry[1]["dockerhash"] == local_image_id:
                        # Keep only the stored image matching the local one.
                        images = [entry]
                        break
                else:
                    # Nothing stored matches the local image: force re-upload.
                    images = []

        if not images:
            # Make sure the image exists locally before uploading it.
            # The messages below re-read the key from the mapping instead of
            # reusing `requested`, since get_image receives the mapping itself.
            try:
                available = cwltool.docker.DockerCommandLineJob.get_image(
                    dockerRequirement, pull_image, force_pull, tmp_outdir_prefix)
                if not available:
                    raise WorkflowException("Docker image '%s' not available" % dockerRequirement["dockerImageId"])
            except OSError as e:
                raise WorkflowException("While trying to get Docker image '%s', failed to execute 'docker': %s" % (dockerRequirement["dockerImageId"], e))

            # Upload the image to Arvados with arv-keepdocker.
            keepdocker_args = ["--project-uuid="+project_uuid] if project_uuid else []
            keepdocker_args += [image_name, image_tag]
            logger.info("Uploading Docker image %s:%s", image_name, image_tag)
            try:
                arvados.commands.put.api_client = api_client
                arvados.commands.keepdocker.main(keepdocker_args, stdout=sys.stderr,
                                                 install_sig_handlers=False, api=api_client)
            except SystemExit as e:
                # A missing or zero exit code means keepdocker finished normally.
                if e.code:
                    raise WorkflowException("keepdocker exited with code %s" % e.code)

            images = list_keep_images()

        if not images:
            raise WorkflowException("Could not find Docker image %s:%s" % (image_name, image_tag))

        collection = api_client.collections().get(uuid=images[0][0]).execute()
        pdh = collection["portable_data_hash"]

        with cached_lookups_lock:
            cached_lookups[dockerRequirement["dockerImageId"]] = pdh

    return pdh
139
def arv_docker_clear_cache():
    """Discard every cached image lookup by rebinding the cache to a new dict."""
    # Only the cache name is rebound, so only it needs a global declaration;
    # the lock is merely read.
    global cached_lookups
    with cached_lookups_lock:
        cached_lookups = dict()