|==--submit-runner-ram== SUBMIT_RUNNER_RAM|RAM (in MiB) required for the workflow runner (default 1024)|
|==--submit-runner-image== SUBMIT_RUNNER_IMAGE|Docker image for workflow runner|
|==--always-submit-runner==|When invoked with --submit --wait, always submit a runner to manage the workflow, even when only running a single CommandLineTool|
+|==--match-submitter-images==|Where Arvados has more than one Docker image of the same name, use image from the Docker instance on the submitting node.|
|==--submit-request-uuid== UUID|Update and commit to supplied container request instead of creating a new one.|
|==--submit-runner-cluster== CLUSTER_ID|Submit workflow runner to a remote cluster|
|==--name NAME==|Name to use for workflow execution instance.|
Workflows submitted with @arvados-cwl-runner@ will take advantage of Arvados job reuse. If you submit a workflow which is identical to one that has run before, it will short cut the execution and return the result of the previous run. This also applies to individual workflow steps. For example, a two step workflow where the first step has run before will reuse the results of the first step and only execute the new second step. You can disable this behavior with @--disable-reuse@.
+h3(#docker). Docker images
+
+Docker images referenced by the workflow must be uploaded to Arvados. This requires @docker@ to be installed and usable by the user running @arvados-cwl-runner@. If the image is not present in the local Docker instance, @arvados-cwl-runner@ will first attempt to pull the image using @docker pull@, then upload it.
+
+If there is already a Docker image in Arvados with the same name, it will use the existing image. In this case, the submitter will not use Docker.
+
+The @--match-submitter-images@ option will check the id of the image in the local Docker instance and compare it to the id of the image already in Arvados with the same name and tag. If they are different, it will choose the image matching the local image id, which will be uploaded if necessary. This is helpful for development: if you locally rebuild the image with the 'latest' tag, the @--match-submitter-images@ option will ensure that the newer version is used.
+
h3. Command line options
See "arvados-cwl-runner options":{{site.baseurl}}/user/cwl/cwl-run-options.html
help="When invoked with --submit --wait, always submit a runner to manage the workflow, even when only running a single CommandLineTool",
default=False)
+ parser.add_argument("--match-submitter-images", action="store_true",
+ default=False, dest="match_local_docker",
+ help="Where Arvados has more than one Docker image of the same name, use image from the Docker instance on the submitting node.")
+
exgroup = parser.add_mutually_exclusive_group()
exgroup.add_argument("--submit-request-uuid",
default=None,
runtimeContext.pull_image,
runtimeContext.project_uuid,
runtimeContext.force_docker_pull,
- runtimeContext.tmp_outdir_prefix)
+ runtimeContext.tmp_outdir_prefix,
+ runtimeContext.match_local_docker)
network_req, _ = self.get_requirement("NetworkAccess")
if network_req:
import sys
import threading
import copy
+import re
+import subprocess
from schema_salad.sourceline import SourceLine
cached_lookups = {}
cached_lookups_lock = threading.Lock()
+# A candidate tag must match this pattern in full.  The trailing \Z matters:
+# without it "5000/img" (the tail of "localhost:5000/img") would be accepted
+# as a tag because its "5000" prefix matches.
+_DOCKER_TAG_RE = re.compile(r"[\w][\w.-]{0,127}\Z")
+
+
+def _split_image_reference(dockerImageId):
+    """Split an image reference into a (repository, tag) pair.
+
+    A missing tag defaults to "latest".  A colon that is followed by text
+    which is not a valid tag (for example a registry port, as in
+    "localhost:5000/img") is kept as part of the repository.
+    """
+    parts = dockerImageId.split(":")
+    if len(parts) == 1:
+        parts.append("latest")
+    elif len(parts) == 2:
+        # if parts[1] is not a valid tag name, it is part of the repository
+        if not _DOCKER_TAG_RE.match(parts[1]):
+            parts = [parts[0] + ":" + parts[1], "latest"]
+    elif len(parts) == 3:
+        # registry:port/name:tag -- the first colon belongs to the repository
+        if _DOCKER_TAG_RE.match(parts[2]):
+            parts = [parts[0] + ":" + parts[1], parts[2]]
+    return parts[0], parts[1]
+
+
+def determine_image_id(dockerImageId):
+    """Return the local Docker image id for dockerImageId, or None.
+
+    Runs "docker images --no-trunc --all" and scans its output for a row
+    whose repository and tag columns match dockerImageId, or whose image id
+    column equals dockerImageId verbatim.
+
+    :param dockerImageId: image reference ("repo", "repo:tag",
+        "host:port/repo[:tag]") or an image id as printed by docker.
+    :returns: the third column (image id) of the first matching row, or
+        None when no row matches.
+    :raises subprocess.CalledProcessError: if the docker command exits
+        with a non-zero status.
+    :raises OSError: if the docker executable cannot be started.
+    """
+    # The reference does not change between rows, so parse it only once.
+    repository, tag = _split_image_reference(dockerImageId)
+
+    listing = subprocess.check_output(  # nosec
+        ["docker", "images", "--no-trunc", "--all"]
+    ).decode("utf-8")
+
+    for line in listing.splitlines():
+        match = re.match(r"^([^ ]+)\s+([^ ]+)\s+([^ ]+)", line)
+        if not match:
+            continue
+        # check for repository:tag match or image id match
+        if (
+            (repository == match.group(1) and tag == match.group(2))
+            or dockerImageId == match.group(3)
+        ):
+            return match.group(3)
+
+    return None
+
+
+
+
def arv_docker_get_image(api_client, dockerRequirement, pull_image, project_uuid,
- force_pull, tmp_outdir_prefix):
+ force_pull, tmp_outdir_prefix, match_local_docker):
"""Check if a Docker image is available in Keep, if not, upload it using arv-keepdocker."""
if "http://arvados.org/cwl#dockerCollectionPDH" in dockerRequirement:
image_name=image_name,
image_tag=image_tag)
+ if images and match_local_docker:
+ local_image_id = determine_image_id(dockerRequirement["dockerImageId"])
+ if local_image_id:
+ # find it in the list
+ found = False
+ for i in images:
+ if i[1]["dockerhash"] == local_image_id:
+ found = True
+ images = [i]
+ break
+ if not found:
+ # force re-upload.
+ images = []
+
if not images:
# Fetch Docker image if necessary.
try:
self.cluster_target_id = 0
self.always_submit_runner = False
self.collection_cache_size = 256
+ self.match_local_docker = False
super(ArvRuntimeContext, self).__init__(kwargs)
"Option 'dockerOutputDirectory' of DockerRequirement not supported.")
arvados_cwl.arvdocker.arv_docker_get_image(arvrunner.api, docker_req, True, arvrunner.project_uuid,
arvrunner.runtimeContext.force_docker_pull,
- arvrunner.runtimeContext.tmp_outdir_prefix)
+ arvrunner.runtimeContext.tmp_outdir_prefix,
+ arvrunner.runtimeContext.match_local_docker)
else:
arvados_cwl.arvdocker.arv_docker_get_image(arvrunner.api, {"dockerPull": "arvados/jobs:"+__version__},
True, arvrunner.project_uuid,
arvrunner.runtimeContext.force_docker_pull,
- arvrunner.runtimeContext.tmp_outdir_prefix)
+ arvrunner.runtimeContext.tmp_outdir_prefix,
+ arvrunner.runtimeContext.match_local_docker)
elif isinstance(tool, cwltool.workflow.Workflow):
for s in tool.steps:
upload_docker(arvrunner, s.embedded_tool)
v["http://arvados.org/cwl#dockerCollectionPDH"] = arvados_cwl.arvdocker.arv_docker_get_image(arvrunner.api, v, True,
arvrunner.project_uuid,
arvrunner.runtimeContext.force_docker_pull,
- arvrunner.runtimeContext.tmp_outdir_prefix)
+ arvrunner.runtimeContext.tmp_outdir_prefix,
+ arvrunner.runtimeContext.match_local_docker)
for l in v:
visit(v[l], cur_id)
if isinstance(v, list):
try:
return arvados_cwl.arvdocker.arv_docker_get_image(arvrunner.api, {"dockerPull": img}, True, arvrunner.project_uuid,
arvrunner.runtimeContext.force_docker_pull,
- arvrunner.runtimeContext.tmp_outdir_prefix)
+ arvrunner.runtimeContext.tmp_outdir_prefix,
+ arvrunner.runtimeContext.match_local_docker)
except Exception as e:
raise Exception("Docker image %s is not available\n%s" % (img, e) )
# file to determine what version of cwltool and schema-salad to
# build.
install_requires=[
- 'cwltool==3.1.20220217190813',
+ 'cwltool==3.1.20220217222804',
'schema-salad==8.2.20211116214159',
'arvados-python-client{}'.format(pysdk_dep),
'setuptools',
}))
+ # The test passes no builder.resources
+ # Hence the default resources will apply: {'cores': 1, 'ram': 1024, 'outdirSize': 1024, 'tmpdirSize': 1024}
+    @mock.patch("arvados_cwl.arvdocker.determine_image_id")
+    @mock.patch("arvados.commands.keepdocker.list_images_in_arv")
+    def test_match_local_docker(self, keepdocker, determine_image_id):
+        # Runs the same tool twice, first with match_local_docker off and
+        # then on, and checks which image ends up in the container request.
+        arvados_cwl.add_arv_hints()
+        arv_docker_clear_cache()
+
+        runner = mock.MagicMock()
+        runner.ignore_docker_for_reuse = False
+        runner.intermediate_output_ttl = 0
+        runner.secret_store = cwltool.secrets.SecretStore()
+        runner.api._rootDesc = {"revision": "20210628"}
+
+        # The image listing reports two collections with different
+        # dockerhash values; the stubbed local image id ("123") equals the
+        # dockerhash of the second entry.
+        keepdocker.return_value = [("zzzzz-4zz18-zzzzzzzzzzzzzz4", {"dockerhash": "456"}),
+                                   ("zzzzz-4zz18-zzzzzzzzzzzzzz3", {"dockerhash": "123"})]
+        determine_image_id.side_effect = lambda x: "123"
+        # collections().get(uuid).execute() returns the portable data hash
+        # for each of the two collection UUIDs above.
+        def execute(uuid):
+            ex = mock.MagicMock()
+            lookup = {"zzzzz-4zz18-zzzzzzzzzzzzzz4": {"portable_data_hash": "99999999999999999999999999999994+99"},
+                      "zzzzz-4zz18-zzzzzzzzzzzzzz3": {"portable_data_hash": "99999999999999999999999999999993+99"}}
+            ex.execute.return_value = lookup[uuid]
+            return ex
+        runner.api.collections().get.side_effect = execute
+
+        tool = cmap({
+            "inputs": [],
+            "outputs": [],
+            "baseCommand": "echo",
+            "arguments": [],
+            "id": "",
+            "cwlVersion": "v1.2",
+            "class": "CommandLineTool"
+        })
+
+        loadingContext, runtimeContext = self.helper(runner, True)
+
+        arvtool = cwltool.load_tool.load_tool(tool, loadingContext)
+        arvtool.formatgraph = None
+
+        # Expected request body for the first run; container_image, name and
+        # output_name are overwritten below before the second run.
+        container_request = {
+            'environment': {
+                'HOME': '/var/spool/cwl',
+                'TMPDIR': '/tmp'
+            },
+            'name': 'test_run_True',
+            'runtime_constraints': {
+                'vcpus': 1,
+                'ram': 268435456
+            },
+            'use_existing': True,
+            'priority': 500,
+            'mounts': {
+                '/tmp': {'kind': 'tmp',
+                         "capacity": 1073741824
+                         },
+                '/var/spool/cwl': {'kind': 'tmp',
+                                   "capacity": 1073741824 }
+            },
+            'state': 'Committed',
+            'output_name': 'Output for step test_run_True',
+            'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
+            'output_path': '/var/spool/cwl',
+            'output_ttl': 0,
+            'container_image': '99999999999999999999999999999994+99',
+            'command': ['echo'],
+            'cwd': '/var/spool/cwl',
+            'scheduling_parameters': {},
+            'properties': {},
+            'secret_mounts': {},
+            'output_storage_classes': ["default"]
+        }
+
+        # Option off: the request is expected to use the first listed
+        # collection (...4).
+        runtimeContext.match_local_docker = False
+        for j in arvtool.job({}, mock.MagicMock(), runtimeContext):
+            j.run(runtimeContext)
+            runner.api.container_requests().create.assert_called_with(
+                body=JsonDiffMatcher(container_request))
+
+        # Option on: the request is expected to use the collection whose
+        # dockerhash equals the stubbed local image id (...3).
+        arv_docker_clear_cache()
+        runtimeContext.match_local_docker = True
+        container_request['container_image'] = '99999999999999999999999999999993+99'
+        container_request['name'] = 'test_run_True_2'
+        container_request['output_name'] = 'Output for step test_run_True_2'
+        for j in arvtool.job({}, mock.MagicMock(), runtimeContext):
+            j.run(runtimeContext)
+            runner.api.container_requests().create.assert_called_with(
+                body=JsonDiffMatcher(container_request))
+
+
+
class TestWorkflow(unittest.TestCase):
def setUp(self):
cwltool.process._names = set()
stubs.keep_client = keep_client2
stubs.docker_images = {
- "arvados/jobs:"+arvados_cwl.__version__: [("zzzzz-4zz18-zzzzzzzzzzzzzd3", "")],
- "debian:buster-slim": [("zzzzz-4zz18-zzzzzzzzzzzzzd4", "")],
- "arvados/jobs:123": [("zzzzz-4zz18-zzzzzzzzzzzzzd5", "")],
- "arvados/jobs:latest": [("zzzzz-4zz18-zzzzzzzzzzzzzd6", "")],
+ "arvados/jobs:"+arvados_cwl.__version__: [("zzzzz-4zz18-zzzzzzzzzzzzzd3", {})],
+ "debian:buster-slim": [("zzzzz-4zz18-zzzzzzzzzzzzzd4", {})],
+ "arvados/jobs:123": [("zzzzz-4zz18-zzzzzzzzzzzzzd5", {})],
+ "arvados/jobs:latest": [("zzzzz-4zz18-zzzzzzzzzzzzzd6", {})],
}
# Return the canned entry of stubs.docker_images for "image_name:image_tag",
# or an empty list when that key is absent; a and b are ignored.
def kd(a, b, image_name=None, image_tag=None):
return stubs.docker_images.get("%s:%s" % (image_name, image_tag), [])
arvrunner.project_uuid = ""
api.return_value = mock.MagicMock()
arvrunner.api = api.return_value
+ arvrunner.runtimeContext.match_local_docker = False
arvrunner.api.links().list().execute.side_effect = ({"items": [{"created_at": "",
"head_uuid": "zzzzz-4zz18-zzzzzzzzzzzzzzb",
"link_class": "docker_image_repo+tag",