X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/718b62dc0b6710fff949aba5ea818d2a196c3a42..6358388ad9f7676aa3b1ab149c9fbde4716929e5:/sdk/cwl/arvados_cwl/arvcontainer.py diff --git a/sdk/cwl/arvados_cwl/arvcontainer.py b/sdk/cwl/arvados_cwl/arvcontainer.py index 560e8979ec..c85443a23a 100644 --- a/sdk/cwl/arvados_cwl/arvcontainer.py +++ b/sdk/cwl/arvados_cwl/arvcontainer.py @@ -63,7 +63,7 @@ class ArvadosContainer(JobBase): env["TMPDIR"] = self.tmpdir return env - def run(self, runtimeContext): + def run(self, toplevelRuntimeContext): # ArvadosCommandTool subclasses from cwltool.CommandLineTool, # which calls makeJobRunner() to get a new ArvadosContainer # object. The fields that define execution such as @@ -246,7 +246,8 @@ class ArvadosContainer(JobBase): runtimeContext.pull_image, runtimeContext.project_uuid, runtimeContext.force_docker_pull, - runtimeContext.tmp_outdir_prefix) + runtimeContext.tmp_outdir_prefix, + runtimeContext.match_local_docker) network_req, _ = self.get_requirement("NetworkAccess") if network_req: @@ -291,6 +292,25 @@ class ArvadosContainer(JobBase): else: container_request["output_storage_classes"] = runtimeContext.intermediate_storage_classes.strip().split(",") + cuda_req, _ = self.get_requirement("http://commonwl.org/cwltool#CUDARequirement") + if cuda_req: + runtime_constraints["cuda"] = { + "device_count": resources.get("cudaDeviceCount", 1), + "driver_version": cuda_req["cudaVersionMin"], + "hardware_capability": aslist(cuda_req["cudaComputeCapability"])[0] + } + + if runtimeContext.enable_preemptible is False: + scheduling_parameters["preemptible"] = False + else: + preemptible_req, _ = self.get_requirement("http://arvados.org/cwl#UsePreemptible") + if preemptible_req: + scheduling_parameters["preemptible"] = preemptible_req["usePreemptible"] + elif runtimeContext.enable_preemptible is True: + scheduling_parameters["preemptible"] = True + elif runtimeContext.enable_preemptible is None: + pass + if self.timelimit is not None and self.timelimit > 0: scheduling_parameters["max_run_time"] = self.timelimit @@ -372,6 +392,10 @@ class ArvadosContainer(JobBase): processStatus = "success" else: processStatus = "permanentFail" + + if rcode == 137: + logger.warning("%s job was killed on the compute instance. The most common reason is that it attempted to allocate too much RAM and was targeted by the Out Of Memory (OOM) killer. Try resubmitting with a higher 'ramMin'.", + self.arvrunner.label(self)) else: processStatus = "permanentFail" @@ -520,10 +544,10 @@ class RunnerContainer(Runner): if runtimeContext.debug: command.append("--debug") - if runtimeContext.storage_classes != "default": + if runtimeContext.storage_classes != "default" and runtimeContext.storage_classes: command.append("--storage-classes=" + runtimeContext.storage_classes) - if runtimeContext.intermediate_storage_classes != "default": + if runtimeContext.intermediate_storage_classes != "default" and runtimeContext.intermediate_storage_classes: command.append("--intermediate-storage-classes=" + runtimeContext.intermediate_storage_classes) if self.on_error: @@ -541,6 +565,12 @@ class RunnerContainer(Runner): if self.enable_dev: command.append("--enable-dev") + if runtimeContext.enable_preemptible is True: + command.append("--enable-preemptible") + + if runtimeContext.enable_preemptible is False: + command.append("--disable-preemptible") + command.extend([workflowpath, "/var/lib/cwl/cwl.input.json"]) container_req["command"] = command