X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/ed9be4cc10c12051bdb1a65210726ab1d688108c..b9df977af1ca8d7fe942cbe13956fa19ed88b91d:/sdk/cwl/arvados_cwl/arvcontainer.py?ds=sidebyside diff --git a/sdk/cwl/arvados_cwl/arvcontainer.py b/sdk/cwl/arvados_cwl/arvcontainer.py index eb0afc19d8..d6e3098ab4 100644 --- a/sdk/cwl/arvados_cwl/arvcontainer.py +++ b/sdk/cwl/arvados_cwl/arvcontainer.py @@ -37,7 +37,8 @@ class ArvadosContainer(object): "output_path": self.outdir, "cwd": self.outdir, "priority": 1, - "state": "Committed" + "state": "Committed", + "properties": {} } runtime_constraints = {} mounts = { @@ -113,19 +114,27 @@ class ArvadosContainer(object): container_request["use_existing"] = kwargs.get("enable_reuse", True) container_request["scheduling_parameters"] = scheduling_parameters + if kwargs.get("runnerjob", "").startswith("arvwf:"): + wfuuid = kwargs["runnerjob"][6:kwargs["runnerjob"].index("#")] + wfrecord = self.arvrunner.api.workflows().get(uuid=wfuuid).execute(num_retries=self.arvrunner.num_retries) + if container_request["name"] == "main": + container_request["name"] = wfrecord["name"] + container_request["properties"]["template_uuid"] = wfuuid + try: response = self.arvrunner.api.container_requests().create( body=container_request ).execute(num_retries=self.arvrunner.num_retries) - self.arvrunner.processes[response["uuid"]] = self + self.uuid = response["uuid"] + self.arvrunner.processes[self.uuid] = self - logger.info("Container request %s (%s) state is %s", self.name, response["uuid"], response["state"]) + logger.info("%s %s state is %s", self.arvrunner.label(self), response["uuid"], response["state"]) if response["state"] == "Final": self.done(response) except Exception as e: - logger.error("Got error %s" % str(e)) + logger.error("%s got error %s" % (self.arvrunner.label(self), str(e))) self.output_callback({}, "permanentFail") def done(self, record): @@ -148,17 +157,28 @@ class ArvadosContainer(object): else: processStatus = "permanentFail" - outputs = {} + if processStatus == "permanentFail": + logc = arvados.collection.CollectionReader(container["log"], + api_client=self.arvrunner.api, + keep_client=self.arvrunner.keep_client, + num_retries=self.arvrunner.num_retries) + done.logtail(logc, logger, "%s error log:" % self.arvrunner.label(self)) + outputs = {} if container["output"]: - try: - outputs = done.done_outputs(self, container, "/tmp", self.outdir, "/keep") - except Exception as e: - logger.error("Got error %s" % str(e)) - self.output_callback({}, "permanentFail") + outputs = done.done_outputs(self, container, "/tmp", self.outdir, "/keep") + except WorkflowException as e: + logger.error("%s unable to collect output from %s:\n%s", + self.arvrunner.label(self), container["output"], e, exc_info=(e if self.arvrunner.debug else False)) + processStatus = "permanentFail" + except Exception as e: + logger.exception("%s while getting output object: %s", self.arvrunner.label(self), e) + self.output_callback({}, "permanentFail") + else: self.output_callback(outputs, processStatus) finally: - del self.arvrunner.processes[record["uuid"]] + if record["uuid"] in self.arvrunner.processes: + del self.arvrunner.processes[record["uuid"]] class RunnerContainer(Runner): @@ -199,7 +219,8 @@ class RunnerContainer(Runner): "vcpus": 1, "ram": 1024*1024 * self.submit_runner_ram, "API": True - } + }, + "properties": {} } workflowcollection = workflowmapper.mapper(self.tool.tool["id"])[1] @@ -213,22 +234,27 @@ class RunnerContainer(Runner): } elif workflowcollection.startswith("arvwf:"): workflowpath = "/var/lib/cwl/workflow.json#main" - fetcher = CollectionFetcher({}, None, - api_client=self.arvrunner.api, - keep_client=self.arvrunner.keep_client) - wfobj = yaml.safe_load(fetcher.fetch_text(workflowcollection)) + wfuuid = workflowcollection[6:workflowcollection.index("#")] + wfrecord = self.arvrunner.api.workflows().get(uuid=wfuuid).execute(num_retries=self.arvrunner.num_retries) + wfobj = yaml.safe_load(wfrecord["definition"]) + if container_req["name"].startswith("arvwf:"): + container_req["name"] = wfrecord["name"] container_req["mounts"]["/var/lib/cwl/workflow.json"] = { "kind": "json", "json": wfobj } + container_req["properties"]["template_uuid"] = wfuuid - command = ["arvados-cwl-runner", "--local", "--api=containers"] + command = ["arvados-cwl-runner", "--local", "--api=containers", "--no-log-timestamps"] if self.output_name: command.append("--output-name=" + self.output_name) if self.output_tags: command.append("--output-tags=" + self.output_tags) + if kwargs.get("debug"): + command.append("--debug") + if self.enable_reuse: command.append("--enable-reuse") else: @@ -251,9 +277,9 @@ class RunnerContainer(Runner): ).execute(num_retries=self.arvrunner.num_retries) self.uuid = response["uuid"] - self.arvrunner.processes[response["uuid"]] = self + self.arvrunner.processes[self.uuid] = self - logger.info("Submitted container %s", response["uuid"]) + logger.info("%s submitted container %s", self.arvrunner.label(self), response["uuid"]) if response["state"] == "Final": self.done(response) @@ -264,10 +290,10 @@ class RunnerContainer(Runner): uuid=record["container_uuid"] ).execute(num_retries=self.arvrunner.num_retries) except Exception as e: - logger.exception("While getting runner container: %s", e) + logger.exception("%s while getting runner container: %s", self.arvrunner.label(self), e) self.arvrunner.output_callback({}, "permanentFail") - del self.arvrunner.processes[record["uuid"]] else: super(RunnerContainer, self).done(container) finally: - del self.arvrunner.processes[record["uuid"]] + if record["uuid"] in self.arvrunner.processes: + del self.arvrunner.processes[record["uuid"]]