X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/7b51c206b53b1c542be66bd0e277ff2ad87894c0..HEAD:/sdk/cwl/arvados_cwl/arvcontainer.py diff --git a/sdk/cwl/arvados_cwl/arvcontainer.py b/sdk/cwl/arvados_cwl/arvcontainer.py index 6e3e42975e..34b79d67b4 100644 --- a/sdk/cwl/arvados_cwl/arvcontainer.py +++ b/sdk/cwl/arvados_cwl/arvcontainer.py @@ -2,10 +2,6 @@ # # SPDX-License-Identifier: Apache-2.0 -from future import standard_library -standard_library.install_aliases() -from builtins import str - import logging import json import os @@ -27,6 +23,9 @@ from cwltool.job import JobBase import arvados.collection +import crunchstat_summary.summarizer +import crunchstat_summary.reader + from .arvdocker import arv_docker_get_image from . import done from .runner import Runner, arvados_jobs_image, packed_workflow, trim_anonymous_location, remove_redundant_fields, make_builder @@ -370,8 +369,13 @@ class ArvadosContainer(JobBase): ram_multiplier = [1] oom_retry_req, _ = self.get_requirement("http://arvados.org/cwl#OutOfMemoryRetry") - if oom_retry_req and oom_retry_req.get('memoryRetryMultipler'): - ram_multiplier.append(oom_retry_req.get('memoryRetryMultipler')) + if oom_retry_req: + if oom_retry_req.get('memoryRetryMultiplier'): + ram_multiplier.append(oom_retry_req.get('memoryRetryMultiplier')) + elif oom_retry_req.get('memoryRetryMultipler'): + ram_multiplier.append(oom_retry_req.get('memoryRetryMultipler')) + else: + ram_multiplier.append(2) if runtimeContext.runnerjob.startswith("arvwf:"): wfuuid = runtimeContext.runnerjob[6:runtimeContext.runnerjob.index("#")] @@ -492,11 +496,14 @@ class ArvadosContainer(JobBase): else: processStatus = "permanentFail" - if processStatus == "permanentFail" and record["log_uuid"]: - logc = arvados.collection.CollectionReader(record["log_uuid"], - api_client=self.arvrunner.api, - keep_client=self.arvrunner.keep_client, - num_retries=self.arvrunner.num_retries) + logc = None + if record["log_uuid"]: + logc = arvados.collection.Collection(record["log_uuid"], + api_client=self.arvrunner.api, + keep_client=self.arvrunner.keep_client, + num_retries=self.arvrunner.num_retries) + + if processStatus == "permanentFail" and logc is not None: label = self.arvrunner.label(self) done.logtail( logc, logger.error, @@ -522,6 +529,28 @@ class ArvadosContainer(JobBase): uuid=self.uuid, body={"container_request": {"properties": properties}} ).execute(num_retries=self.arvrunner.num_retries) + + if logc is not None and self.job_runtime.enable_usage_report is not False: + try: + summarizer = crunchstat_summary.summarizer.ContainerRequestSummarizer( + record, + collection_object=logc, + label=self.name, + arv=self.arvrunner.api) + summarizer.run() + with logc.open("usage_report.html", "wt") as mr: + mr.write(summarizer.html_report()) + logc.save() + + # Post warnings about nodes that are under-utilized. + for rc in summarizer._recommend_gen(lambda x: x): + self.job_runtime.usage_report_notes.append(rc) + + except Exception as e: + logger.warning("%s unable to generate resource usage report", + self.arvrunner.label(self), + exc_info=(e if self.arvrunner.debug else False)) + except WorkflowException as e: # Only include a stack trace if in debug mode. # A stack trace may obfuscate more useful output about the workflow. @@ -699,6 +728,12 @@ class RunnerContainer(Runner): if runtimeContext.prefer_cached_downloads: command.append("--prefer-cached-downloads") + if runtimeContext.enable_usage_report is True: + command.append("--enable-usage-report") + + if runtimeContext.enable_usage_report is False: + command.append("--disable-usage-report") + if self.fast_parser: command.append("--fast-parser") @@ -739,14 +774,9 @@ class RunnerContainer(Runner): logger.info("%s submitted container_request %s", self.arvrunner.label(self), response["uuid"]) - workbench1 = self.arvrunner.api.config()["Services"]["Workbench1"]["ExternalURL"] workbench2 = self.arvrunner.api.config()["Services"]["Workbench2"]["ExternalURL"] - url = "" if workbench2: url = "{}processes/{}".format(workbench2, response["uuid"]) - elif workbench1: - url = "{}container_requests/{}".format(workbench1, response["uuid"]) - if url: logger.info("Monitor workflow progress at %s", url)