From 8408c834128282437ad1741b17e5221f0eef7da4 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Tue, 1 Mar 2016 20:50:48 -0500 Subject: [PATCH] 8488: Determine actual output dir from reading logs. Need to set $(runtime.outdir) correctly for arvados-cwl-runner. --- sdk/cwl/arvados_cwl/__init__.py | 29 ++++++++++++++++++++++++++--- sdk/cwl/setup.py | 2 +- sdk/go/crunchrunner/crunchrunner.go | 4 ++++ 3 files changed, 31 insertions(+), 4 deletions(-) diff --git a/sdk/cwl/arvados_cwl/__init__.py b/sdk/cwl/arvados_cwl/__init__.py index 86619d5bb4..105d754aa4 100644 --- a/sdk/cwl/arvados_cwl/__init__.py +++ b/sdk/cwl/arvados_cwl/__init__.py @@ -25,10 +25,15 @@ from arvados.api import OrderedJsonModel logger = logging.getLogger('arvados.cwl-runner') logger.setLevel(logging.INFO) -crunchrunner_pdh = "721abe848fd8e6e6d1c99b920e6b7a2c+140" +crunchrunner_pdh = "83db29f08544e1c319572a6bd971088a+140" crunchrunner_download = "https://cloud.curoverse.com/collections/download/qr1hi-4zz18-n3m1yxd0vx78jic/1i1u2qtq66k1atziv4ocfgsg5nu5tj11n4r6e0bhvjg03rix4m/crunchrunner" certs_download = "https://cloud.curoverse.com/collections/download/qr1hi-4zz18-n3m1yxd0vx78jic/1i1u2qtq66k1atziv4ocfgsg5nu5tj11n4r6e0bhvjg03rix4m/ca-certificates.crt" +tmpdirre = re.compile(r"\S+ \S+ \d+ \d+ stderr \S+ \S+ crunchrunner: \$\(task\.tmpdir\)=(.*)") +outdirre = re.compile(r"\S+ \S+ \d+ \d+ stderr \S+ \S+ crunchrunner: \$\(task\.outdir\)=(.*)") +keepre = re.compile(r"\S+ \S+ \d+ \d+ stderr \S+ \S+ crunchrunner: \$\(task\.keep\)=(.*)") + + def arv_docker_get_image(api_client, dockerRequirement, pull_image): if "dockerImageId" not in dockerRequirement and "dockerPull" in dockerRequirement: dockerRequirement["dockerImageId"] = dockerRequirement["dockerPull"] @@ -194,8 +199,26 @@ class ArvadosJob(object): try: outputs = {} if record["output"]: - self.builder.outdir = "keep:" + record["output"] - outputs = self.collect_outputs(self.builder.outdir) + logc = arvados.collection.Collection(record["log"]) + log = logc.open(logc.keys()[0]) + tmpdir = None + outdir = None + keepdir = None + for l in log.readlines(): + g = tmpdirre.match(l) + if g: + tmpdir = g.group(1) + g = outdirre.match(l) + if g: + outdir = g.group(1) + g = keepre.match(l) + if g: + keepdir = g.group(1) + if tmpdir and outdir and keepdir: + break + + self.builder.outdir = outdir + outputs = self.collect_outputs("keep:" + record["output"]) except Exception as e: logger.exception("Got exception while collecting job outputs:") processStatus = "permanentFail" diff --git a/sdk/cwl/setup.py b/sdk/cwl/setup.py index 4e127cca06..3fe4e16de1 100644 --- a/sdk/cwl/setup.py +++ b/sdk/cwl/setup.py @@ -31,7 +31,7 @@ setup(name='arvados-cwl-runner', ], install_requires=[ 'cwltool>=1.0.20160225202307', - 'arvados-python-client>=0.1.20160122132348' + 'arvados-python-client>=0.1.20160219154918' ], zip_safe=True, cmdclass={'egg_info': tagger}, diff --git a/sdk/go/crunchrunner/crunchrunner.go b/sdk/go/crunchrunner/crunchrunner.go index 081ba50d50..226cf9122b 100644 --- a/sdk/go/crunchrunner/crunchrunner.go +++ b/sdk/go/crunchrunner/crunchrunner.go @@ -213,6 +213,10 @@ func runner(api IArvadosClient, "$(task.outdir)": outdir, "$(task.keep)": keepmount} + log.Printf("crunchrunner: $(task.tmpdir)=%v", tmpdir) + log.Printf("crunchrunner: $(task.outdir)=%v", outdir) + log.Printf("crunchrunner: $(task.keep)=%v", keepmount) + // Set up subprocess for k, v := range taskp.Command { taskp.Command[k] = substitute(v, replacements) -- 2.30.2