Merge branch 'master' into 9998-unsigned_manifest
[arvados.git] / sdk / cwl / arvados_cwl / done.py
index c755565135fe98ca42e80b1e904ad7d65c3ad6af..87908c28ce762b48094cad22e433796a43495266 100644 (file)
@@ -1,4 +1,6 @@
+import re
 from cwltool.errors import WorkflowException
+from collections import deque
 
 def done(self, record, tmpdir, outdir, keepdir):
     colname = "Output %s of %s" % (record["output"][0:7], self.name)
@@ -22,8 +24,8 @@ def done(self, record, tmpdir, outdir, keepdir):
 
         if not collections["items"]:
             raise WorkflowException(
-                "Job output '%s' cannot be found on API server" % (
-                    record["output"]))
+                "[job %s] output '%s' cannot be found on API server" % (
+                    self.name, record["output"]))
 
         # Create new collection in the parent project
         # with the output contents.
@@ -41,3 +43,26 @@ def done_outputs(self, record, tmpdir, outdir, keepdir):
     self.builder.outdir = outdir
     self.builder.pathmapper.keepdir = keepdir
     return self.collect_outputs("keep:" + record["output"])
+
+crunchstat_re = re.compile(r"^\d{4}-\d\d-\d\d_\d\d:\d\d:\d\d [a-z0-9]{5}-8i9sb-[a-z0-9]{15} \d+ \d+ stderr crunchstat:")
+timestamp_re = re.compile(r"^(\d{4}-\d\d-\d\dT\d\d:\d\d:\d\d\.\d+Z) (.*)")
+
+def logtail(logcollection, logger, header, maxlen=25):
+    logtail = deque([], maxlen*len(logcollection))
+    containersapi = ("crunch-run.txt" in logcollection)
+
+    for log in logcollection.keys():
+        if not containersapi or log in ("crunch-run.txt", "stdout.txt", "stderr.txt"):
+            logname = log[:-4]
+            with logcollection.open(log) as f:
+                for l in f:
+                    if containersapi:
+                        g = timestamp_re.match(l)
+                        logtail.append("%s %s %s" % (g.group(1), logname, g.group(2)))
+                    elif not crunchstat_re.match(l):
+                        logtail.append(l)
+    if len(logcollection) > 1:
+        logtail = sorted(logtail)[-maxlen:]
+    logtxt = "\n  ".join(l.strip() for l in logtail)
+    logger.info(header)
+    logger.info("\n  %s", logtxt)