Merge branch '19464-capture-git-info' refs #19464
[arvados.git] / sdk / cwl / arvados_cwl / done.py
1 # Copyright (C) The Arvados Authors. All rights reserved.
2 #
3 # SPDX-License-Identifier: Apache-2.0
4
5 from future.utils import viewvalues
6
7 import re
8 from cwltool.errors import WorkflowException
9 from collections import deque
10
def done(self, record, tmpdir, outdir, keepdir):
    """Save the output and log collections of a finished job, then collect outputs.

    For both the output and the log referenced by ``record``, look for a
    collection owned by ``self.arvrunner.project_uuid`` that already has the
    expected name and portable data hash.  When none is found, a new
    collection with that name and content is created under that owner.

    Args:
        self: the job object; ``name``, ``arvrunner`` and whatever
            ``done_outputs`` needs are read from it.
        record: mapping providing the "output", "uuid", "log" and
            "exit_code" entries.
        tmpdir, outdir, keepdir: passed through unchanged to ``done_outputs``.

    Returns:
        Whatever ``done_outputs`` returns.

    Raises:
        WorkflowException: if no collection with the required portable data
            hash can be found on the API server.
    """
    runner = self.arvrunner

    # (kind, collection name, portable data hash) for each collection to save.
    wanted = [
        ("output", "Output %s of %s" % (record["output"][0:7], self.name), record["output"]),
        ("log", "Log of %s" % (record["uuid"]), record["log"])
    ]

    for coltype, colname, colpdh in wanted:
        # Is there already a collection with the same owner, name and content?
        already_saved = runner.api.collections().list(
            filters=[["owner_uuid", "=", runner.project_uuid],
                     ['portable_data_hash', '=', colpdh],
                     ["name", "=", colname]]
        ).execute(num_retries=runner.num_retries)

        if already_saved["items"]:
            continue

        # Look up any collection with this content to obtain its manifest.
        source = runner.api.collections().list(
            limit=1,
            filters=[['portable_data_hash', '=', colpdh]],
            select=["manifest_text"]
        ).execute(num_retries=runner.num_retries)

        matches = source["items"]
        if not matches:
            raise WorkflowException(
                "[job %s] %s '%s' cannot be found on API server" % (
                    self.name, coltype, colpdh))

        # Create the named copy under the project that owns the run.
        new_collection = {
            "owner_uuid": runner.project_uuid,
            "name": colname,
            "portable_data_hash": colpdh,
            "manifest_text": matches[0]["manifest_text"]
        }
        runner.api.collections().create(
            body=new_collection, ensure_unique_name=True
        ).execute(num_retries=runner.num_retries)

    return done_outputs(self, record, tmpdir, outdir, keepdir)
51
def done_outputs(self, record, tmpdir, outdir, keepdir):
    """Point the builder at the output locations and collect the job outputs.

    Args:
        self: the job object; its ``builder`` is updated in place and its
            ``collect_outputs`` method is called.
        record: mapping providing the "output" and "exit_code" entries.
        tmpdir: accepted for interface compatibility; not used here.
        outdir: stored as ``self.builder.outdir``.
        keepdir: stored as ``self.builder.pathmapper.keepdir``.

    Returns:
        The result of ``self.collect_outputs`` called with the "keep:"
        prefixed output reference and the recorded exit code.
    """
    builder = self.builder
    builder.outdir = outdir
    builder.pathmapper.keepdir = keepdir

    output_ref = "keep:" + record["output"]
    exit_code = record["exit_code"]
    return self.collect_outputs(output_ref, exit_code)
56
# Lines of the form "<date>_<time> <uuid> <n> <n> stderr crunchstat:..." are
# dropped from the tail when the log collection has no "crunch-run.txt".
crunchstat_re = re.compile(r"^\d{4}-\d\d-\d\d_\d\d:\d\d:\d\d [a-z0-9]{5}-8i9sb-[a-z0-9]{15} \d+ \d+ stderr crunchstat:")
# Splits a line into (timestamp, message); used when "crunch-run.txt" exists.
timestamp_re = re.compile(r"^(\d{4}-\d\d-\d\dT\d\d:\d\d:\d\d\.\d+Z) (.*)")


def logtail(logcollection, logfunc, header, maxlen=25):
    """Report the last lines of a log collection through ``logfunc``.

    When the collection contains "crunch-run.txt", the last ``maxlen`` lines
    of "crunch-run.txt", "stdout.txt" and "stderr.txt" are merged in
    timestamp order, each line is labelled with the name of the file it came
    from, and the last ``maxlen`` merged lines are reported.  Otherwise the
    last ``maxlen`` lines of the first file in the collection are reported,
    skipping lines that match ``crunchstat_re``.

    Args:
        logcollection: object supporting ``len()``, ``in``, iteration over
            file names, and ``open(name, encoding=...)`` returning a context
            manager that iterates over lines.
        logfunc: logging-style callable invoked as ``logfunc(fmt, *args)``.
        header: text placed before the log lines.
        maxlen: maximum number of lines kept per file and reported overall.

    Returns:
        None; the result is delivered through ``logfunc``.
    """
    if len(logcollection) == 0:
        logfunc("%s\n%s", header, "  ** log is empty **")
        return

    containersapi = ("crunch-run.txt" in logcollection)
    mergelogs = {}

    for log in list(logcollection):
        if not containersapi or log in ("crunch-run.txt", "stdout.txt", "stderr.txt"):
            logname = log[:-4]  # drop the 4-character suffix such as ".txt"
            logt = deque([], maxlen)  # bounded: only the newest lines survive
            mergelogs[logname] = logt
            last_ts = ""
            with logcollection.open(log, encoding="utf-8") as f:
                for l in f:
                    if containersapi:
                        g = timestamp_re.match(l)
                        if g:
                            last_ts = g.group(1)
                            logt.append((last_ts, g.group(2)))
                        else:
                            # A line without a leading timestamp (for example
                            # the continuation of a multi-line message) must
                            # not abort the report: keep it and let it inherit
                            # the previous timestamp of this file so that it
                            # sorts right after the line it follows.
                            logt.append((last_ts, l.rstrip("\n")))
                    elif not crunchstat_re.match(l):
                        logt.append(l)

    if containersapi:
        loglines = []
        while True:
            pending = [k for k in mergelogs if mergelogs[k]]
            if not pending:
                break
            # min() returns the first of several equal candidates, so ties
            # between files go to the file that was read first.
            earliest = min(pending, key=lambda k: mergelogs[k][0][0])
            ts, msg = mergelogs[earliest].popleft()
            loglines.append("%s %s %s" % (ts, earliest, msg))
        loglines = loglines[-maxlen:]
    else:
        # Without "crunch-run.txt" only the first file read is reported.
        loglines = mergelogs[list(mergelogs)[0]]

    logtxt = "\n  ".join(l.strip() for l in loglines)
    logfunc("%s\n\n  %s", header, logtxt)