[ARVADOS] updated: dae5055c67ccf0577c740b4fd829bd5f0c1bf02e
Git user
git at public.curoverse.com
Wed Jan 4 20:29:29 EST 2017
Summary of changes:
.../crunchstat_summary/summarizer.py | 39 +++++++++++++---------
1 file changed, 24 insertions(+), 15 deletions(-)
via dae5055c67ccf0577c740b4fd829bd5f0c1bf02e (commit)
from e9ec095b1269d85eeb1b00c3921fe01e59ac5e0b (commit)
The revisions listed above that are new to this repository have
not appeared in any other notification email, so they are listed
in full below.
commit dae5055c67ccf0577c740b4fd829bd5f0c1bf02e
Author: Tom Morris <tfmorris at curoverse.com>
Date: Wed Jan 4 20:28:15 2017 -0500
10472: add latent support for rolled up stats
diff --git a/tools/crunchstat-summary/crunchstat_summary/summarizer.py b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
index 8e601e9..f1c97c2 100644
--- a/tools/crunchstat-summary/crunchstat_summary/summarizer.py
+++ b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
@@ -60,6 +60,21 @@ class Summarizer(object):
logger.debug("%s: logdata %s", self.label, logdata)
+ def run_child(self, uuid):
+ if self._skip_child_jobs:
+ logger.warning('%s: omitting stats from child job %s'
+ ' because --skip-child-jobs flag is on',
+ self.label, uuid)
+ return
+ logger.debug('%s: follow %s', self.label, uuid)
+ child_summarizer = JobSummarizer(uuid)
+ child_summarizer.stats_max = self.stats_max
+ child_summarizer.task_stats = self.task_stats
+ child_summarizer.tasks = self.tasks
+ child_summarizer.starttime = self.starttime
+ child_summarizer.run()
+ logger.debug('%s: done %s', self.label, uuid)
+
def run(self):
logger.debug("%s: parsing logdata %s", self.label, self._logdata)
for line in self._logdata:
@@ -80,22 +95,11 @@ class Summarizer(object):
self.stats_max['time']['elapsed'] = elapsed
continue
+ # Old style job logs only - newer style uses job['components']
+ uuid = None
m = re.search(r'^\S+ \S+ \d+ (?P<seq>\d+) stderr Queued job (?P<uuid>\S+)$', line)
if m:
- uuid = m.group('uuid')
- if self._skip_child_jobs:
- logger.warning('%s: omitting stats from child job %s'
- ' because --skip-child-jobs flag is on',
- self.label, uuid)
- continue
- logger.debug('%s: follow %s', self.label, uuid)
- child_summarizer = JobSummarizer(uuid)
- child_summarizer.stats_max = self.stats_max
- child_summarizer.task_stats = self.task_stats
- child_summarizer.tasks = self.tasks
- child_summarizer.starttime = self.starttime
- child_summarizer.run()
- logger.debug('%s: done %s', self.label, uuid)
+ self.run_child(m.group('uuid'))
continue
m = re.search(r'^(?P<timestamp>[^\s.]+)(\.\d+)? (?P<job_uuid>\S+) \d+ (?P<seq>\d+) stderr crunchstat: (?P<category>\S+) (?P<current>.*?)( -- interval (?P<interval>.*))?\n', line)
@@ -178,7 +182,12 @@ class Summarizer(object):
self.stats_max[category][stat] = val
except Exception as e:
logger.info('Skipping malformed line: {}Error was: {}\n'.format(line, e))
- logger.debug('%s: done parsing', self.label)
+ logger.debug('%s: done parsing log', self.label)
+
+ # Enabling this will roll up stats for all subjobs into the parent job
+ if False and 'components' in self.job:
+ for cname, component in self.job['components'].iteritems():
+ self.run_child(component)
self.job_tot = collections.defaultdict(
functools.partial(collections.defaultdict, int))
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list