[ARVADOS] created: df3ed250cc8b1a15dee908690e0eb61372a04797

git at public.curoverse.com git at public.curoverse.com
Tue Dec 15 18:43:10 EST 2015


        at  df3ed250cc8b1a15dee908690e0eb61372a04797 (commit)


commit df3ed250cc8b1a15dee908690e0eb61372a04797
Author: Tom Clegg <tom at curoverse.com>
Date:   Tue Dec 15 18:42:44 2015 -0500

    7901: Add job stats, elapsed time, summed user+sys and tx+rx, and some human-readable highlights.

diff --git a/tools/crunchstat-summary/crunchstat_summary/summarizer.py b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
index 6516018..926e40e 100644
--- a/tools/crunchstat-summary/crunchstat_summary/summarizer.py
+++ b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
@@ -11,14 +11,27 @@ class Summarizer(object):
         self.args = args
 
     def run(self):
-        stats_max = {}
+        stats_max = {'time': {}}
+        task_stats = {}
         for line in self._logdata():
+            m = re.search(r'^\S+ \S+ \d+ (?P<seq>\d+) success in (?P<elapsed>\d+) seconds', line)
+            if m:
+                task_id = m.group('seq')
+                elapsed = int(m.group('elapsed'))
+                task_stats.setdefault(task_id, {})
+                task_stats[task_id]['time'] = {
+                    'elapsed': elapsed}
+                if elapsed > stats_max['time'].get('elapsed', float('-Inf')):
+                    stats_max['time']['elapsed'] = elapsed
+                continue
             m = re.search(r'^\S+ \S+ \d+ (?P<seq>\d+) stderr crunchstat: (?P<category>\S+) (?P<current>.*?)( -- interval (?P<interval>.*))?\n', line)
             if not m:
                 continue
             if m.group('category').endswith(':'):
                 # "notice:" etc.
                 continue
+            task_id = m.group('seq')
+            task_stats.setdefault(task_id, {})
             this_interval_s = None
             for group in ['current', 'interval']:
                 if not m.group(group):
@@ -26,12 +39,19 @@ class Summarizer(object):
                 category = m.group('category')
                 if category not in stats_max:
                     stats_max[category] = {}
+                task_stats[task_id].setdefault(category, {})
                 words = m.group(group).split(' ')
+                stats = {}
                 for val, stat in zip(words[::2], words[1::2]):
                     if '.' in val:
-                        val = float(val)
+                        stats[stat] = float(val)
                     else:
-                        val = int(val)
+                        stats[stat] = int(val)
+                if 'user' in stats or 'sys' in stats:
+                    stats['tot'] = stats.get('user', 0) + stats.get('sys', 0)
+                if 'tx' in stats or 'rx' in stats:
+                    stats['tot'] = stats.get('tx', 0) + stats.get('rx', 0)
+                for stat, val in stats.iteritems():
                     if group == 'interval':
                         if stat == 'seconds':
                             this_interval_s = val
@@ -44,26 +64,68 @@ class Summarizer(object):
                         else:
                             stat = stat + '__rate'
                             val = val / this_interval_s
+                    else:
+                        task_stats[task_id][category][stat] = val
                     if val > stats_max[category].get(stat, float('-Inf')):
                         stats_max[category][stat] = val
         self.stats_max = stats_max
+        self.task_stats = task_stats
 
     def report(self):
         return "\n".join(self._report_gen()) + "\n"
 
     def _report_gen(self):
-        yield "\t".join(['category', 'metric', 'max', 'max_rate'])
+        job_tot = {}
+        for task_id, task_stat in self.task_stats.iteritems():
+            for category, stat_last in task_stat.iteritems():
+                if category not in job_tot:
+                    job_tot[category] = {}
+                for stat, val in stat_last.iteritems():
+                    if stat == 'cpus':
+                        # 16 cpu cores x 5 tasks = meaningless stat
+                        continue
+                    job_tot[category].setdefault(stat, 0)
+                    job_tot[category][stat] += val
+        yield "\t".join(['category', 'metric', 'task_max', 'task_max_rate', 'job_total'])
         for category, stat_max in self.stats_max.iteritems():
             for stat, val in stat_max.iteritems():
                 if stat.endswith('__rate'):
                     continue
-                if stat+'__rate' in stat_max:
-                    max_rate = '{:.2f}'.format(stat_max[stat+'__rate'])
-                else:
-                    max_rate = '-'
-                if isinstance(val, float):
-                    val = '{:.2f}'.format(val)
-                yield "\t".join([category, stat, str(val), max_rate])
+                max_rate = self._format(stat_max.get(stat+'__rate', '-'))
+                val = self._format(val)
+                tot = self._format(job_tot[category].get(stat, '-'))
+                yield "\t".join([category, stat, str(val), max_rate, tot])
+        for args in [['Max CPU time spent by a single task: {}s',
+                      self.stats_max['cpu'].get('tot'),
+                      None],
+                     ['Max CPU usage in a single interval: {}%',
+                      self.stats_max['cpu'].get('tot__rate'),
+                      lambda x: x * 100],
+                     ['Overall CPU usage: {}%',
+                      job_tot['cpu']['tot'] / job_tot['time']['elapsed'],
+                      lambda x: x * 100],
+                     ['Max memory used by a single task: {}GB',
+                      self.stats_max['mem'].get('rss'),
+                      lambda x: x / 1e9],
+                     ['Max network traffic in a single task: {}GB',
+                      self.stats_max['net:eth0'].get('tot'),
+                      lambda x: x / 1e9],
+                     ['Max network speed in a single interval: {}MB/s',
+                      self.stats_max['net:eth0'].get('tot__rate'),
+                      lambda x: x / 1e6]]:
+            format_string, val, transform = args
+            if val is None:
+                continue
+            if transform:
+                val = transform(val)
+            yield "# "+format_string.format(self._format(val))
+
+    def _format(self, val):
+        """Return a string representation of a stat: {:.2f} for floats, etc."""
+        if isinstance(val, float):
+            return '{:.2f}'.format(val)
+        else:
+            return '{}'.format(val)
 
     def _logdata(self):
         if self.args.log_file:

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list