[ARVADOS] updated: 0d66f5f5c5173f0faad6318a9ac87d11964e5748

git at public.curoverse.com git at public.curoverse.com
Thu Dec 17 10:07:36 EST 2015


Summary of changes:
 .../crunchstat_summary/summarizer.py               | 98 ++++++++++++++++++----
 .../tests/logfile_20151204190335.txt.gz.report     | 50 ++++++-----
 .../tests/logfile_20151210063411.txt.gz.report     | 27 +++---
 .../tests/logfile_20151210063439.txt.gz.report     | 27 +++---
 4 files changed, 144 insertions(+), 58 deletions(-)

       via  0d66f5f5c5173f0faad6318a9ac87d11964e5748 (commit)
       via  4d72990a86b6f1c136a8fc5c644767af68f75eb1 (commit)
      from  38b15ad14b941e8935bdfae3ddcdb83f7d8e20af (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit 0d66f5f5c5173f0faad6318a9ac87d11964e5748
Merge: 38b15ad 4d72990
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Dec 17 10:06:50 2015 -0500

    Merge branch '7901-human-summary' closes #7901


commit 4d72990a86b6f1c136a8fc5c644767af68f75eb1
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Dec 17 10:05:30 2015 -0500

    7901: Add job stats, elapsed time, summed user+sys and tx+rx, and some human-readable highlights.

diff --git a/tools/crunchstat-summary/crunchstat_summary/summarizer.py b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
index 6516018..ac0964b 100644
--- a/tools/crunchstat-summary/crunchstat_summary/summarizer.py
+++ b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
@@ -1,6 +1,8 @@
 from __future__ import print_function
 
 import arvados
+import collections
+import functools
 import gzip
 import re
 import sys
@@ -11,27 +13,46 @@ class Summarizer(object):
         self.args = args
 
     def run(self):
-        stats_max = {}
+        # stats_max: {category: {stat: val}}
+        self.stats_max = collections.defaultdict(
+            functools.partial(collections.defaultdict,
+                              lambda: float('-Inf')))
+        # task_stats: {task_id: {category: {stat: val}}}
+        self.task_stats = collections.defaultdict(
+            functools.partial(collections.defaultdict, dict))
         for line in self._logdata():
+            m = re.search(r'^\S+ \S+ \d+ (?P<seq>\d+) success in (?P<elapsed>\d+) seconds', line)
+            if m:
+                task_id = m.group('seq')
+                elapsed = int(m.group('elapsed'))
+                self.task_stats[task_id]['time'] = {'elapsed': elapsed}
+                if elapsed > self.stats_max['time']['elapsed']:
+                    self.stats_max['time']['elapsed'] = elapsed
+                continue
             m = re.search(r'^\S+ \S+ \d+ (?P<seq>\d+) stderr crunchstat: (?P<category>\S+) (?P<current>.*?)( -- interval (?P<interval>.*))?\n', line)
             if not m:
                 continue
             if m.group('category').endswith(':'):
                 # "notice:" etc.
                 continue
+            task_id = m.group('seq')
             this_interval_s = None
             for group in ['current', 'interval']:
                 if not m.group(group):
                     continue
                 category = m.group('category')
-                if category not in stats_max:
-                    stats_max[category] = {}
                 words = m.group(group).split(' ')
+                stats = {}
                 for val, stat in zip(words[::2], words[1::2]):
                     if '.' in val:
-                        val = float(val)
+                        stats[stat] = float(val)
                     else:
-                        val = int(val)
+                        stats[stat] = int(val)
+                if 'user' in stats or 'sys' in stats:
+                    stats['user+sys'] = stats.get('user', 0) + stats.get('sys', 0)
+                if 'tx' in stats or 'rx' in stats:
+                    stats['tx+rx'] = stats.get('tx', 0) + stats.get('rx', 0)
+                for stat, val in stats.iteritems():
                     if group == 'interval':
                         if stat == 'seconds':
                             this_interval_s = val
@@ -44,26 +65,67 @@ class Summarizer(object):
                         else:
                             stat = stat + '__rate'
                             val = val / this_interval_s
-                    if val > stats_max[category].get(stat, float('-Inf')):
-                        stats_max[category][stat] = val
-        self.stats_max = stats_max
+                    else:
+                        self.task_stats[task_id][category][stat] = val
+                    if val > self.stats_max[category][stat]:
+                        self.stats_max[category][stat] = val
 
     def report(self):
         return "\n".join(self._report_gen()) + "\n"
 
     def _report_gen(self):
-        yield "\t".join(['category', 'metric', 'max', 'max_rate'])
-        for category, stat_max in self.stats_max.iteritems():
-            for stat, val in stat_max.iteritems():
+        job_tot = collections.defaultdict(
+            functools.partial(collections.defaultdict, int))
+        for task_id, task_stat in self.task_stats.iteritems():
+            for category, stat_last in task_stat.iteritems():
+                for stat, val in stat_last.iteritems():
+                    if stat in ['cpus', 'cache', 'swap', 'rss']:
+                        # meaningless stats like 16 cpu cores x 5 tasks = 80
+                        continue
+                    job_tot[category][stat] += val
+        yield "\t".join(['category', 'metric', 'task_max', 'task_max_rate', 'job_total'])
+        for category, stat_max in sorted(self.stats_max.iteritems()):
+            for stat, val in sorted(stat_max.iteritems()):
                 if stat.endswith('__rate'):
                     continue
-                if stat+'__rate' in stat_max:
-                    max_rate = '{:.2f}'.format(stat_max[stat+'__rate'])
-                else:
-                    max_rate = '-'
-                if isinstance(val, float):
-                    val = '{:.2f}'.format(val)
-                yield "\t".join([category, stat, str(val), max_rate])
+                max_rate = self._format(stat_max.get(stat+'__rate', '-'))
+                val = self._format(val)
+                tot = self._format(job_tot[category].get(stat, '-'))
+                yield "\t".join([category, stat, str(val), max_rate, tot])
+        for args in (
+                ('Max CPU time spent by a single task: {}s',
+                 self.stats_max['cpu']['user+sys'],
+                 None),
+                ('Max CPU usage in a single interval: {}%',
+                 self.stats_max['cpu']['user+sys__rate'],
+                 lambda x: x * 100),
+                ('Overall CPU usage: {}%',
+                 job_tot['cpu']['user+sys'] / job_tot['time']['elapsed'],
+                 lambda x: x * 100),
+                ('Max memory used by a single task: {}GB',
+                 self.stats_max['mem']['rss'],
+                 lambda x: x / 1e9),
+                ('Max network traffic in a single task: {}GB',
+                 self.stats_max['net:eth0']['tx+rx'],
+                 lambda x: x / 1e9),
+                ('Max network speed in a single interval: {}MB/s',
+                 self.stats_max['net:eth0']['tx+rx__rate'],
+                 lambda x: x / 1e6)):
+            format_string, val, transform = args
+            if val == float('-Inf'):
+                continue
+            if transform:
+                val = transform(val)
+            yield "# "+format_string.format(self._format(val))
+
+    def _format(self, val):
+        """Return a string representation of a stat.
+
+        {:.2f} for floats, default format for everything else."""
+        if isinstance(val, float):
+            return '{:.2f}'.format(val)
+        else:
+            return '{}'.format(val)
 
     def _logdata(self):
         if self.args.log_file:
diff --git a/tools/crunchstat-summary/tests/logfile_20151204190335.txt.gz.report b/tools/crunchstat-summary/tests/logfile_20151204190335.txt.gz.report
index 5ac1b28..ef7beb1 100644
--- a/tools/crunchstat-summary/tests/logfile_20151204190335.txt.gz.report
+++ b/tools/crunchstat-summary/tests/logfile_20151204190335.txt.gz.report
@@ -1,20 +1,30 @@
-category	metric	max	max_rate
-net:eth0	rx	1754364530	41658344.87
-net:eth0	tx	38837956	920817.97
-mem	rss	349814784	-
-mem	cache	1678139392	-
-mem	swap	0	-
-mem	pgmajfault	0	-
-keepcalls	put	0	0.00
-keepcalls	get	0	0.00
-keepcache	miss	0	0.00
-keepcache	hit	0	0.00
-blkio:0:0	write	0	0.00
-blkio:0:0	read	0	0.00
-net:keep0	rx	0	0.00
-net:keep0	tx	0	0.00
-cpu	sys	1.92	0.04
-cpu	user	3.83	0.09
-cpu	cpus	8	-
-fuseops	write	0	0.00
-fuseops	read	0	0.00
+category	metric	task_max	task_max_rate	job_total
+blkio:0:0	read	0	0.00	0
+blkio:0:0	write	0	0.00	0
+cpu	cpus	8	-	-
+cpu	sys	1.92	0.04	1.92
+cpu	user	3.83	0.09	3.83
+cpu	user+sys	5.75	0.13	5.75
+fuseops	read	0	0.00	0
+fuseops	write	0	0.00	0
+keepcache	hit	0	0.00	0
+keepcache	miss	0	0.00	0
+keepcalls	get	0	0.00	0
+keepcalls	put	0	0.00	0
+mem	cache	1678139392	-	-
+mem	pgmajfault	0	-	0
+mem	rss	349814784	-	-
+mem	swap	0	-	-
+net:eth0	rx	1754364530	41658344.87	1754364530
+net:eth0	tx	38837956	920817.97	38837956
+net:eth0	tx+rx	1793202486	42579162.83	1793202486
+net:keep0	rx	0	0.00	0
+net:keep0	tx	0	0.00	0
+net:keep0	tx+rx	0	0.00	0
+time	elapsed	80	-	80
+# Max CPU time spent by a single task: 5.75s
+# Max CPU usage in a single interval: 13.00%
+# Overall CPU usage: 7.19%
+# Max memory used by a single task: 0.35GB
+# Max network traffic in a single task: 1.79GB
+# Max network speed in a single interval: 42.58MB/s
diff --git a/tools/crunchstat-summary/tests/logfile_20151210063411.txt.gz.report b/tools/crunchstat-summary/tests/logfile_20151210063411.txt.gz.report
index d056317..38af3e7 100644
--- a/tools/crunchstat-summary/tests/logfile_20151210063411.txt.gz.report
+++ b/tools/crunchstat-summary/tests/logfile_20151210063411.txt.gz.report
@@ -1,10 +1,17 @@
-category	metric	max	max_rate
-mem	rss	856064	-
-mem	cache	12288	-
-mem	swap	0	-
-mem	pgmajfault	0	-
-cpu	sys	0.00	-
-cpu	user	0.00	-
-cpu	cpus	8	-
-net:eth0	rx	90	-
-net:eth0	tx	90	-
+category	metric	task_max	task_max_rate	job_total
+cpu	cpus	8	-	-
+cpu	sys	0.00	-	0.00
+cpu	user	0.00	-	0.00
+cpu	user+sys	0.00	-	0.00
+mem	cache	12288	-	-
+mem	pgmajfault	0	-	0
+mem	rss	856064	-	-
+mem	swap	0	-	-
+net:eth0	rx	90	-	90
+net:eth0	tx	90	-	90
+net:eth0	tx+rx	180	-	180
+time	elapsed	2	-	4
+# Max CPU time spent by a single task: 0.00s
+# Overall CPU usage: 0.00%
+# Max memory used by a single task: 0.00GB
+# Max network traffic in a single task: 0.00GB
diff --git a/tools/crunchstat-summary/tests/logfile_20151210063439.txt.gz.report b/tools/crunchstat-summary/tests/logfile_20151210063439.txt.gz.report
index d3939dc..7e42d61 100644
--- a/tools/crunchstat-summary/tests/logfile_20151210063439.txt.gz.report
+++ b/tools/crunchstat-summary/tests/logfile_20151210063439.txt.gz.report
@@ -1,10 +1,17 @@
-category	metric	max	max_rate
-mem	rss	450560	-
-mem	cache	8192	-
-mem	swap	0	-
-mem	pgmajfault	0	-
-cpu	sys	0.00	-
-cpu	user	0.00	-
-cpu	cpus	8	-
-net:eth0	rx	90	-
-net:eth0	tx	90	-
+category	metric	task_max	task_max_rate	job_total
+cpu	cpus	8	-	-
+cpu	sys	0.00	-	0.00
+cpu	user	0.00	-	0.00
+cpu	user+sys	0.00	-	0.00
+mem	cache	8192	-	-
+mem	pgmajfault	0	-	0
+mem	rss	450560	-	-
+mem	swap	0	-	-
+net:eth0	rx	90	-	90
+net:eth0	tx	90	-	90
+net:eth0	tx+rx	180	-	180
+time	elapsed	2	-	3
+# Max CPU time spent by a single task: 0.00s
+# Overall CPU usage: 0.00%
+# Max memory used by a single task: 0.00GB
+# Max network traffic in a single task: 0.00GB

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list