[ARVADOS] created: 8a043c21eeb1d715ef34ee14233bedc31f49767c

Wed Jan 6 11:16:15 EST 2016

at  8a043c21eeb1d715ef34ee14233bedc31f49767c (commit)


commit 8a043c21eeb1d715ef34ee14233bedc31f49767c
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Dec 24 13:51:35 2015 -0500

    8123: Add option to include stats from child jobs.

diff --git a/tools/crunchstat-summary/crunchstat_summary/command.py b/tools/crunchstat-summary/crunchstat_summary/command.py
index 2c7574c..a5339dd 100644
--- a/tools/crunchstat-summary/crunchstat_summary/command.py
+++ b/tools/crunchstat-summary/crunchstat_summary/command.py
@@ -21,6 +21,9 @@ class ArgumentParser(argparse.ArgumentParser):
             '--log-file', type=str,
             help='Read log data from a regular file')
         self.add_argument(
+            '--include-child-jobs', action='store_true',
+            help='Include stats from child jobs')
+        self.add_argument(
             '--format', type=str, choices=('html', 'text'), default='text',
             help='Report format')
         self.add_argument(
@@ -40,18 +43,21 @@ class Command(object):
             logger.setLevel(logging.INFO)
 
     def run(self):
+        kwargs = {
+            'include_child_jobs': self.args.include_child_jobs,
+        }
         if self.args.pipeline_instance:
-            self.summer = summarizer.PipelineSummarizer(self.args.pipeline_instance)
+            self.summer = summarizer.PipelineSummarizer(self.args.pipeline_instance, **kwargs)
         elif self.args.job:
-            self.summer = summarizer.JobSummarizer(self.args.job)
+            self.summer = summarizer.JobSummarizer(self.args.job, **kwargs)
         elif self.args.log_file:
             if self.args.log_file.endswith('.gz'):
                 fh = gzip.open(self.args.log_file)
             else:
                 fh = open(self.args.log_file)
-            self.summer = summarizer.Summarizer(fh)
+            self.summer = summarizer.Summarizer(fh, **kwargs)
         else:
-            self.summer = summarizer.Summarizer(sys.stdin)
+            self.summer = summarizer.Summarizer(sys.stdin, **kwargs)
         return self.summer.run()
 
     def report(self):
diff --git a/tools/crunchstat-summary/crunchstat_summary/summarizer.py b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
index ba1919f..ccee1e2 100644
--- a/tools/crunchstat-summary/crunchstat_summary/summarizer.py
+++ b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
@@ -28,15 +28,14 @@ class Task(object):
 class Summarizer(object):
     existing_constraints = {}
 
-    def __init__(self, logdata, label=None):
+    def __init__(self, logdata, label=None, include_child_jobs=True):
         self._logdata = logdata
+
         self.label = label
         self.starttime = None
         self.finishtime = None
-        logger.debug("%s: logdata %s", self.label, repr(logdata))
+        self._include_child_jobs = include_child_jobs
 
-    def run(self):
-        logger.debug("%s: parsing log data", self.label)
         # stats_max: {category: {stat: val}}
         self.stats_max = collections.defaultdict(
             functools.partial(collections.defaultdict,
@@ -44,19 +43,52 @@ class Summarizer(object):
         # task_stats: {task_id: {category: {stat: val}}}
         self.task_stats = collections.defaultdict(
             functools.partial(collections.defaultdict, dict))
+
+        self.seq_to_uuid = {}
         self.tasks = collections.defaultdict(Task)
+
+        logger.debug("%s: logdata %s", self.label, repr(logdata))
+
+    def run(self):
+        logger.debug("%s: parsing log data", self.label)
         for line in self._logdata:
+            m = re.search(r'^\S+ \S+ \d+ (?P<seq>\d+) job_task (?P<task_uuid>\S+)$', line)
+            if m:
+                seq = int(m.group('seq'))
+                uuid = m.group('task_uuid')
+                self.seq_to_uuid[seq] = uuid
+                logger.debug('%s: seq %d is task %s', self.label, seq, uuid)
+                continue
+
             m = re.search(r'^\S+ \S+ \d+ (?P<seq>\d+) success in (?P<elapsed>\d+) seconds', line)
             if m:
-                task_id = m.group('seq')
+                task_id = self.seq_to_uuid[int(m.group('seq'))]
                 elapsed = int(m.group('elapsed'))
                 self.task_stats[task_id]['time'] = {'elapsed': elapsed}
                 if elapsed > self.stats_max['time']['elapsed']:
                     self.stats_max['time']['elapsed'] = elapsed
                 continue
+
+            m = re.search(r'^\S+ \S+ \d+ (?P<seq>\d+) stderr Queued job (?P<uuid>\S+)$', line)
+            if m:
+                uuid = m.group('uuid')
+                if not self._include_child_jobs:
+                    logger.warning('%s: omitting %s (try --include-child-job)',
+                                   self.label, uuid)
+                    continue
+                logger.debug('%s: follow %s', self.label, uuid)
+                child_summarizer = JobSummarizer(uuid)
+                child_summarizer.stats_max = self.stats_max
+                child_summarizer.task_stats = self.task_stats
+                child_summarizer.tasks = self.tasks
+                child_summarizer.run()
+                logger.debug('%s: done %s', self.label, uuid)
+                continue
+
             m = re.search(r'^(?P<timestamp>\S+) (?P<job_uuid>\S+) \d+ (?P<seq>\d+) stderr crunchstat: (?P<category>\S+) (?P<current>.*?)( -- interval (?P<interval>.*))?\n', line)
             if not m:
                 continue
+
             if self.label is None:
                 self.label = m.group('job_uuid')
                 logger.debug('%s: using job uuid as label', self.label)
@@ -65,7 +97,7 @@ class Summarizer(object):
                 continue
             elif m.group('category') == 'error':
                 continue
-            task_id = m.group('seq')
+            task_id = self.seq_to_uuid[int(m.group('seq'))]
             task = self.tasks[task_id]
 
             # Use the first and last crunchstat timestamps as
@@ -126,6 +158,8 @@ class Summarizer(object):
                         self.task_stats[task_id][category][stat] = val
                     if val > self.stats_max[category][stat]:
                         self.stats_max[category][stat] = val
+        logger.debug('%s: done parsing', self.label)
+
         self.job_tot = collections.defaultdict(
             functools.partial(collections.defaultdict, int))
         for task_id, task_stat in self.task_stats.iteritems():
@@ -135,6 +169,7 @@ class Summarizer(object):
                         # meaningless stats like 16 cpu cores x 5 tasks = 80
                         continue
                     self.job_tot[category][stat] += val
+        logger.debug('%s: done totals', self.label)
 
     def long_label(self):
         label = self.label
@@ -169,6 +204,9 @@ class Summarizer(object):
                 tot = self._format(self.job_tot[category].get(stat, '-'))
                 yield "\t".join([category, stat, str(val), max_rate, tot])
         for args in (
+                ('Number of tasks: {}',
+                 len(self.tasks),
+                 None),
                 ('Max CPU time spent by a single task: {}s',
                  self.stats_max['cpu']['user+sys'],
                  None),
@@ -249,7 +287,8 @@ class Summarizer(object):
 
 
 class CollectionSummarizer(Summarizer):
-    def __init__(self, collection_id):
+    def __init__(self, collection_id, **kwargs):
+        logger.debug('load collection %s', collection_id)
         collection = arvados.collection.CollectionReader(collection_id)
         filenames = [filename for filename in collection]
         if len(filenames) != 1:
@@ -257,12 +296,12 @@ class CollectionSummarizer(Summarizer):
                 "collection {} has {} files; need exactly one".format(
                     collection_id, len(filenames)))
         super(CollectionSummarizer, self).__init__(
-            collection.open(filenames[0]))
+            collection.open(filenames[0]), **kwargs)
         self.label = collection_id
 
 
 class JobSummarizer(CollectionSummarizer):
-    def __init__(self, job):
+    def __init__(self, job, **kwargs):
         arv = arvados.api('v1')
         if isinstance(job, str):
             self.job = arv.jobs().get(uuid=job).execute()
@@ -274,12 +313,12 @@ class JobSummarizer(CollectionSummarizer):
             raise ValueError(
                 "job {} has no log; live summary not implemented".format(
                     self.job['uuid']))
-        super(JobSummarizer, self).__init__(self.job['log'])
+        super(JobSummarizer, self).__init__(self.job['log'], **kwargs)
         self.label = self.job['uuid']
 
 
 class PipelineSummarizer():
-    def __init__(self, pipeline_instance_uuid):
+    def __init__(self, pipeline_instance_uuid, **kwargs):
         arv = arvados.api('v1', model=OrderedJsonModel())
         instance = arv.pipeline_instances().get(
             uuid=pipeline_instance_uuid).execute()
@@ -295,7 +334,7 @@ class PipelineSummarizer():
             else:
                 logger.info(
                     "%s: logdata %s", cname, component['job']['log'])
-                summarizer = JobSummarizer(component['job'])
+                summarizer = JobSummarizer(component['job'], **kwargs)
                 summarizer.label = cname
                 self.summarizers[cname] = summarizer
         self.label = pipeline_instance_uuid
diff --git a/tools/crunchstat-summary/tests/logfile_20151204190335.txt.gz.report b/tools/crunchstat-summary/tests/logfile_20151204190335.txt.gz.report
index b12e931..0ba0181 100644
--- a/tools/crunchstat-summary/tests/logfile_20151204190335.txt.gz.report
+++ b/tools/crunchstat-summary/tests/logfile_20151204190335.txt.gz.report
@@ -22,11 +22,12 @@ net:keep0	rx	0	0.00	0
 net:keep0	tx	0	0.00	0
 net:keep0	tx+rx	0	0.00	0
 time	elapsed	80	-	80
+# Number of tasks: 1
 # Max CPU time spent by a single task: 5.75s
 # Max CPU usage in a single interval: 13.00%
 # Overall CPU usage: 7.19%
 # Max memory used by a single task: 0.35GB
 # Max network traffic in a single task: 1.79GB
 # Max network speed in a single interval: 42.58MB/s
-#!! job max CPU usage was 13% -- try runtime_constraints "min_cores_per_node":1
-#!! job max RSS was 334 MiB -- try runtime_constraints "min_ram_mb_per_node":972
+#!! 4xphq-8i9sb-jq0ekny1xou3zoh max CPU usage was 13% -- try runtime_constraints "min_cores_per_node":1
+#!! 4xphq-8i9sb-jq0ekny1xou3zoh max RSS was 334 MiB -- try runtime_constraints "min_ram_mb_per_node":972
diff --git a/tools/crunchstat-summary/tests/logfile_20151210063411.txt.gz.report b/tools/crunchstat-summary/tests/logfile_20151210063411.txt.gz.report
index 8e1a2d8..0641bba 100644
--- a/tools/crunchstat-summary/tests/logfile_20151210063411.txt.gz.report
+++ b/tools/crunchstat-summary/tests/logfile_20151210063411.txt.gz.report
@@ -11,8 +11,9 @@ net:eth0	rx	90	-	90
 net:eth0	tx	90	-	90
 net:eth0	tx+rx	180	-	180
 time	elapsed	2	-	4
+# Number of tasks: 2
 # Max CPU time spent by a single task: 0.00s
 # Overall CPU usage: 0.00%
 # Max memory used by a single task: 0.00GB
 # Max network traffic in a single task: 0.00GB
-#!! job max RSS was 1 MiB -- try runtime_constraints "min_ram_mb_per_node":972
+#!! 4xphq-8i9sb-zvb2ocfycpomrup max RSS was 1 MiB -- try runtime_constraints "min_ram_mb_per_node":972
diff --git a/tools/crunchstat-summary/tests/logfile_20151210063439.txt.gz.report b/tools/crunchstat-summary/tests/logfile_20151210063439.txt.gz.report
index dbe9321..19fe0ed 100644
--- a/tools/crunchstat-summary/tests/logfile_20151210063439.txt.gz.report
+++ b/tools/crunchstat-summary/tests/logfile_20151210063439.txt.gz.report
@@ -11,8 +11,9 @@ net:eth0	rx	90	-	90
 net:eth0	tx	90	-	90
 net:eth0	tx+rx	180	-	180
 time	elapsed	2	-	3
+# Number of tasks: 2
 # Max CPU time spent by a single task: 0.00s
 # Overall CPU usage: 0.00%
 # Max memory used by a single task: 0.00GB
 # Max network traffic in a single task: 0.00GB
-#!! job max RSS was 1 MiB -- try runtime_constraints "min_ram_mb_per_node":972
+#!! 4xphq-8i9sb-v831jm2uq0g2g9x max RSS was 1 MiB -- try runtime_constraints "min_ram_mb_per_node":972
diff --git a/tools/crunchstat-summary/tests/test_examples.py b/tools/crunchstat-summary/tests/test_examples.py
index 09e21a1..d35e81e 100644
--- a/tools/crunchstat-summary/tests/test_examples.py
+++ b/tools/crunchstat-summary/tests/test_examples.py
@@ -35,7 +35,7 @@ class SummarizeFile(ReportDiff):
 
 
 class SummarizeJob(ReportDiff):
-    fake_job_uuid = 'zzzzz-8i9sb-jjjjjjjjjjjjjjj'
+    fake_job_uuid = '4xphq-8i9sb-jq0ekny1xou3zoh'
     fake_log_id = 'fake-log-collection-id'
     fake_job = {
         'uuid': fake_job_uuid,

commit b7134f0dd44bb63119787ae3dd8ef837b62995a0
Author: Tom Clegg <tom at curoverse.com>
Date:   Wed Dec 23 14:09:37 2015 -0500

    8123: Generate multiple sets of charts when data source is a pipeline instance.

diff --git a/tools/crunchstat-summary/crunchstat_summary/__init__.py b/tools/crunchstat-summary/crunchstat_summary/__init__.py
index e69de29..c10988d 100644
--- a/tools/crunchstat-summary/crunchstat_summary/__init__.py
+++ b/tools/crunchstat-summary/crunchstat_summary/__init__.py
@@ -0,0 +1,4 @@
+import logging
+
+logger = logging.getLogger(__name__)
+logger.addHandler(logging.NullHandler())
diff --git a/tools/crunchstat-summary/crunchstat_summary/chartjs.js b/tools/crunchstat-summary/crunchstat_summary/chartjs.js
index 3106e62..72ff7a4 100644
--- a/tools/crunchstat-summary/crunchstat_summary/chartjs.js
+++ b/tools/crunchstat-summary/crunchstat_summary/chartjs.js
@@ -1,11 +1,27 @@
 window.onload = function() {
-    var options = {};
-    chartData.forEach(function(data, idx) {
-        var div = document.createElement('div');
-        div.setAttribute('id', 'chart-'+idx);
-        div.setAttribute('style', 'width: 100%; height: 150px');
-        document.body.appendChild(div);
-        var chart = new CanvasJS.Chart('chart-'+idx, data);
-        chart.render();
+    var charts = {};
+    sections.forEach(function(section, section_idx) {
+        var h1 = document.createElement('h1');
+        h1.appendChild(document.createTextNode(section.label));
+        document.body.appendChild(h1);
+        section.charts.forEach(function(data, chart_idx) {
+            // Skip chart if every series has zero data points
+            if (0 == data.data.reduce(function(len, series) {
+                return len + series.dataPoints.length;
+            }, 0)) {
+                return;
+            }
+            var id = 'chart-'+section_idx+'-'+chart_idx;
+            var div = document.createElement('div');
+            div.setAttribute('id', id);
+            div.setAttribute('style', 'width: 100%; height: 150px');
+            document.body.appendChild(div);
+            charts[id] = new CanvasJS.Chart(id, data);
+            charts[id].render();
+        });
     });
-}
+
+    if (typeof window.debug === 'undefined')
+        window.debug = {};
+    window.debug.charts = charts;
+};
diff --git a/tools/crunchstat-summary/crunchstat_summary/chartjs.py b/tools/crunchstat-summary/crunchstat_summary/chartjs.py
index 85b49b8..590df27 100644
--- a/tools/crunchstat-summary/crunchstat_summary/chartjs.py
+++ b/tools/crunchstat-summary/crunchstat_summary/chartjs.py
@@ -3,13 +3,15 @@ from __future__ import print_function
 import json
 import pkg_resources
 
+from crunchstat_summary import logger
+
 
 class ChartJS(object):
-    JSLIB = 'https://cdnjs.cloudflare.com/ajax/libs/canvasjs/1.7.0/canvasjs.js'
+    JSLIB = 'https://cdnjs.cloudflare.com/ajax/libs/canvasjs/1.7.0/canvasjs.min.js'
 
-    def __init__(self, label, tasks):
+    def __init__(self, label, summarizers):
         self.label = label
-        self.tasks = tasks
+        self.summarizers = summarizers
 
     def html(self):
         return '''<!doctype html><html><head>
@@ -20,31 +22,48 @@ class ChartJS(object):
         '''.format(self.label, self.JSLIB, self.js())
 
     def js(self):
-        return 'var chartData = {};\n{}'.format(
-            json.dumps(self.chartData()),
+        return 'var sections = {};\n{}'.format(
+            json.dumps(self.sections()),
             pkg_resources.resource_string('crunchstat_summary', 'chartjs.js'))
 
-    def chartData(self):
-        maxpts = 0
-        for task in self.tasks.itervalues():
-            for series in task.series.itervalues():
-                maxpts = max(maxpts, len(series))
+    def sections(self):
         return [
             {
-                'title': {
-                    'text': '{}: {} {}'.format(self.label, stat[0], stat[1]),
+                'label': s.long_label(),
+                'charts': self.charts(s.label, s.tasks),
+            }
+            for s in self.summarizers]
+
+    def charts(self, label, tasks):
+        return [
+            {
+                'axisY': {
+                    'minimum': 0,
                 },
                 'data': [
                     {
                         'type': 'line',
-                        'dataPoints': [
-                            {'x': pt[0].total_seconds(), 'y': pt[1]}
-                            for pt in task.series[stat]]
+                        'markerType': 'none',
+                        'dataPoints': self._datapoints(
+                            label=uuid, task=task, series=task.series[stat]),
                     }
-                    for label, task in self.tasks.iteritems()
+                    for uuid, task in tasks.iteritems()
                 ],
+                'title': {
+                    'text': '{}: {} {}'.format(label, stat[0], stat[1]),
+                },
+                'zoomEnabled': True,
             }
             for stat in (('cpu', 'user+sys__rate'),
+                         ('mem', 'rss'),
                          ('net:eth0', 'tx+rx__rate'),
-                         ('mem', 'rss'))
-        ]
+                         ('net:keep0', 'tx+rx__rate'))]
+
+    def _datapoints(self, label, task, series):
+        points = [
+            {'x': pt[0].total_seconds(), 'y': pt[1]}
+            for pt in series]
+        if len(points) > 0:
+            points[-1]['markerType'] = 'cross'
+            points[-1]['markerSize'] = 12
+        return points
diff --git a/tools/crunchstat-summary/crunchstat_summary/command.py b/tools/crunchstat-summary/crunchstat_summary/command.py
index c28b00f..2c7574c 100644
--- a/tools/crunchstat-summary/crunchstat_summary/command.py
+++ b/tools/crunchstat-summary/crunchstat_summary/command.py
@@ -1,8 +1,9 @@
 import argparse
 import gzip
+import logging
 import sys
 
-from crunchstat_summary import summarizer
+from crunchstat_summary import logger, summarizer
 
 
 class ArgumentParser(argparse.ArgumentParser):
@@ -22,11 +23,21 @@ class ArgumentParser(argparse.ArgumentParser):
         self.add_argument(
             '--format', type=str, choices=('html', 'text'), default='text',
             help='Report format')
+        self.add_argument(
+            '--verbose', action='store_true',
+            help='Write progress messages to stderr')
+        self.add_argument(
+            '--debug', action='store_true',
+            help='Write debug messages to stderr')
 
 
 class Command(object):
     def __init__(self, args):
         self.args = args
+        if args.debug:
+            logger.setLevel(logging.DEBUG)
+        elif args.verbose:
+            logger.setLevel(logging.INFO)
 
     def run(self):
         if self.args.pipeline_instance:
diff --git a/tools/crunchstat-summary/crunchstat_summary/summarizer.py b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
index 8954b90..ba1919f 100644
--- a/tools/crunchstat-summary/crunchstat_summary/summarizer.py
+++ b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
@@ -6,13 +6,12 @@ import crunchstat_summary.chartjs
 import datetime
 import functools
 import itertools
-import logging
 import math
 import re
 import sys
 
-logger = logging.getLogger(__name__)
-logger.addHandler(logging.NullHandler())
+from arvados.api import OrderedJsonModel
+from crunchstat_summary import logger
 
 # Recommend memory constraints that are this multiple of an integral
 # number of GiB. (Actual nodes tend to be sold in sizes like 8 GiB
@@ -29,11 +28,15 @@ class Task(object):
 class Summarizer(object):
     existing_constraints = {}
 
-    def __init__(self, logdata, label='job'):
+    def __init__(self, logdata, label=None):
         self._logdata = logdata
         self.label = label
+        self.starttime = None
+        self.finishtime = None
+        logger.debug("%s: logdata %s", self.label, repr(logdata))
 
     def run(self):
+        logger.debug("%s: parsing log data", self.label)
         # stats_max: {category: {stat: val}}
         self.stats_max = collections.defaultdict(
             functools.partial(collections.defaultdict,
@@ -51,20 +54,34 @@ class Summarizer(object):
                 if elapsed > self.stats_max['time']['elapsed']:
                     self.stats_max['time']['elapsed'] = elapsed
                 continue
-            m = re.search(r'^(?P<timestamp>\S+) \S+ \d+ (?P<seq>\d+) stderr crunchstat: (?P<category>\S+) (?P<current>.*?)( -- interval (?P<interval>.*))?\n', line)
+            m = re.search(r'^(?P<timestamp>\S+) (?P<job_uuid>\S+) \d+ (?P<seq>\d+) stderr crunchstat: (?P<category>\S+) (?P<current>.*?)( -- interval (?P<interval>.*))?\n', line)
             if not m:
                 continue
+            if self.label is None:
+                self.label = m.group('job_uuid')
+                logger.debug('%s: using job uuid as label', self.label)
             if m.group('category').endswith(':'):
                 # "notice:" etc.
                 continue
             elif m.group('category') == 'error':
                 continue
             task_id = m.group('seq')
+            task = self.tasks[task_id]
+
+            # Use the first and last crunchstat timestamps as
+            # approximations of starttime and finishtime.
             timestamp = datetime.datetime.strptime(
                 m.group('timestamp'), '%Y-%m-%d_%H:%M:%S')
-            task = self.tasks[task_id]
             if not task.starttime:
                 task.starttime = timestamp
+                logger.debug('%s: task %s starttime %s',
+                             self.label, task_id, timestamp)
+            task.finishtime = timestamp
+
+            if not self.starttime:
+                self.starttime = timestamp
+            self.finishtime = timestamp
+
             this_interval_s = None
             for group in ['current', 'interval']:
                 if not m.group(group):
@@ -119,13 +136,27 @@ class Summarizer(object):
                         continue
                     self.job_tot[category][stat] += val
 
+    def long_label(self):
+        label = self.label
+        if self.finishtime:
+            label += ' -- elapsed time '
+            s = (self.finishtime - self.starttime).total_seconds()
+            if s > 86400:
+                label += '{}d'.format(int(s/86400))
+            if s > 3600:
+                label += '{}h'.format(int(s/3600) % 24)
+            if s > 60:
+                label += '{}m'.format(int(s/60) % 60)
+            label += '{}s'.format(int(s) % 60)
+        return label
+
     def text_report(self):
         return "\n".join(itertools.chain(
             self._text_report_gen(),
             self._recommend_gen())) + "\n"
 
     def html_report(self):
-        return crunchstat_summary.chartjs.ChartJS(self.label, self.tasks).html()
+        return crunchstat_summary.chartjs.ChartJS(self.label, [self]).html()
 
     def _text_report_gen(self):
         yield "\t".join(['category', 'metric', 'task_max', 'task_max_rate', 'job_total'])
@@ -227,6 +258,7 @@ class CollectionSummarizer(Summarizer):
                     collection_id, len(filenames)))
         super(CollectionSummarizer, self).__init__(
             collection.open(filenames[0]))
+        self.label = collection_id
 
 
 class JobSummarizer(CollectionSummarizer):
@@ -243,11 +275,12 @@ class JobSummarizer(CollectionSummarizer):
                 "job {} has no log; live summary not implemented".format(
                     self.job['uuid']))
         super(JobSummarizer, self).__init__(self.job['log'])
+        self.label = self.job['uuid']
 
 
 class PipelineSummarizer():
     def __init__(self, pipeline_instance_uuid):
-        arv = arvados.api('v1')
+        arv = arvados.api('v1', model=OrderedJsonModel())
         instance = arv.pipeline_instances().get(
             uuid=pipeline_instance_uuid).execute()
         self.summarizers = collections.OrderedDict()
@@ -260,11 +293,12 @@ class PipelineSummarizer():
                     "%s: skipping job %s with no log available",
                     cname, component['job'].get('uuid'))
             else:
-                logger.debug(
-                    "%s: reading log from %s", cname, component['job']['log'])
+                logger.info(
+                    "%s: logdata %s", cname, component['job']['log'])
                 summarizer = JobSummarizer(component['job'])
                 summarizer.label = cname
                 self.summarizers[cname] = summarizer
+        self.label = pipeline_instance_uuid
 
     def run(self):
         for summarizer in self.summarizers.itervalues():
@@ -278,3 +312,7 @@ class PipelineSummarizer():
             txt += summarizer.text_report()
             txt += '\n'
         return txt
+
+    def html_report(self):
+        return crunchstat_summary.chartjs.ChartJS(
+            self.label, self.summarizers.itervalues()).html()

commit ed0973ff9f12af019025b0c6e100e718f698c37b
Author: Tom Clegg <tom at curoverse.com>
Date:   Wed Dec 23 11:39:14 2015 -0500

    8123: Add option (--format html) to generate canvasjs charts.

diff --git a/tools/crunchstat-summary/bin/crunchstat-summary b/tools/crunchstat-summary/bin/crunchstat-summary
index c32b50e..e16bd8e 100755
--- a/tools/crunchstat-summary/bin/crunchstat-summary
+++ b/tools/crunchstat-summary/bin/crunchstat-summary
@@ -10,6 +10,6 @@ import sys
 logging.getLogger().addHandler(logging.StreamHandler())
 
 args = crunchstat_summary.command.ArgumentParser().parse_args(sys.argv[1:])
-s = crunchstat_summary.command.Command(args).summarizer()
-s.run()
-print(s.report(), end='')
+cmd = crunchstat_summary.command.Command(args)
+cmd.run()
+print(cmd.report(), end='')
diff --git a/tools/crunchstat-summary/crunchstat_summary/chartjs.js b/tools/crunchstat-summary/crunchstat_summary/chartjs.js
new file mode 100644
index 0000000..3106e62
--- /dev/null
+++ b/tools/crunchstat-summary/crunchstat_summary/chartjs.js
@@ -0,0 +1,11 @@
+window.onload = function() {
+    var options = {};
+    chartData.forEach(function(data, idx) {
+        var div = document.createElement('div');
+        div.setAttribute('id', 'chart-'+idx);
+        div.setAttribute('style', 'width: 100%; height: 150px');
+        document.body.appendChild(div);
+        var chart = new CanvasJS.Chart('chart-'+idx, data);
+        chart.render();
+    });
+}
diff --git a/tools/crunchstat-summary/crunchstat_summary/chartjs.py b/tools/crunchstat-summary/crunchstat_summary/chartjs.py
new file mode 100644
index 0000000..85b49b8
--- /dev/null
+++ b/tools/crunchstat-summary/crunchstat_summary/chartjs.py
@@ -0,0 +1,50 @@
+from __future__ import print_function
+
+import json
+import pkg_resources
+
+
+class ChartJS(object):
+    JSLIB = 'https://cdnjs.cloudflare.com/ajax/libs/canvasjs/1.7.0/canvasjs.js'
+
+    def __init__(self, label, tasks):
+        self.label = label
+        self.tasks = tasks
+
+    def html(self):
+        return '''<!doctype html><html><head>
+        <title>{} stats</title>
+        <script type="text/javascript" src="{}"></script>
+        <script type="text/javascript">{}</script>
+        </head><body></body></html>
+        '''.format(self.label, self.JSLIB, self.js())
+
+    def js(self):
+        return 'var chartData = {};\n{}'.format(
+            json.dumps(self.chartData()),
+            pkg_resources.resource_string('crunchstat_summary', 'chartjs.js'))
+
+    def chartData(self):
+        maxpts = 0
+        for task in self.tasks.itervalues():
+            for series in task.series.itervalues():
+                maxpts = max(maxpts, len(series))
+        return [
+            {
+                'title': {
+                    'text': '{}: {} {}'.format(self.label, stat[0], stat[1]),
+                },
+                'data': [
+                    {
+                        'type': 'line',
+                        'dataPoints': [
+                            {'x': pt[0].total_seconds(), 'y': pt[1]}
+                            for pt in task.series[stat]]
+                    }
+                    for label, task in self.tasks.iteritems()
+                ],
+            }
+            for stat in (('cpu', 'user+sys__rate'),
+                         ('net:eth0', 'tx+rx__rate'),
+                         ('mem', 'rss'))
+        ]
diff --git a/tools/crunchstat-summary/crunchstat_summary/command.py b/tools/crunchstat-summary/crunchstat_summary/command.py
index fc37190..c28b00f 100644
--- a/tools/crunchstat-summary/crunchstat_summary/command.py
+++ b/tools/crunchstat-summary/crunchstat_summary/command.py
@@ -19,22 +19,32 @@ class ArgumentParser(argparse.ArgumentParser):
         src.add_argument(
             '--log-file', type=str,
             help='Read log data from a regular file')
+        self.add_argument(
+            '--format', type=str, choices=('html', 'text'), default='text',
+            help='Report format')
 
 
 class Command(object):
     def __init__(self, args):
         self.args = args
 
-    def summarizer(self):
+    def run(self):
         if self.args.pipeline_instance:
-            return summarizer.PipelineSummarizer(self.args.pipeline_instance)
+            self.summer = summarizer.PipelineSummarizer(self.args.pipeline_instance)
         elif self.args.job:
-            return summarizer.JobSummarizer(self.args.job)
+            self.summer = summarizer.JobSummarizer(self.args.job)
         elif self.args.log_file:
             if self.args.log_file.endswith('.gz'):
                 fh = gzip.open(self.args.log_file)
             else:
                 fh = open(self.args.log_file)
-            return summarizer.Summarizer(fh)
+            self.summer = summarizer.Summarizer(fh)
         else:
-            return summarizer.Summarizer(sys.stdin)
+            self.summer = summarizer.Summarizer(sys.stdin)
+        return self.summer.run()
+
+    def report(self):
+        if self.args.format == 'html':
+            return self.summer.html_report()
+        elif self.args.format == 'text':
+            return self.summer.text_report()
diff --git a/tools/crunchstat-summary/crunchstat_summary/summarizer.py b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
index d221363..8954b90 100644
--- a/tools/crunchstat-summary/crunchstat_summary/summarizer.py
+++ b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
@@ -2,6 +2,8 @@ from __future__ import print_function
 
 import arvados
 import collections
+import crunchstat_summary.chartjs
+import datetime
 import functools
 import itertools
 import logging
@@ -17,6 +19,13 @@ logger.addHandler(logging.NullHandler())
 # that have amounts like 7.5 GiB according to the kernel.)
 AVAILABLE_RAM_RATIO = 0.95
 
+
+class Task(object):
+    def __init__(self):
+        self.starttime = None
+        self.series = collections.defaultdict(list)
+
+
 class Summarizer(object):
     existing_constraints = {}
 
@@ -32,6 +41,7 @@ class Summarizer(object):
         # task_stats: {task_id: {category: {stat: val}}}
         self.task_stats = collections.defaultdict(
             functools.partial(collections.defaultdict, dict))
+        self.tasks = collections.defaultdict(Task)
         for line in self._logdata:
             m = re.search(r'^\S+ \S+ \d+ (?P<seq>\d+) success in (?P<elapsed>\d+) seconds', line)
             if m:
@@ -41,7 +51,7 @@ class Summarizer(object):
                 if elapsed > self.stats_max['time']['elapsed']:
                     self.stats_max['time']['elapsed'] = elapsed
                 continue
-            m = re.search(r'^\S+ \S+ \d+ (?P<seq>\d+) stderr crunchstat: (?P<category>\S+) (?P<current>.*?)( -- interval (?P<interval>.*))?\n', line)
+            m = re.search(r'^(?P<timestamp>\S+) \S+ \d+ (?P<seq>\d+) stderr crunchstat: (?P<category>\S+) (?P<current>.*?)( -- interval (?P<interval>.*))?\n', line)
             if not m:
                 continue
             if m.group('category').endswith(':'):
@@ -50,6 +60,11 @@ class Summarizer(object):
             elif m.group('category') == 'error':
                 continue
             task_id = m.group('seq')
+            timestamp = datetime.datetime.strptime(
+                m.group('timestamp'), '%Y-%m-%d_%H:%M:%S')
+            task = self.tasks[task_id]
+            if not task.starttime:
+                task.starttime = timestamp
             this_interval_s = None
             for group in ['current', 'interval']:
                 if not m.group(group):
@@ -84,7 +99,13 @@ class Summarizer(object):
                         else:
                             stat = stat + '__rate'
                             val = val / this_interval_s
+                            if stat in ['user+sys__rate', 'tx+rx__rate']:
+                                task.series[category, stat].append(
+                                    (timestamp - task.starttime, val))
                     else:
+                        if stat in ['rss']:
+                            task.series[category, stat].append(
+                                (timestamp - task.starttime, val))
                         self.task_stats[task_id][category][stat] = val
                     if val > self.stats_max[category][stat]:
                         self.stats_max[category][stat] = val
@@ -98,12 +119,15 @@ class Summarizer(object):
                         continue
                     self.job_tot[category][stat] += val
 
-    def report(self):
+    def text_report(self):
         return "\n".join(itertools.chain(
-            self._report_gen(),
+            self._text_report_gen(),
             self._recommend_gen())) + "\n"
 
-    def _report_gen(self):
+    def html_report(self):
+        return crunchstat_summary.chartjs.ChartJS(self.label, self.tasks).html()
+
+    def _text_report_gen(self):
         yield "\t".join(['category', 'metric', 'task_max', 'task_max_rate', 'job_total'])
         for category, stat_max in sorted(self.stats_max.iteritems()):
             for stat, val in sorted(stat_max.iteritems()):
@@ -246,11 +270,11 @@ class PipelineSummarizer():
         for summarizer in self.summarizers.itervalues():
             summarizer.run()
 
-    def report(self):
+    def text_report(self):
         txt = ''
         for cname, summarizer in self.summarizers.iteritems():
             txt += '### Summary for {} ({})\n'.format(
                 cname, summarizer.job['uuid'])
-            txt += summarizer.report()
+            txt += summarizer.text_report()
             txt += '\n'
         return txt
diff --git a/tools/crunchstat-summary/tests/test_examples.py b/tools/crunchstat-summary/tests/test_examples.py
index cf810ae..09e21a1 100644
--- a/tools/crunchstat-summary/tests/test_examples.py
+++ b/tools/crunchstat-summary/tests/test_examples.py
@@ -1,7 +1,6 @@
 import arvados
 import collections
 import crunchstat_summary.command
-import crunchstat_summary.summarizer
 import difflib
 import glob
 import gzip
@@ -9,17 +8,17 @@ import mock
 import os
 import unittest
 
-
 TESTS_DIR = os.path.dirname(os.path.abspath(__file__))
 
+
 class ReportDiff(unittest.TestCase):
-    def diff_known_report(self, logfile, summarizer):
+    def diff_known_report(self, logfile, cmd):
         expectfile = logfile+'.report'
         expect = open(expectfile).readlines()
-        self.diff_report(summarizer, expect, expectfile=expectfile)
+        self.diff_report(cmd, expect, expectfile=expectfile)
 
-    def diff_report(self, summarizer, expect, expectfile=None):
-        got = [x+"\n" for x in summarizer.report().strip("\n").split("\n")]
+    def diff_report(self, cmd, expect, expectfile=None):
+        got = [x+"\n" for x in cmd.report().strip("\n").split("\n")]
         self.assertEqual(got, expect, "\n"+"".join(difflib.context_diff(
             expect, got, fromfile=expectfile, tofile="(generated)")))
 
@@ -30,9 +29,9 @@ class SummarizeFile(ReportDiff):
             logfile = os.path.join(TESTS_DIR, fnm)
             args = crunchstat_summary.command.ArgumentParser().parse_args(
                 ['--log-file', logfile])
-            summarizer = crunchstat_summary.command.Command(args).summarizer()
-            summarizer.run()
-            self.diff_known_report(logfile, summarizer)
+            cmd = crunchstat_summary.command.Command(args)
+            cmd.run()
+            self.diff_known_report(logfile, cmd)
 
 
 class SummarizeJob(ReportDiff):
@@ -52,9 +51,9 @@ class SummarizeJob(ReportDiff):
         mock_cr().open.return_value = gzip.open(self.logfile)
         args = crunchstat_summary.command.ArgumentParser().parse_args(
             ['--job', self.fake_job_uuid])
-        summarizer = crunchstat_summary.command.Command(args).summarizer()
-        summarizer.run()
-        self.diff_known_report(self.logfile, summarizer)
+        cmd = crunchstat_summary.command.Command(args)
+        cmd.run()
+        self.diff_known_report(self.logfile, cmd)
         mock_api().jobs().get.assert_called_with(uuid=self.fake_job_uuid)
         mock_cr.assert_called_with(self.fake_log_id)
         mock_cr().open.assert_called_with('fake-logfile.txt')
@@ -113,8 +112,8 @@ class SummarizePipeline(ReportDiff):
         mock_cr().open.side_effect = [gzip.open(logfile) for _ in range(3)]
         args = crunchstat_summary.command.ArgumentParser().parse_args(
             ['--pipeline-instance', self.fake_instance['uuid']])
-        summarizer = crunchstat_summary.command.Command(args).summarizer()
-        summarizer.run()
+        cmd = crunchstat_summary.command.Command(args)
+        cmd.run()
 
         job_report = [
             line for line in open(logfile+'.report').readlines()
@@ -126,7 +125,7 @@ class SummarizePipeline(ReportDiff):
             job_report + ['\n'] +
             ['### Summary for baz (zzzzz-8i9sb-000000000000002)\n'] +
             job_report)
-        self.diff_report(summarizer, expect)
+        self.diff_report(cmd, expect)
         mock_cr.assert_has_calls(
             [
                 mock.call('fake-log-pdh-0'),

commit 99bc77234964b8dcfab89605e75bbab917b350a2
Author: Tom Clegg <tom at curoverse.com>
Date:   Tue Dec 22 16:16:15 2015 -0500

    8123: Aim 5% below GiB memory size boundaries.

diff --git a/tools/crunchstat-summary/crunchstat_summary/summarizer.py b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
index 49b67ff..d221363 100644
--- a/tools/crunchstat-summary/crunchstat_summary/summarizer.py
+++ b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
@@ -12,6 +12,11 @@ import sys
 logger = logging.getLogger(__name__)
 logger.addHandler(logging.NullHandler())
 
+# Recommend memory constraints that are this multiple of an integral
+# number of GiB. (Actual nodes tend to be sold in sizes like 8 GiB
+# that have amounts like 7.5 GiB according to the kernel.)
+AVAILABLE_RAM_RATIO = 0.95
+
 class Summarizer(object):
     existing_constraints = {}
 
@@ -159,7 +164,7 @@ class Summarizer(object):
                 int(used_cores))
 
     def _recommend_ram(self):
-        """Recommend asking for 2048 MiB RAM if max rss was 1248 MiB"""
+        """Recommend asking for (2048*0.95) MiB RAM if max rss was 1248 MiB"""
 
         used_ram = self.stats_max['mem']['rss']
         if used_ram == float('-Inf'):
@@ -167,14 +172,16 @@ class Summarizer(object):
             return
         used_ram = math.ceil(float(used_ram) / (1<<20))
         asked_ram = self.existing_constraints.get('min_ram_mb_per_node')
-        if asked_ram is None or math.ceil(used_ram/(1<<10)) < asked_ram/(1<<10):
+        if asked_ram is None or (
+                math.ceil((used_ram/AVAILABLE_RAM_RATIO)/(1<<10)) <
+                (asked_ram/AVAILABLE_RAM_RATIO)/(1<<10)):
             yield (
-                '#!! {} never used more than {} MiB RAM -- '
+                '#!! {} max RSS was {} MiB -- '
                 'try runtime_constraints "min_ram_mb_per_node":{}'
             ).format(
                 self.label,
                 int(used_ram),
-                int(math.ceil(used_ram/(1<<10))*(1<<10)))
+                int(math.ceil((used_ram/AVAILABLE_RAM_RATIO)/(1<<10))*(1<<10)*AVAILABLE_RAM_RATIO))
 
     def _format(self, val):
         """Return a string representation of a stat.
diff --git a/tools/crunchstat-summary/tests/logfile_20151204190335.txt.gz.report b/tools/crunchstat-summary/tests/logfile_20151204190335.txt.gz.report
index c94cd24..b12e931 100644
--- a/tools/crunchstat-summary/tests/logfile_20151204190335.txt.gz.report
+++ b/tools/crunchstat-summary/tests/logfile_20151204190335.txt.gz.report
@@ -29,4 +29,4 @@ time	elapsed	80	-	80
 # Max network traffic in a single task: 1.79GB
 # Max network speed in a single interval: 42.58MB/s
 #!! job max CPU usage was 13% -- try runtime_constraints "min_cores_per_node":1
-#!! job never used more than 334 MiB RAM -- try runtime_constraints "min_ram_mb_per_node":1024
+#!! job max RSS was 334 MiB -- try runtime_constraints "min_ram_mb_per_node":972
diff --git a/tools/crunchstat-summary/tests/logfile_20151210063411.txt.gz.report b/tools/crunchstat-summary/tests/logfile_20151210063411.txt.gz.report
index e711824..8e1a2d8 100644
--- a/tools/crunchstat-summary/tests/logfile_20151210063411.txt.gz.report
+++ b/tools/crunchstat-summary/tests/logfile_20151210063411.txt.gz.report
@@ -15,4 +15,4 @@ time	elapsed	2	-	4
 # Overall CPU usage: 0.00%
 # Max memory used by a single task: 0.00GB
 # Max network traffic in a single task: 0.00GB
-#!! job never used more than 1 MiB RAM -- try runtime_constraints "min_ram_mb_per_node":1024
+#!! job max RSS was 1 MiB -- try runtime_constraints "min_ram_mb_per_node":972
diff --git a/tools/crunchstat-summary/tests/logfile_20151210063439.txt.gz.report b/tools/crunchstat-summary/tests/logfile_20151210063439.txt.gz.report
index 5772cb4..dbe9321 100644
--- a/tools/crunchstat-summary/tests/logfile_20151210063439.txt.gz.report
+++ b/tools/crunchstat-summary/tests/logfile_20151210063439.txt.gz.report
@@ -15,4 +15,4 @@ time	elapsed	2	-	3
 # Overall CPU usage: 0.00%
 # Max memory used by a single task: 0.00GB
 # Max network traffic in a single task: 0.00GB
-#!! job never used more than 1 MiB RAM -- try runtime_constraints "min_ram_mb_per_node":1024
+#!! job max RSS was 1 MiB -- try runtime_constraints "min_ram_mb_per_node":972
diff --git a/tools/crunchstat-summary/tests/test_examples.py b/tools/crunchstat-summary/tests/test_examples.py
index a19d7ad..cf810ae 100644
--- a/tools/crunchstat-summary/tests/test_examples.py
+++ b/tools/crunchstat-summary/tests/test_examples.py
@@ -70,7 +70,7 @@ class SummarizePipeline(ReportDiff):
                     'uuid': 'zzzzz-8i9sb-000000000000000',
                     'log': 'fake-log-pdh-0',
                     'runtime_constraints': {
-                        'min_ram_mb_per_node': 1024,
+                        'min_ram_mb_per_node': 900,
                         'min_cores_per_node': 1,
                     },
                 },
@@ -80,7 +80,7 @@ class SummarizePipeline(ReportDiff):
                     'uuid': 'zzzzz-8i9sb-000000000000001',
                     'log': 'fake-log-pdh-1',
                     'runtime_constraints': {
-                        'min_ram_mb_per_node': 1024,
+                        'min_ram_mb_per_node': 900,
                         'min_cores_per_node': 1,
                     },
                 },
@@ -96,7 +96,7 @@ class SummarizePipeline(ReportDiff):
                     'uuid': 'zzzzz-8i9sb-000000000000002',
                     'log': 'fake-log-pdh-2',
                     'runtime_constraints': {
-                        'min_ram_mb_per_node': 1024,
+                        'min_ram_mb_per_node': 900,
                         'min_cores_per_node': 1,
                     },
                 },

commit dc47b7ad046754ab31613bd264aafcffbdc453dc
Author: Tom Clegg <tom at curoverse.com>
Date:   Mon Dec 21 16:53:37 2015 -0500

    8123: Recommend more economical values for resource_constraints.

diff --git a/tools/crunchstat-summary/bin/crunchstat-summary b/tools/crunchstat-summary/bin/crunchstat-summary
index f42d30b..c32b50e 100755
--- a/tools/crunchstat-summary/bin/crunchstat-summary
+++ b/tools/crunchstat-summary/bin/crunchstat-summary
@@ -4,8 +4,11 @@ from __future__ import print_function
 
 import crunchstat_summary.command
 import crunchstat_summary.summarizer
+import logging
 import sys
 
+logging.getLogger().addHandler(logging.StreamHandler())
+
 args = crunchstat_summary.command.ArgumentParser().parse_args(sys.argv[1:])
 s = crunchstat_summary.command.Command(args).summarizer()
 s.run()
diff --git a/tools/crunchstat-summary/crunchstat_summary/summarizer.py b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
index 1b7f950..49b67ff 100644
--- a/tools/crunchstat-summary/crunchstat_summary/summarizer.py
+++ b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
@@ -3,13 +3,21 @@ from __future__ import print_function
 import arvados
 import collections
 import functools
+import itertools
+import logging
+import math
 import re
 import sys
 
+logger = logging.getLogger(__name__)
+logger.addHandler(logging.NullHandler())
 
 class Summarizer(object):
-    def __init__(self, logdata):
+    existing_constraints = {}
+
+    def __init__(self, logdata, label='job'):
         self._logdata = logdata
+        self.label = label
 
     def run(self):
         # stats_max: {category: {stat: val}}
@@ -64,9 +72,9 @@ class Summarizer(object):
                             this_interval_s = val
                             continue
                         elif not (this_interval_s > 0):
-                            print("BUG? interval stat given with duration {!r}".
-                                  format(this_interval_s),
-                                  file=sys.stderr)
+                            logger.error(
+                                "BUG? interval stat given with duration {!r}".
+                                format(this_interval_s))
                             continue
                         else:
                             stat = stat + '__rate'
@@ -75,12 +83,7 @@ class Summarizer(object):
                         self.task_stats[task_id][category][stat] = val
                     if val > self.stats_max[category][stat]:
                         self.stats_max[category][stat] = val
-
-    def report(self):
-        return "\n".join(self._report_gen()) + "\n"
-
-    def _report_gen(self):
-        job_tot = collections.defaultdict(
+        self.job_tot = collections.defaultdict(
             functools.partial(collections.defaultdict, int))
         for task_id, task_stat in self.task_stats.iteritems():
             for category, stat_last in task_stat.iteritems():
@@ -88,7 +91,14 @@ class Summarizer(object):
                     if stat in ['cpus', 'cache', 'swap', 'rss']:
                         # meaningless stats like 16 cpu cores x 5 tasks = 80
                         continue
-                    job_tot[category][stat] += val
+                    self.job_tot[category][stat] += val
+
+    def report(self):
+        return "\n".join(itertools.chain(
+            self._report_gen(),
+            self._recommend_gen())) + "\n"
+
+    def _report_gen(self):
         yield "\t".join(['category', 'metric', 'task_max', 'task_max_rate', 'job_total'])
         for category, stat_max in sorted(self.stats_max.iteritems()):
             for stat, val in sorted(stat_max.iteritems()):
@@ -96,7 +106,7 @@ class Summarizer(object):
                     continue
                 max_rate = self._format(stat_max.get(stat+'__rate', '-'))
                 val = self._format(val)
-                tot = self._format(job_tot[category].get(stat, '-'))
+                tot = self._format(self.job_tot[category].get(stat, '-'))
                 yield "\t".join([category, stat, str(val), max_rate, tot])
         for args in (
                 ('Max CPU time spent by a single task: {}s',
@@ -106,7 +116,8 @@ class Summarizer(object):
                  self.stats_max['cpu']['user+sys__rate'],
                  lambda x: x * 100),
                 ('Overall CPU usage: {}%',
-                 job_tot['cpu']['user+sys'] / job_tot['time']['elapsed'],
+                 self.job_tot['cpu']['user+sys'] /
+                 self.job_tot['time']['elapsed'],
                  lambda x: x * 100),
                 ('Max memory used by a single task: {}GB',
                  self.stats_max['mem']['rss'],
@@ -124,6 +135,47 @@ class Summarizer(object):
                 val = transform(val)
             yield "# "+format_string.format(self._format(val))
 
+    def _recommend_gen(self):
+        return itertools.chain(
+            self._recommend_cpu(),
+            self._recommend_ram())
+
+    def _recommend_cpu(self):
+        """Recommend asking for 4 cores if max CPU usage was 333%"""
+
+        cpu_max_rate = self.stats_max['cpu']['user+sys__rate']
+        if cpu_max_rate == float('-Inf'):
+            logger.warning('%s: no CPU usage data', self.label)
+            return
+        used_cores = int(math.ceil(cpu_max_rate))
+        asked_cores =  self.existing_constraints.get('min_cores_per_node')
+        if asked_cores is None or used_cores < asked_cores:
+            yield (
+                '#!! {} max CPU usage was {}% -- '
+                'try runtime_constraints "min_cores_per_node":{}'
+            ).format(
+                self.label,
+                int(math.ceil(cpu_max_rate*100)),
+                int(used_cores))
+
+    def _recommend_ram(self):
+        """Recommend asking for 2048 MiB RAM if max rss was 1248 MiB"""
+
+        used_ram = self.stats_max['mem']['rss']
+        if used_ram == float('-Inf'):
+            logger.warning('%s: no memory usage data', self.label)
+            return
+        used_ram = math.ceil(float(used_ram) / (1<<20))
+        asked_ram = self.existing_constraints.get('min_ram_mb_per_node')
+        if asked_ram is None or math.ceil(used_ram/(1<<10)) < asked_ram/(1<<10):
+            yield (
+                '#!! {} never used more than {} MiB RAM -- '
+                'try runtime_constraints "min_ram_mb_per_node":{}'
+            ).format(
+                self.label,
+                int(used_ram),
+                int(math.ceil(used_ram/(1<<10))*(1<<10)))
+
     def _format(self, val):
         """Return a string representation of a stat.
 
@@ -133,6 +185,7 @@ class Summarizer(object):
         else:
             return '{}'.format(val)
 
+
 class CollectionSummarizer(Summarizer):
     def __init__(self, collection_id):
         collection = arvados.collection.CollectionReader(collection_id)
@@ -141,7 +194,9 @@ class CollectionSummarizer(Summarizer):
             raise ValueError(
                 "collection {} has {} files; need exactly one".format(
                     collection_id, len(filenames)))
-        super(CollectionSummarizer, self).__init__(collection.open(filenames[0]))
+        super(CollectionSummarizer, self).__init__(
+            collection.open(filenames[0]))
+
 
 class JobSummarizer(CollectionSummarizer):
     def __init__(self, job):
@@ -150,12 +205,15 @@ class JobSummarizer(CollectionSummarizer):
             self.job = arv.jobs().get(uuid=job).execute()
         else:
             self.job = job
+        self.label = self.job['uuid']
+        self.existing_constraints = self.job.get('runtime_constraints', {})
         if not self.job['log']:
             raise ValueError(
                 "job {} has no log; live summary not implemented".format(
                     self.job['uuid']))
         super(JobSummarizer, self).__init__(self.job['log'])
 
+
 class PipelineSummarizer():
     def __init__(self, pipeline_instance_uuid):
         arv = arvados.api('v1')
@@ -164,16 +222,17 @@ class PipelineSummarizer():
         self.summarizers = collections.OrderedDict()
         for cname, component in instance['components'].iteritems():
             if 'job' not in component:
-                print("{}: skipping component with no job assigned".format(
-                    cname), file=sys.stderr)
+                logger.warning(
+                    "%s: skipping component with no job assigned", cname)
             elif component['job'].get('log') is None:
-                print("{}: skipping component with no log available".format(
-                    cname), file=sys.stderr)
+                logger.warning(
+                    "%s: skipping job %s with no log available",
+                    cname, component['job'].get('uuid'))
             else:
-                print("{}: reading log from {}".format(
-                    cname, component['job']['log']), file=sys.stderr)
-                summarizer = CollectionSummarizer(component['job']['log'])
-                summarizer.job_uuid = component['job']['uuid']
+                logger.debug(
+                    "%s: reading log from %s", cname, component['job']['log'])
+                summarizer = JobSummarizer(component['job'])
+                summarizer.label = cname
                 self.summarizers[cname] = summarizer
 
     def run(self):
@@ -184,7 +243,7 @@ class PipelineSummarizer():
         txt = ''
         for cname, summarizer in self.summarizers.iteritems():
             txt += '### Summary for {} ({})\n'.format(
-                cname, summarizer.job_uuid)
+                cname, summarizer.job['uuid'])
             txt += summarizer.report()
             txt += '\n'
         return txt
diff --git a/tools/crunchstat-summary/tests/logfile_20151204190335.txt.gz.report b/tools/crunchstat-summary/tests/logfile_20151204190335.txt.gz.report
index ef7beb1..c94cd24 100644
--- a/tools/crunchstat-summary/tests/logfile_20151204190335.txt.gz.report
+++ b/tools/crunchstat-summary/tests/logfile_20151204190335.txt.gz.report
@@ -28,3 +28,5 @@ time	elapsed	80	-	80
 # Max memory used by a single task: 0.35GB
 # Max network traffic in a single task: 1.79GB
 # Max network speed in a single interval: 42.58MB/s
+#!! job max CPU usage was 13% -- try runtime_constraints "min_cores_per_node":1
+#!! job never used more than 334 MiB RAM -- try runtime_constraints "min_ram_mb_per_node":1024
diff --git a/tools/crunchstat-summary/tests/logfile_20151210063411.txt.gz.report b/tools/crunchstat-summary/tests/logfile_20151210063411.txt.gz.report
index 38af3e7..e711824 100644
--- a/tools/crunchstat-summary/tests/logfile_20151210063411.txt.gz.report
+++ b/tools/crunchstat-summary/tests/logfile_20151210063411.txt.gz.report
@@ -15,3 +15,4 @@ time	elapsed	2	-	4
 # Overall CPU usage: 0.00%
 # Max memory used by a single task: 0.00GB
 # Max network traffic in a single task: 0.00GB
+#!! job never used more than 1 MiB RAM -- try runtime_constraints "min_ram_mb_per_node":1024
diff --git a/tools/crunchstat-summary/tests/logfile_20151210063439.txt.gz.report b/tools/crunchstat-summary/tests/logfile_20151210063439.txt.gz.report
index 7e42d61..5772cb4 100644
--- a/tools/crunchstat-summary/tests/logfile_20151210063439.txt.gz.report
+++ b/tools/crunchstat-summary/tests/logfile_20151210063439.txt.gz.report
@@ -15,3 +15,4 @@ time	elapsed	2	-	3
 # Overall CPU usage: 0.00%
 # Max memory used by a single task: 0.00GB
 # Max network traffic in a single task: 0.00GB
+#!! job never used more than 1 MiB RAM -- try runtime_constraints "min_ram_mb_per_node":1024
diff --git a/tools/crunchstat-summary/tests/test_examples.py b/tools/crunchstat-summary/tests/test_examples.py
index 1bce693..a19d7ad 100644
--- a/tools/crunchstat-summary/tests/test_examples.py
+++ b/tools/crunchstat-summary/tests/test_examples.py
@@ -24,7 +24,7 @@ class ReportDiff(unittest.TestCase):
             expect, got, fromfile=expectfile, tofile="(generated)")))
 
 
-class ExampleLogsTestCase(ReportDiff):
+class SummarizeFile(ReportDiff):
     def test_example_files(self):
         for fnm in glob.glob(os.path.join(TESTS_DIR, '*.txt.gz')):
             logfile = os.path.join(TESTS_DIR, fnm)
@@ -35,22 +35,28 @@ class ExampleLogsTestCase(ReportDiff):
             self.diff_known_report(logfile, summarizer)
 
 
-class LookupJobUUID(ReportDiff):
-    fake_uuid = 'zzzzz-8i9sb-jq0ekny1xou3zoh'
+class SummarizeJob(ReportDiff):
+    fake_job_uuid = 'zzzzz-8i9sb-jjjjjjjjjjjjjjj'
+    fake_log_id = 'fake-log-collection-id'
+    fake_job = {
+        'uuid': fake_job_uuid,
+        'log': fake_log_id,
+    }
+    logfile = os.path.join(TESTS_DIR, 'logfile_20151204190335.txt.gz')
 
     @mock.patch('arvados.collection.CollectionReader')
     @mock.patch('arvados.api')
-    def test_job_uuid(self, mock_api, mock_cr):
-        logfile = os.path.join(TESTS_DIR, 'logfile_20151204190335.txt.gz')
-        mock_api().jobs().get().execute.return_value = {'log': 'fake-uuid'}
+    def test_job_report(self, mock_api, mock_cr):
+        mock_api().jobs().get().execute.return_value = self.fake_job
         mock_cr().__iter__.return_value = ['fake-logfile.txt']
-        mock_cr().open.return_value = gzip.open(logfile)
+        mock_cr().open.return_value = gzip.open(self.logfile)
         args = crunchstat_summary.command.ArgumentParser().parse_args(
-            ['--job', self.fake_uuid])
+            ['--job', self.fake_job_uuid])
         summarizer = crunchstat_summary.command.Command(args).summarizer()
         summarizer.run()
-        self.diff_known_report(logfile, summarizer)
-        mock_api().jobs().get.assert_called_with(uuid=self.fake_uuid)
+        self.diff_known_report(self.logfile, summarizer)
+        mock_api().jobs().get.assert_called_with(uuid=self.fake_job_uuid)
+        mock_cr.assert_called_with(self.fake_log_id)
         mock_cr().open.assert_called_with('fake-logfile.txt')
 
 
@@ -63,12 +69,20 @@ class SummarizePipeline(ReportDiff):
                 'job': {
                     'uuid': 'zzzzz-8i9sb-000000000000000',
                     'log': 'fake-log-pdh-0',
+                    'runtime_constraints': {
+                        'min_ram_mb_per_node': 1024,
+                        'min_cores_per_node': 1,
+                    },
                 },
             }],
             ['bar', {
                 'job': {
                     'uuid': 'zzzzz-8i9sb-000000000000001',
                     'log': 'fake-log-pdh-1',
+                    'runtime_constraints': {
+                        'min_ram_mb_per_node': 1024,
+                        'min_cores_per_node': 1,
+                    },
                 },
             }],
             ['no-job-assigned', {}],
@@ -81,6 +95,10 @@ class SummarizePipeline(ReportDiff):
                 'job': {
                     'uuid': 'zzzzz-8i9sb-000000000000002',
                     'log': 'fake-log-pdh-2',
+                    'runtime_constraints': {
+                        'min_ram_mb_per_node': 1024,
+                        'min_cores_per_node': 1,
+                    },
                 },
             }]]),
     }
@@ -98,13 +116,16 @@ class SummarizePipeline(ReportDiff):
         summarizer = crunchstat_summary.command.Command(args).summarizer()
         summarizer.run()
 
+        job_report = [
+            line for line in open(logfile+'.report').readlines()
+            if not line.startswith('#!! ')]
         expect = (
             ['### Summary for foo (zzzzz-8i9sb-000000000000000)\n'] +
-            open(logfile+'.report').readlines() + ['\n'] +
+            job_report + ['\n'] +
             ['### Summary for bar (zzzzz-8i9sb-000000000000001)\n'] +
-            open(logfile+'.report').readlines() + ['\n'] +
+            job_report + ['\n'] +
             ['### Summary for baz (zzzzz-8i9sb-000000000000002)\n'] +
-            open(logfile+'.report').readlines())
+            job_report)
         self.diff_report(summarizer, expect)
         mock_cr.assert_has_calls(
             [

commit b608f1d1917b50317840b43f8c7caebddf36dc22
Author: Tom Clegg <tom at curoverse.com>
Date:   Mon Dec 21 14:42:44 2015 -0500

    8123: Add --pipeline-instance mode: generate a report for each finished component.

diff --git a/tools/crunchstat-summary/bin/crunchstat-summary b/tools/crunchstat-summary/bin/crunchstat-summary
index 662d783..f42d30b 100755
--- a/tools/crunchstat-summary/bin/crunchstat-summary
+++ b/tools/crunchstat-summary/bin/crunchstat-summary
@@ -7,6 +7,6 @@ import crunchstat_summary.summarizer
 import sys
 
 args = crunchstat_summary.command.ArgumentParser().parse_args(sys.argv[1:])
-s = crunchstat_summary.summarizer.Summarizer(args)
+s = crunchstat_summary.command.Command(args).summarizer()
 s.run()
 print(s.report(), end='')
diff --git a/tools/crunchstat-summary/crunchstat_summary/command.py b/tools/crunchstat-summary/crunchstat_summary/command.py
index 8186e5d..fc37190 100644
--- a/tools/crunchstat-summary/crunchstat_summary/command.py
+++ b/tools/crunchstat-summary/crunchstat_summary/command.py
@@ -1,4 +1,8 @@
 import argparse
+import gzip
+import sys
+
+from crunchstat_summary import summarizer
 
 
 class ArgumentParser(argparse.ArgumentParser):
@@ -10,5 +14,27 @@ class ArgumentParser(argparse.ArgumentParser):
             '--job', type=str, metavar='UUID',
             help='Look up the specified job and read its log data from Keep')
         src.add_argument(
+            '--pipeline-instance', type=str, metavar='UUID',
+            help='Summarize each component of the given pipeline instance')
+        src.add_argument(
             '--log-file', type=str,
             help='Read log data from a regular file')
+
+
+class Command(object):
+    def __init__(self, args):
+        self.args = args
+
+    def summarizer(self):
+        if self.args.pipeline_instance:
+            return summarizer.PipelineSummarizer(self.args.pipeline_instance)
+        elif self.args.job:
+            return summarizer.JobSummarizer(self.args.job)
+        elif self.args.log_file:
+            if self.args.log_file.endswith('.gz'):
+                fh = gzip.open(self.args.log_file)
+            else:
+                fh = open(self.args.log_file)
+            return summarizer.Summarizer(fh)
+        else:
+            return summarizer.Summarizer(sys.stdin)
diff --git a/tools/crunchstat-summary/crunchstat_summary/summarizer.py b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
index ac0964b..1b7f950 100644
--- a/tools/crunchstat-summary/crunchstat_summary/summarizer.py
+++ b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
@@ -3,14 +3,13 @@ from __future__ import print_function
 import arvados
 import collections
 import functools
-import gzip
 import re
 import sys
 
 
 class Summarizer(object):
-    def __init__(self, args):
-        self.args = args
+    def __init__(self, logdata):
+        self._logdata = logdata
 
     def run(self):
         # stats_max: {category: {stat: val}}
@@ -20,7 +19,7 @@ class Summarizer(object):
         # task_stats: {task_id: {category: {stat: val}}}
         self.task_stats = collections.defaultdict(
             functools.partial(collections.defaultdict, dict))
-        for line in self._logdata():
+        for line in self._logdata:
             m = re.search(r'^\S+ \S+ \d+ (?P<seq>\d+) success in (?P<elapsed>\d+) seconds', line)
             if m:
                 task_id = m.group('seq')
@@ -35,6 +34,8 @@ class Summarizer(object):
             if m.group('category').endswith(':'):
                 # "notice:" etc.
                 continue
+            elif m.group('category') == 'error':
+                continue
             task_id = m.group('seq')
             this_interval_s = None
             for group in ['current', 'interval']:
@@ -44,10 +45,15 @@ class Summarizer(object):
                 words = m.group(group).split(' ')
                 stats = {}
                 for val, stat in zip(words[::2], words[1::2]):
-                    if '.' in val:
-                        stats[stat] = float(val)
-                    else:
-                        stats[stat] = int(val)
+                    try:
+                        if '.' in val:
+                            stats[stat] = float(val)
+                        else:
+                            stats[stat] = int(val)
+                    except ValueError as e:
+                        raise ValueError(
+                            'Error parsing {} stat in "{}": {!r}'.format(
+                                stat, line, e))
                 if 'user' in stats or 'sys' in stats:
                     stats['user+sys'] = stats.get('user', 0) + stats.get('sys', 0)
                 if 'tx' in stats or 'rx' in stats:
@@ -127,25 +133,58 @@ class Summarizer(object):
         else:
             return '{}'.format(val)
 
-    def _logdata(self):
-        if self.args.log_file:
-            if self.args.log_file.endswith('.gz'):
-                return gzip.open(self.args.log_file)
-            else:
-                return open(self.args.log_file)
-        elif self.args.job:
-            arv = arvados.api('v1')
-            job = arv.jobs().get(uuid=self.args.job).execute()
-            if not job['log']:
-                raise ValueError(
-                    "job {} has no log; live summary not implemented".format(
-                        self.args.job))
-            collection = arvados.collection.CollectionReader(job['log'])
-            filenames = [filename for filename in collection]
-            if len(filenames) != 1:
-                raise ValueError(
-                    "collection {} has {} files; need exactly one".format(
-                        job.log, len(filenames)))
-            return collection.open(filenames[0])
+class CollectionSummarizer(Summarizer):
+    def __init__(self, collection_id):
+        collection = arvados.collection.CollectionReader(collection_id)
+        filenames = [filename for filename in collection]
+        if len(filenames) != 1:
+            raise ValueError(
+                "collection {} has {} files; need exactly one".format(
+                    collection_id, len(filenames)))
+        super(CollectionSummarizer, self).__init__(collection.open(filenames[0]))
+
+class JobSummarizer(CollectionSummarizer):
+    def __init__(self, job):
+        arv = arvados.api('v1')
+        if isinstance(job, str):
+            self.job = arv.jobs().get(uuid=job).execute()
         else:
-            return sys.stdin
+            self.job = job
+        if not self.job['log']:
+            raise ValueError(
+                "job {} has no log; live summary not implemented".format(
+                    self.job['uuid']))
+        super(JobSummarizer, self).__init__(self.job['log'])
+
+class PipelineSummarizer():
+    def __init__(self, pipeline_instance_uuid):
+        arv = arvados.api('v1')
+        instance = arv.pipeline_instances().get(
+            uuid=pipeline_instance_uuid).execute()
+        self.summarizers = collections.OrderedDict()
+        for cname, component in instance['components'].iteritems():
+            if 'job' not in component:
+                print("{}: skipping component with no job assigned".format(
+                    cname), file=sys.stderr)
+            elif component['job'].get('log') is None:
+                print("{}: skipping component with no log available".format(
+                    cname), file=sys.stderr)
+            else:
+                print("{}: reading log from {}".format(
+                    cname, component['job']['log']), file=sys.stderr)
+                summarizer = CollectionSummarizer(component['job']['log'])
+                summarizer.job_uuid = component['job']['uuid']
+                self.summarizers[cname] = summarizer
+
+    def run(self):
+        for summarizer in self.summarizers.itervalues():
+            summarizer.run()
+
+    def report(self):
+        txt = ''
+        for cname, summarizer in self.summarizers.iteritems():
+            txt += '### Summary for {} ({})\n'.format(
+                cname, summarizer.job_uuid)
+            txt += summarizer.report()
+            txt += '\n'
+        return txt
diff --git a/tools/crunchstat-summary/tests/test_examples.py b/tools/crunchstat-summary/tests/test_examples.py
index dbc3843..1bce693 100644
--- a/tools/crunchstat-summary/tests/test_examples.py
+++ b/tools/crunchstat-summary/tests/test_examples.py
@@ -1,22 +1,115 @@
+import arvados
+import collections
 import crunchstat_summary.command
 import crunchstat_summary.summarizer
 import difflib
 import glob
+import gzip
+import mock
 import os
 import unittest
 
 
-class ExampleLogsTestCase(unittest.TestCase):
+TESTS_DIR = os.path.dirname(os.path.abspath(__file__))
+
+class ReportDiff(unittest.TestCase):
+    def diff_known_report(self, logfile, summarizer):
+        expectfile = logfile+'.report'
+        expect = open(expectfile).readlines()
+        self.diff_report(summarizer, expect, expectfile=expectfile)
+
+    def diff_report(self, summarizer, expect, expectfile=None):
+        got = [x+"\n" for x in summarizer.report().strip("\n").split("\n")]
+        self.assertEqual(got, expect, "\n"+"".join(difflib.context_diff(
+            expect, got, fromfile=expectfile, tofile="(generated)")))
+
+
+class ExampleLogsTestCase(ReportDiff):
     def test_example_files(self):
-        dirname = os.path.dirname(os.path.abspath(__file__))
-        for fnm in glob.glob(os.path.join(dirname, '*.txt.gz')):
-            logfile = os.path.join(dirname, fnm)
+        for fnm in glob.glob(os.path.join(TESTS_DIR, '*.txt.gz')):
+            logfile = os.path.join(TESTS_DIR, fnm)
             args = crunchstat_summary.command.ArgumentParser().parse_args(
                 ['--log-file', logfile])
-            summarizer = crunchstat_summary.summarizer.Summarizer(args)
+            summarizer = crunchstat_summary.command.Command(args).summarizer()
             summarizer.run()
-            got = [x+"\n" for x in summarizer.report().strip("\n").split("\n")]
-            expectfile = logfile+'.report'
-            expect = open(expectfile).readlines()
-            self.assertEqual(got, expect, "\n"+"".join(difflib.context_diff(
-                expect, got, fromfile=expectfile, tofile="(generated)")))
+            self.diff_known_report(logfile, summarizer)
+
+
+class LookupJobUUID(ReportDiff):
+    fake_uuid = 'zzzzz-8i9sb-jq0ekny1xou3zoh'
+
+    @mock.patch('arvados.collection.CollectionReader')
+    @mock.patch('arvados.api')
+    def test_job_uuid(self, mock_api, mock_cr):
+        logfile = os.path.join(TESTS_DIR, 'logfile_20151204190335.txt.gz')
+        mock_api().jobs().get().execute.return_value = {'log': 'fake-uuid'}
+        mock_cr().__iter__.return_value = ['fake-logfile.txt']
+        mock_cr().open.return_value = gzip.open(logfile)
+        args = crunchstat_summary.command.ArgumentParser().parse_args(
+            ['--job', self.fake_uuid])
+        summarizer = crunchstat_summary.command.Command(args).summarizer()
+        summarizer.run()
+        self.diff_known_report(logfile, summarizer)
+        mock_api().jobs().get.assert_called_with(uuid=self.fake_uuid)
+        mock_cr().open.assert_called_with('fake-logfile.txt')
+
+
+class SummarizePipeline(ReportDiff):
+    fake_instance = {
+        'uuid': 'zzzzz-d1hrv-i3e77t9z5y8j9cc',
+        'owner_uuid': 'zzzzz-tpzed-xurymjxw79nv3jz',
+        'components': collections.OrderedDict([
+            ['foo', {
+                'job': {
+                    'uuid': 'zzzzz-8i9sb-000000000000000',
+                    'log': 'fake-log-pdh-0',
+                },
+            }],
+            ['bar', {
+                'job': {
+                    'uuid': 'zzzzz-8i9sb-000000000000001',
+                    'log': 'fake-log-pdh-1',
+                },
+            }],
+            ['no-job-assigned', {}],
+            ['unfinished-job', {
+                'job': {
+                    'uuid': 'zzzzz-8i9sb-xxxxxxxxxxxxxxx',
+                },
+            }],
+            ['baz', {
+                'job': {
+                    'uuid': 'zzzzz-8i9sb-000000000000002',
+                    'log': 'fake-log-pdh-2',
+                },
+            }]]),
+    }
+
+    @mock.patch('arvados.collection.CollectionReader')
+    @mock.patch('arvados.api')
+    def test_pipeline(self, mock_api, mock_cr):
+        logfile = os.path.join(TESTS_DIR, 'logfile_20151204190335.txt.gz')
+        mock_api().pipeline_instances().get().execute. \
+            return_value = self.fake_instance
+        mock_cr().__iter__.return_value = ['fake-logfile.txt']
+        mock_cr().open.side_effect = [gzip.open(logfile) for _ in range(3)]
+        args = crunchstat_summary.command.ArgumentParser().parse_args(
+            ['--pipeline-instance', self.fake_instance['uuid']])
+        summarizer = crunchstat_summary.command.Command(args).summarizer()
+        summarizer.run()
+
+        expect = (
+            ['### Summary for foo (zzzzz-8i9sb-000000000000000)\n'] +
+            open(logfile+'.report').readlines() + ['\n'] +
+            ['### Summary for bar (zzzzz-8i9sb-000000000000001)\n'] +
+            open(logfile+'.report').readlines() + ['\n'] +
+            ['### Summary for baz (zzzzz-8i9sb-000000000000002)\n'] +
+            open(logfile+'.report').readlines())
+        self.diff_report(summarizer, expect)
+        mock_cr.assert_has_calls(
+            [
+                mock.call('fake-log-pdh-0'),
+                mock.call('fake-log-pdh-1'),
+                mock.call('fake-log-pdh-2'),
+            ], any_order=True)
+        mock_cr().open.assert_called_with('fake-logfile.txt')

-----------------------------------------------------------------------


hooks/post-receive
--