[ARVADOS] updated: 423aa837838de86356440127a107653cf1c69ca3

git at public.curoverse.com
Mon Jan 4 09:42:25 EST 2016


Summary of changes:
 tools/crunchstat-summary/crunchstat_summary/chartjs.py | 4 ++--
 tools/crunchstat-summary/crunchstat_summary/command.py | 9 ++++-----
 2 files changed, 6 insertions(+), 7 deletions(-)

  discards  73deed377f62fe459c55c71c0602936ca55a3c28 (commit)
  discards  7fa204d6bcf35d36ec8be12d0e80de6a6e701f86 (commit)
  discards  5271b2180f18715d7fd09c3d4d4289fbcd2287aa (commit)
       via  423aa837838de86356440127a107653cf1c69ca3 (commit)
       via  45e33a52bfdc8677e3eaf5561decf71100a474d7 (commit)
       via  d85fc3bcf31f146d93250587ea24d2afd53044ac (commit)

This update added new revisions after undoing existing ones; that is, the
old history is not a strict subset of the new history.  This happens when
you --force push a change, leaving the repository with a history like
this:

 * -- * -- B -- O -- O -- O (73deed377f62fe459c55c71c0602936ca55a3c28)
            \
             N -- N -- N (423aa837838de86356440127a107653cf1c69ca3)

When this happens, we assume you have already received alert emails for
all of the O revisions, so this message reports only the revisions on the
N branch since the common base, B.

The revisions listed above that are new to this repository have not
appeared in any other notification email, so they are listed in full
below.


commit 423aa837838de86356440127a107653cf1c69ca3
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Dec 24 13:51:35 2015 -0500

    7883: Add option to include stats from child jobs.

diff --git a/tools/crunchstat-summary/crunchstat_summary/command.py b/tools/crunchstat-summary/crunchstat_summary/command.py
index 2c7574c..a5339dd 100644
--- a/tools/crunchstat-summary/crunchstat_summary/command.py
+++ b/tools/crunchstat-summary/crunchstat_summary/command.py
@@ -21,6 +21,9 @@ class ArgumentParser(argparse.ArgumentParser):
             '--log-file', type=str,
             help='Read log data from a regular file')
         self.add_argument(
+            '--include-child-jobs', action='store_true',
+            help='Include stats from child jobs')
+        self.add_argument(
             '--format', type=str, choices=('html', 'text'), default='text',
             help='Report format')
         self.add_argument(
@@ -40,18 +43,21 @@ class Command(object):
             logger.setLevel(logging.INFO)
 
     def run(self):
+        kwargs = {
+            'include_child_jobs': self.args.include_child_jobs,
+        }
         if self.args.pipeline_instance:
-            self.summer = summarizer.PipelineSummarizer(self.args.pipeline_instance)
+            self.summer = summarizer.PipelineSummarizer(self.args.pipeline_instance, **kwargs)
         elif self.args.job:
-            self.summer = summarizer.JobSummarizer(self.args.job)
+            self.summer = summarizer.JobSummarizer(self.args.job, **kwargs)
         elif self.args.log_file:
             if self.args.log_file.endswith('.gz'):
                 fh = gzip.open(self.args.log_file)
             else:
                 fh = open(self.args.log_file)
-            self.summer = summarizer.Summarizer(fh)
+            self.summer = summarizer.Summarizer(fh, **kwargs)
         else:
-            self.summer = summarizer.Summarizer(sys.stdin)
+            self.summer = summarizer.Summarizer(sys.stdin, **kwargs)
         return self.summer.run()
 
     def report(self):
diff --git a/tools/crunchstat-summary/crunchstat_summary/summarizer.py b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
index 6396b5d..d6c1916 100644
--- a/tools/crunchstat-summary/crunchstat_summary/summarizer.py
+++ b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
@@ -28,15 +28,14 @@ class Task(object):
 class Summarizer(object):
     existing_constraints = {}
 
-    def __init__(self, logdata, label=None):
+    def __init__(self, logdata, label=None, include_child_jobs=True):
         self._logdata = logdata
+
         self.label = label
         self.starttime = None
         self.finishtime = None
-        logger.debug("%s: logdata %s", self.label, repr(logdata))
+        self._include_child_jobs = include_child_jobs
 
-    def run(self):
-        logger.debug("%s: parsing log data", self.label)
         # stats_max: {category: {stat: val}}
         self.stats_max = collections.defaultdict(
             functools.partial(collections.defaultdict,
@@ -44,19 +43,52 @@ class Summarizer(object):
         # task_stats: {task_id: {category: {stat: val}}}
         self.task_stats = collections.defaultdict(
             functools.partial(collections.defaultdict, dict))
+
+        self.seq_to_uuid = {}
         self.tasks = collections.defaultdict(Task)
+
+        logger.debug("%s: logdata %s", self.label, repr(logdata))
+
+    def run(self):
+        logger.debug("%s: parsing log data", self.label)
         for line in self._logdata:
+            m = re.search(r'^\S+ \S+ \d+ (?P<seq>\d+) job_task (?P<task_uuid>\S+)$', line)
+            if m:
+                seq = int(m.group('seq'))
+                uuid = m.group('task_uuid')
+                self.seq_to_uuid[seq] = uuid
+                logger.debug('%s: seq %d is task %s', self.label, seq, uuid)
+                continue
+
             m = re.search(r'^\S+ \S+ \d+ (?P<seq>\d+) success in (?P<elapsed>\d+) seconds', line)
             if m:
-                task_id = m.group('seq')
+                task_id = self.seq_to_uuid[int(m.group('seq'))]
                 elapsed = int(m.group('elapsed'))
                 self.task_stats[task_id]['time'] = {'elapsed': elapsed}
                 if elapsed > self.stats_max['time']['elapsed']:
                     self.stats_max['time']['elapsed'] = elapsed
                 continue
+
+            m = re.search(r'^\S+ \S+ \d+ (?P<seq>\d+) stderr Queued job (?P<uuid>\S+)$', line)
+            if m:
+                uuid = m.group('uuid')
+                if not self._include_child_jobs:
+                logger.warning('%s: omitting %s (try --include-child-jobs)',
+                                   self.label, uuid)
+                    continue
+                logger.debug('%s: follow %s', self.label, uuid)
+                child_summarizer = JobSummarizer(uuid)
+                child_summarizer.stats_max = self.stats_max
+                child_summarizer.task_stats = self.task_stats
+                child_summarizer.tasks = self.tasks
+                child_summarizer.run()
+                logger.debug('%s: done %s', self.label, uuid)
+                continue
+
             m = re.search(r'^(?P<timestamp>\S+) (?P<job_uuid>\S+) \d+ (?P<seq>\d+) stderr crunchstat: (?P<category>\S+) (?P<current>.*?)( -- interval (?P<interval>.*))?\n', line)
             if not m:
                 continue
+
             if self.label is None:
                 self.label = m.group('job_uuid')
                 logger.debug('%s: using job uuid as label', self.label)
@@ -65,7 +97,7 @@ class Summarizer(object):
                 continue
             elif m.group('category') == 'error':
                 continue
-            task_id = m.group('seq')
+            task_id = self.seq_to_uuid[int(m.group('seq'))]
             task = self.tasks[task_id]
 
             # Use the first and last crunchstat timestamps as
@@ -126,6 +158,8 @@ class Summarizer(object):
                         self.task_stats[task_id][category][stat] = val
                     if val > self.stats_max[category][stat]:
                         self.stats_max[category][stat] = val
+        logger.debug('%s: done parsing', self.label)
+
         self.job_tot = collections.defaultdict(
             functools.partial(collections.defaultdict, int))
         for task_id, task_stat in self.task_stats.iteritems():
@@ -135,6 +169,7 @@ class Summarizer(object):
                         # meaningless stats like 16 cpu cores x 5 tasks = 80
                         continue
                     self.job_tot[category][stat] += val
+        logger.debug('%s: done totals', self.label)
 
     def long_label(self):
         label = self.label
@@ -169,6 +204,9 @@ class Summarizer(object):
                 tot = self._format(self.job_tot[category].get(stat, '-'))
                 yield "\t".join([category, stat, str(val), max_rate, tot])
         for args in (
+                ('Number of tasks: {}',
+                 len(self.tasks),
+                 None),
                 ('Max CPU time spent by a single task: {}s',
                  self.stats_max['cpu']['user+sys'],
                  None),
@@ -249,7 +287,8 @@ class Summarizer(object):
 
 
 class CollectionSummarizer(Summarizer):
-    def __init__(self, collection_id):
+    def __init__(self, collection_id, **kwargs):
+        logger.debug('load collection %s', collection_id)
         collection = arvados.collection.CollectionReader(collection_id)
         filenames = [filename for filename in collection]
         if len(filenames) != 1:
@@ -257,12 +296,12 @@ class CollectionSummarizer(Summarizer):
                 "collection {} has {} files; need exactly one".format(
                     collection_id, len(filenames)))
         super(CollectionSummarizer, self).__init__(
-            collection.open(filenames[0]))
+            collection.open(filenames[0]), **kwargs)
         self.label = collection_id
 
 
 class JobSummarizer(CollectionSummarizer):
-    def __init__(self, job):
+    def __init__(self, job, **kwargs):
         arv = arvados.api('v1')
         if isinstance(job, str):
             self.job = arv.jobs().get(uuid=job).execute()
@@ -274,12 +313,12 @@ class JobSummarizer(CollectionSummarizer):
             raise ValueError(
                 "job {} has no log; live summary not implemented".format(
                     self.job['uuid']))
-        super(JobSummarizer, self).__init__(self.job['log'])
+        super(JobSummarizer, self).__init__(self.job['log'], **kwargs)
         self.label = self.job['uuid']
 
 
 class PipelineSummarizer():
-    def __init__(self, pipeline_instance_uuid):
+    def __init__(self, pipeline_instance_uuid, **kwargs):
         arv = arvados.api('v1', model=OrderedJsonModel())
         instance = arv.pipeline_instances().get(
             uuid=pipeline_instance_uuid).execute()
@@ -295,7 +334,7 @@ class PipelineSummarizer():
             else:
                 logger.info(
                     "%s: logdata %s", cname, component['job']['log'])
-                summarizer = JobSummarizer(component['job'])
+                summarizer = JobSummarizer(component['job'], **kwargs)
                 summarizer.label = cname
                 self.summarizers[cname] = summarizer
         self.label = pipeline_instance_uuid
diff --git a/tools/crunchstat-summary/tests/logfile_20151204190335.txt.gz.report b/tools/crunchstat-summary/tests/logfile_20151204190335.txt.gz.report
index b12e931..0ba0181 100644
--- a/tools/crunchstat-summary/tests/logfile_20151204190335.txt.gz.report
+++ b/tools/crunchstat-summary/tests/logfile_20151204190335.txt.gz.report
@@ -22,11 +22,12 @@ net:keep0	rx	0	0.00	0
 net:keep0	tx	0	0.00	0
 net:keep0	tx+rx	0	0.00	0
 time	elapsed	80	-	80
+# Number of tasks: 1
 # Max CPU time spent by a single task: 5.75s
 # Max CPU usage in a single interval: 13.00%
 # Overall CPU usage: 7.19%
 # Max memory used by a single task: 0.35GB
 # Max network traffic in a single task: 1.79GB
 # Max network speed in a single interval: 42.58MB/s
-#!! job max CPU usage was 13% -- try runtime_constraints "min_cores_per_node":1
-#!! job max RSS was 334 MiB -- try runtime_constraints "min_ram_mb_per_node":972
+#!! 4xphq-8i9sb-jq0ekny1xou3zoh max CPU usage was 13% -- try runtime_constraints "min_cores_per_node":1
+#!! 4xphq-8i9sb-jq0ekny1xou3zoh max RSS was 334 MiB -- try runtime_constraints "min_ram_mb_per_node":972
diff --git a/tools/crunchstat-summary/tests/logfile_20151210063411.txt.gz.report b/tools/crunchstat-summary/tests/logfile_20151210063411.txt.gz.report
index 8e1a2d8..0641bba 100644
--- a/tools/crunchstat-summary/tests/logfile_20151210063411.txt.gz.report
+++ b/tools/crunchstat-summary/tests/logfile_20151210063411.txt.gz.report
@@ -11,8 +11,9 @@ net:eth0	rx	90	-	90
 net:eth0	tx	90	-	90
 net:eth0	tx+rx	180	-	180
 time	elapsed	2	-	4
+# Number of tasks: 2
 # Max CPU time spent by a single task: 0.00s
 # Overall CPU usage: 0.00%
 # Max memory used by a single task: 0.00GB
 # Max network traffic in a single task: 0.00GB
-#!! job max RSS was 1 MiB -- try runtime_constraints "min_ram_mb_per_node":972
+#!! 4xphq-8i9sb-zvb2ocfycpomrup max RSS was 1 MiB -- try runtime_constraints "min_ram_mb_per_node":972
diff --git a/tools/crunchstat-summary/tests/logfile_20151210063439.txt.gz.report b/tools/crunchstat-summary/tests/logfile_20151210063439.txt.gz.report
index dbe9321..19fe0ed 100644
--- a/tools/crunchstat-summary/tests/logfile_20151210063439.txt.gz.report
+++ b/tools/crunchstat-summary/tests/logfile_20151210063439.txt.gz.report
@@ -11,8 +11,9 @@ net:eth0	rx	90	-	90
 net:eth0	tx	90	-	90
 net:eth0	tx+rx	180	-	180
 time	elapsed	2	-	3
+# Number of tasks: 2
 # Max CPU time spent by a single task: 0.00s
 # Overall CPU usage: 0.00%
 # Max memory used by a single task: 0.00GB
 # Max network traffic in a single task: 0.00GB
-#!! job max RSS was 1 MiB -- try runtime_constraints "min_ram_mb_per_node":972
+#!! 4xphq-8i9sb-v831jm2uq0g2g9x max RSS was 1 MiB -- try runtime_constraints "min_ram_mb_per_node":972
diff --git a/tools/crunchstat-summary/tests/test_examples.py b/tools/crunchstat-summary/tests/test_examples.py
index 09e21a1..d35e81e 100644
--- a/tools/crunchstat-summary/tests/test_examples.py
+++ b/tools/crunchstat-summary/tests/test_examples.py
@@ -35,7 +35,7 @@ class SummarizeFile(ReportDiff):
 
 
 class SummarizeJob(ReportDiff):
-    fake_job_uuid = 'zzzzz-8i9sb-jjjjjjjjjjjjjjj'
+    fake_job_uuid = '4xphq-8i9sb-jq0ekny1xou3zoh'
     fake_log_id = 'fake-log-collection-id'
     fake_job = {
         'uuid': fake_job_uuid,

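For context, here is a minimal sketch of how the new option reaches the
summarizer once this commit lands. The job UUID is the one used in the
test fixtures; JobSummarizer fetches the job record and its log collection
through the Arvados API, so running this assumes a configured API client:

    from crunchstat_summary import summarizer

    # include_child_jobs=True makes run() follow "stderr Queued job <uuid>"
    # lines, folding each child job's stats into this summarizer's totals.
    s = summarizer.JobSummarizer(
        '4xphq-8i9sb-jq0ekny1xou3zoh', include_child_jobs=True)
    s.run()
    print(s.text_report())
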
commit 45e33a52bfdc8677e3eaf5561decf71100a474d7
Author: Tom Clegg <tom at curoverse.com>
Date:   Wed Dec 23 14:09:37 2015 -0500

    7883: Generate multiple sets of charts when data source is a pipeline instance.

diff --git a/tools/crunchstat-summary/crunchstat_summary/__init__.py b/tools/crunchstat-summary/crunchstat_summary/__init__.py
index e69de29..c10988d 100644
--- a/tools/crunchstat-summary/crunchstat_summary/__init__.py
+++ b/tools/crunchstat-summary/crunchstat_summary/__init__.py
@@ -0,0 +1,4 @@
+import logging
+
+logger = logging.getLogger(__name__)
+logger.addHandler(logging.NullHandler())
diff --git a/tools/crunchstat-summary/crunchstat_summary/chartjs.js b/tools/crunchstat-summary/crunchstat_summary/chartjs.js
index 3106e62..72ff7a4 100644
--- a/tools/crunchstat-summary/crunchstat_summary/chartjs.js
+++ b/tools/crunchstat-summary/crunchstat_summary/chartjs.js
@@ -1,11 +1,27 @@
 window.onload = function() {
-    var options = {};
-    chartData.forEach(function(data, idx) {
-        var div = document.createElement('div');
-        div.setAttribute('id', 'chart-'+idx);
-        div.setAttribute('style', 'width: 100%; height: 150px');
-        document.body.appendChild(div);
-        var chart = new CanvasJS.Chart('chart-'+idx, data);
-        chart.render();
+    var charts = {};
+    sections.forEach(function(section, section_idx) {
+        var h1 = document.createElement('h1');
+        h1.appendChild(document.createTextNode(section.label));
+        document.body.appendChild(h1);
+        section.charts.forEach(function(data, chart_idx) {
+            // Skip chart if every series has zero data points
+            if (0 == data.data.reduce(function(len, series) {
+                return len + series.dataPoints.length;
+            }, 0)) {
+                return;
+            }
+            var id = 'chart-'+section_idx+'-'+chart_idx;
+            var div = document.createElement('div');
+            div.setAttribute('id', id);
+            div.setAttribute('style', 'width: 100%; height: 150px');
+            document.body.appendChild(div);
+            charts[id] = new CanvasJS.Chart(id, data);
+            charts[id].render();
+        });
     });
-}
+
+    if (typeof window.debug === 'undefined')
+        window.debug = {};
+    window.debug.charts = charts;
+};
diff --git a/tools/crunchstat-summary/crunchstat_summary/chartjs.py b/tools/crunchstat-summary/crunchstat_summary/chartjs.py
index 85b49b8..590df27 100644
--- a/tools/crunchstat-summary/crunchstat_summary/chartjs.py
+++ b/tools/crunchstat-summary/crunchstat_summary/chartjs.py
@@ -3,13 +3,15 @@ from __future__ import print_function
 import json
 import pkg_resources
 
+from crunchstat_summary import logger
+
 
 class ChartJS(object):
-    JSLIB = 'https://cdnjs.cloudflare.com/ajax/libs/canvasjs/1.7.0/canvasjs.js'
+    JSLIB = 'https://cdnjs.cloudflare.com/ajax/libs/canvasjs/1.7.0/canvasjs.min.js'
 
-    def __init__(self, label, tasks):
+    def __init__(self, label, summarizers):
         self.label = label
-        self.tasks = tasks
+        self.summarizers = summarizers
 
     def html(self):
         return '''<!doctype html><html><head>
@@ -20,31 +22,48 @@ class ChartJS(object):
         '''.format(self.label, self.JSLIB, self.js())
 
     def js(self):
-        return 'var chartData = {};\n{}'.format(
-            json.dumps(self.chartData()),
+        return 'var sections = {};\n{}'.format(
+            json.dumps(self.sections()),
             pkg_resources.resource_string('crunchstat_summary', 'chartjs.js'))
 
-    def chartData(self):
-        maxpts = 0
-        for task in self.tasks.itervalues():
-            for series in task.series.itervalues():
-                maxpts = max(maxpts, len(series))
+    def sections(self):
         return [
             {
-                'title': {
-                    'text': '{}: {} {}'.format(self.label, stat[0], stat[1]),
+                'label': s.long_label(),
+                'charts': self.charts(s.label, s.tasks),
+            }
+            for s in self.summarizers]
+
+    def charts(self, label, tasks):
+        return [
+            {
+                'axisY': {
+                    'minimum': 0,
                 },
                 'data': [
                     {
                         'type': 'line',
-                        'dataPoints': [
-                            {'x': pt[0].total_seconds(), 'y': pt[1]}
-                            for pt in task.series[stat]]
+                        'markerType': 'none',
+                        'dataPoints': self._datapoints(
+                            label=uuid, task=task, series=task.series[stat]),
                     }
-                    for label, task in self.tasks.iteritems()
+                    for uuid, task in tasks.iteritems()
                 ],
+                'title': {
+                    'text': '{}: {} {}'.format(label, stat[0], stat[1]),
+                },
+                'zoomEnabled': True,
             }
             for stat in (('cpu', 'user+sys__rate'),
+                         ('mem', 'rss'),
                          ('net:eth0', 'tx+rx__rate'),
-                         ('mem', 'rss'))
-        ]
+                         ('net:keep0', 'tx+rx__rate'))]
+
+    def _datapoints(self, label, task, series):
+        points = [
+            {'x': pt[0].total_seconds(), 'y': pt[1]}
+            for pt in series]
+        if len(points) > 0:
+            points[-1]['markerType'] = 'cross'
+            points[-1]['markerSize'] = 12
+        return points
diff --git a/tools/crunchstat-summary/crunchstat_summary/command.py b/tools/crunchstat-summary/crunchstat_summary/command.py
index c28b00f..2c7574c 100644
--- a/tools/crunchstat-summary/crunchstat_summary/command.py
+++ b/tools/crunchstat-summary/crunchstat_summary/command.py
@@ -1,8 +1,9 @@
 import argparse
 import gzip
+import logging
 import sys
 
-from crunchstat_summary import summarizer
+from crunchstat_summary import logger, summarizer
 
 
 class ArgumentParser(argparse.ArgumentParser):
@@ -22,11 +23,21 @@ class ArgumentParser(argparse.ArgumentParser):
         self.add_argument(
             '--format', type=str, choices=('html', 'text'), default='text',
             help='Report format')
+        self.add_argument(
+            '--verbose', action='store_true',
+            help='Write progress messages to stderr')
+        self.add_argument(
+            '--debug', action='store_true',
+            help='Write debug messages to stderr')
 
 
 class Command(object):
     def __init__(self, args):
         self.args = args
+        if args.debug:
+            logger.setLevel(logging.DEBUG)
+        elif args.verbose:
+            logger.setLevel(logging.INFO)
 
     def run(self):
         if self.args.pipeline_instance:
diff --git a/tools/crunchstat-summary/crunchstat_summary/summarizer.py b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
index b630a0c..6396b5d 100644
--- a/tools/crunchstat-summary/crunchstat_summary/summarizer.py
+++ b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
@@ -6,13 +6,12 @@ import crunchstat_summary.chartjs
 import datetime
 import functools
 import itertools
-import logging
 import math
 import re
 import sys
 
-logger = logging.getLogger(__name__)
-logger.addHandler(logging.NullHandler())
+from arvados.api import OrderedJsonModel
+from crunchstat_summary import logger
 
 # Recommend memory constraints that are this multiple of an integral
 # number of GiB. (Actual nodes tend to be sold in sizes like 8 GiB
@@ -29,11 +28,15 @@ class Task(object):
 class Summarizer(object):
     existing_constraints = {}
 
-    def __init__(self, logdata, label='job'):
+    def __init__(self, logdata, label=None):
         self._logdata = logdata
         self.label = label
+        self.starttime = None
+        self.finishtime = None
+        logger.debug("%s: logdata %s", self.label, repr(logdata))
 
     def run(self):
+        logger.debug("%s: parsing log data", self.label)
         # stats_max: {category: {stat: val}}
         self.stats_max = collections.defaultdict(
             functools.partial(collections.defaultdict,
@@ -51,20 +54,34 @@ class Summarizer(object):
                 if elapsed > self.stats_max['time']['elapsed']:
                     self.stats_max['time']['elapsed'] = elapsed
                 continue
-            m = re.search(r'^(?P<timestamp>\S+) \S+ \d+ (?P<seq>\d+) stderr crunchstat: (?P<category>\S+) (?P<current>.*?)( -- interval (?P<interval>.*))?\n', line)
+            m = re.search(r'^(?P<timestamp>\S+) (?P<job_uuid>\S+) \d+ (?P<seq>\d+) stderr crunchstat: (?P<category>\S+) (?P<current>.*?)( -- interval (?P<interval>.*))?\n', line)
             if not m:
                 continue
+            if self.label is None:
+                self.label = m.group('job_uuid')
+                logger.debug('%s: using job uuid as label', self.label)
             if m.group('category').endswith(':'):
                 # "notice:" etc.
                 continue
             elif m.group('category') == 'error':
                 continue
             task_id = m.group('seq')
+            task = self.tasks[task_id]
+
+            # Use the first and last crunchstat timestamps as
+            # approximations of starttime and finishtime.
             timestamp = datetime.datetime.strptime(
                 m.group('timestamp'), '%Y-%m-%d_%H:%M:%S')
-            task = self.tasks[task_id]
             if not task.starttime:
                 task.starttime = timestamp
+                logger.debug('%s: task %s starttime %s',
+                             self.label, task_id, timestamp)
+            task.finishtime = timestamp
+
+            if not self.starttime:
+                self.starttime = timestamp
+            self.finishtime = timestamp
+
             this_interval_s = None
             for group in ['current', 'interval']:
                 if not m.group(group):
@@ -119,13 +136,27 @@ class Summarizer(object):
                         continue
                     self.job_tot[category][stat] += val
 
+    def long_label(self):
+        label = self.label
+        if self.finishtime:
+            label += ' -- elapsed time '
+            s = (self.finishtime - self.starttime).total_seconds()
+            if s > 86400:
+                label += '{}d'.format(int(s/86400))
+            if s > 3600:
+                label += '{}h'.format(int(s/3600) % 24)
+            if s > 60:
+                label += '{}m'.format(int(s/60) % 60)
+            label += '{}s'.format(int(s) % 60)
+        return label
+
     def text_report(self):
         return "\n".join(itertools.chain(
             self._text_report_gen(),
             self._recommend_gen())) + "\n"
 
     def html_report(self):
-        return crunchstat_summary.chartjs.ChartJS(self.label, self.tasks).html()
+        return crunchstat_summary.chartjs.ChartJS(self.label, [self]).html()
 
     def _text_report_gen(self):
         yield "\t".join(['category', 'metric', 'task_max', 'task_max_rate', 'job_total'])
@@ -227,6 +258,7 @@ class CollectionSummarizer(Summarizer):
                     collection_id, len(filenames)))
         super(CollectionSummarizer, self).__init__(
             collection.open(filenames[0]))
+        self.label = collection_id
 
 
 class JobSummarizer(CollectionSummarizer):
@@ -243,11 +275,12 @@ class JobSummarizer(CollectionSummarizer):
                 "job {} has no log; live summary not implemented".format(
                     self.job['uuid']))
         super(JobSummarizer, self).__init__(self.job['log'])
+        self.label = self.job['uuid']
 
 
 class PipelineSummarizer():
     def __init__(self, pipeline_instance_uuid):
-        arv = arvados.api('v1')
+        arv = arvados.api('v1', model=OrderedJsonModel())
         instance = arv.pipeline_instances().get(
             uuid=pipeline_instance_uuid).execute()
         self.summarizers = collections.OrderedDict()
@@ -260,11 +293,12 @@ class PipelineSummarizer():
                     "%s: skipping job %s with no log available",
                     cname, component['job'].get('uuid'))
             else:
-                logger.debug(
-                    "%s: reading log from %s", cname, component['job']['log'])
+                logger.info(
+                    "%s: logdata %s", cname, component['job']['log'])
                 summarizer = JobSummarizer(component['job'])
                 summarizer.label = cname
                 self.summarizers[cname] = summarizer
+        self.label = pipeline_instance_uuid
 
     def run(self):
         for summarizer in self.summarizers.itervalues():
@@ -278,3 +312,7 @@ class PipelineSummarizer():
             txt += summarizer.text_report()
             txt += '\n'
         return txt
+
+    def html_report(self):
+        return crunchstat_summary.chartjs.ChartJS(
+            self.label, self.summarizers.itervalues()).html()

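For context, chartjs.js now expects a top-level "sections" variable rather
than "chartData". A minimal sketch, with a made-up label and data points,
of the structure that ChartJS.sections() serializes for it:

    import json

    # One entry per summarizer; each chart is a CanvasJS config dict.
    sections = [{
        'label': '4xphq-8i9sb-jq0ekny1xou3zoh -- elapsed time 1m20s',
        'charts': [{
            'title': {'text': 'main: cpu user+sys__rate'},
            'axisY': {'minimum': 0},
            'zoomEnabled': True,
            'data': [{
                'type': 'line',
                'markerType': 'none',
                'dataPoints': [
                    {'x': 0.0, 'y': 0.0},
                    {'x': 10.0, 'y': 0.8},
                    # _datapoints() marks the last point with a cross
                    {'x': 20.0, 'y': 0.5,
                     'markerType': 'cross', 'markerSize': 12},
                ],
            }],
        }],
    }]
    print('var sections = {};'.format(json.dumps(sections)))
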
commit d85fc3bcf31f146d93250587ea24d2afd53044ac
Author: Tom Clegg <tom at curoverse.com>
Date:   Wed Dec 23 11:39:14 2015 -0500

    7883: Add option (--format html) to generate canvasjs charts.

diff --git a/tools/crunchstat-summary/bin/crunchstat-summary b/tools/crunchstat-summary/bin/crunchstat-summary
index c32b50e..e16bd8e 100755
--- a/tools/crunchstat-summary/bin/crunchstat-summary
+++ b/tools/crunchstat-summary/bin/crunchstat-summary
@@ -10,6 +10,6 @@ import sys
 logging.getLogger().addHandler(logging.StreamHandler())
 
 args = crunchstat_summary.command.ArgumentParser().parse_args(sys.argv[1:])
-s = crunchstat_summary.command.Command(args).summarizer()
-s.run()
-print(s.report(), end='')
+cmd = crunchstat_summary.command.Command(args)
+cmd.run()
+print(cmd.report(), end='')
diff --git a/tools/crunchstat-summary/crunchstat_summary/chartjs.js b/tools/crunchstat-summary/crunchstat_summary/chartjs.js
new file mode 100644
index 0000000..3106e62
--- /dev/null
+++ b/tools/crunchstat-summary/crunchstat_summary/chartjs.js
@@ -0,0 +1,11 @@
+window.onload = function() {
+    var options = {};
+    chartData.forEach(function(data, idx) {
+        var div = document.createElement('div');
+        div.setAttribute('id', 'chart-'+idx);
+        div.setAttribute('style', 'width: 100%; height: 150px');
+        document.body.appendChild(div);
+        var chart = new CanvasJS.Chart('chart-'+idx, data);
+        chart.render();
+    });
+}
diff --git a/tools/crunchstat-summary/crunchstat_summary/chartjs.py b/tools/crunchstat-summary/crunchstat_summary/chartjs.py
new file mode 100644
index 0000000..85b49b8
--- /dev/null
+++ b/tools/crunchstat-summary/crunchstat_summary/chartjs.py
@@ -0,0 +1,50 @@
+from __future__ import print_function
+
+import json
+import pkg_resources
+
+
+class ChartJS(object):
+    JSLIB = 'https://cdnjs.cloudflare.com/ajax/libs/canvasjs/1.7.0/canvasjs.js'
+
+    def __init__(self, label, tasks):
+        self.label = label
+        self.tasks = tasks
+
+    def html(self):
+        return '''<!doctype html><html><head>
+        <title>{} stats</title>
+        <script type="text/javascript" src="{}"></script>
+        <script type="text/javascript">{}</script>
+        </head><body></body></html>
+        '''.format(self.label, self.JSLIB, self.js())
+
+    def js(self):
+        return 'var chartData = {};\n{}'.format(
+            json.dumps(self.chartData()),
+            pkg_resources.resource_string('crunchstat_summary', 'chartjs.js'))
+
+    def chartData(self):
+        maxpts = 0
+        for task in self.tasks.itervalues():
+            for series in task.series.itervalues():
+                maxpts = max(maxpts, len(series))
+        return [
+            {
+                'title': {
+                    'text': '{}: {} {}'.format(self.label, stat[0], stat[1]),
+                },
+                'data': [
+                    {
+                        'type': 'line',
+                        'dataPoints': [
+                            {'x': pt[0].total_seconds(), 'y': pt[1]}
+                            for pt in task.series[stat]]
+                    }
+                    for label, task in self.tasks.iteritems()
+                ],
+            }
+            for stat in (('cpu', 'user+sys__rate'),
+                         ('net:eth0', 'tx+rx__rate'),
+                         ('mem', 'rss'))
+        ]
diff --git a/tools/crunchstat-summary/crunchstat_summary/command.py b/tools/crunchstat-summary/crunchstat_summary/command.py
index fc37190..c28b00f 100644
--- a/tools/crunchstat-summary/crunchstat_summary/command.py
+++ b/tools/crunchstat-summary/crunchstat_summary/command.py
@@ -19,22 +19,32 @@ class ArgumentParser(argparse.ArgumentParser):
         src.add_argument(
             '--log-file', type=str,
             help='Read log data from a regular file')
+        self.add_argument(
+            '--format', type=str, choices=('html', 'text'), default='text',
+            help='Report format')
 
 
 class Command(object):
     def __init__(self, args):
         self.args = args
 
-    def summarizer(self):
+    def run(self):
         if self.args.pipeline_instance:
-            return summarizer.PipelineSummarizer(self.args.pipeline_instance)
+            self.summer = summarizer.PipelineSummarizer(self.args.pipeline_instance)
         elif self.args.job:
-            return summarizer.JobSummarizer(self.args.job)
+            self.summer = summarizer.JobSummarizer(self.args.job)
         elif self.args.log_file:
             if self.args.log_file.endswith('.gz'):
                 fh = gzip.open(self.args.log_file)
             else:
                 fh = open(self.args.log_file)
-            return summarizer.Summarizer(fh)
+            self.summer = summarizer.Summarizer(fh)
         else:
-            return summarizer.Summarizer(sys.stdin)
+            self.summer = summarizer.Summarizer(sys.stdin)
+        return self.summer.run()
+
+    def report(self):
+        if self.args.format == 'html':
+            return self.summer.html_report()
+        elif self.args.format == 'text':
+            return self.summer.text_report()
diff --git a/tools/crunchstat-summary/crunchstat_summary/summarizer.py b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
index f5d27d4..b630a0c 100644
--- a/tools/crunchstat-summary/crunchstat_summary/summarizer.py
+++ b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
@@ -2,6 +2,8 @@ from __future__ import print_function
 
 import arvados
 import collections
+import crunchstat_summary.chartjs
+import datetime
 import functools
 import itertools
 import logging
@@ -17,6 +19,13 @@ logger.addHandler(logging.NullHandler())
 # that have amounts like 7.5 GiB according to the kernel.)
 AVAILABLE_RAM_RATIO = 0.95
 
+
+class Task(object):
+    def __init__(self):
+        self.starttime = None
+        self.series = collections.defaultdict(list)
+
+
 class Summarizer(object):
     existing_constraints = {}
 
@@ -32,6 +41,7 @@ class Summarizer(object):
         # task_stats: {task_id: {category: {stat: val}}}
         self.task_stats = collections.defaultdict(
             functools.partial(collections.defaultdict, dict))
+        self.tasks = collections.defaultdict(Task)
         for line in self._logdata:
             m = re.search(r'^\S+ \S+ \d+ (?P<seq>\d+) success in (?P<elapsed>\d+) seconds', line)
             if m:
@@ -41,7 +51,7 @@ class Summarizer(object):
                 if elapsed > self.stats_max['time']['elapsed']:
                     self.stats_max['time']['elapsed'] = elapsed
                 continue
-            m = re.search(r'^\S+ \S+ \d+ (?P<seq>\d+) stderr crunchstat: (?P<category>\S+) (?P<current>.*?)( -- interval (?P<interval>.*))?\n', line)
+            m = re.search(r'^(?P<timestamp>\S+) \S+ \d+ (?P<seq>\d+) stderr crunchstat: (?P<category>\S+) (?P<current>.*?)( -- interval (?P<interval>.*))?\n', line)
             if not m:
                 continue
             if m.group('category').endswith(':'):
@@ -50,6 +60,11 @@ class Summarizer(object):
             elif m.group('category') == 'error':
                 continue
             task_id = m.group('seq')
+            timestamp = datetime.datetime.strptime(
+                m.group('timestamp'), '%Y-%m-%d_%H:%M:%S')
+            task = self.tasks[task_id]
+            if not task.starttime:
+                task.starttime = timestamp
             this_interval_s = None
             for group in ['current', 'interval']:
                 if not m.group(group):
@@ -84,7 +99,13 @@ class Summarizer(object):
                         else:
                             stat = stat + '__rate'
                             val = val / this_interval_s
+                            if stat in ['user+sys__rate', 'tx+rx__rate']:
+                                task.series[category, stat].append(
+                                    (timestamp - task.starttime, val))
                     else:
+                        if stat in ['rss']:
+                            task.series[category, stat].append(
+                                (timestamp - task.starttime, val))
                         self.task_stats[task_id][category][stat] = val
                     if val > self.stats_max[category][stat]:
                         self.stats_max[category][stat] = val
@@ -98,12 +119,15 @@ class Summarizer(object):
                         continue
                     self.job_tot[category][stat] += val
 
-    def report(self):
+    def text_report(self):
         return "\n".join(itertools.chain(
-            self._report_gen(),
+            self._text_report_gen(),
             self._recommend_gen())) + "\n"
 
-    def _report_gen(self):
+    def html_report(self):
+        return crunchstat_summary.chartjs.ChartJS(self.label, self.tasks).html()
+
+    def _text_report_gen(self):
         yield "\t".join(['category', 'metric', 'task_max', 'task_max_rate', 'job_total'])
         for category, stat_max in sorted(self.stats_max.iteritems()):
             for stat, val in sorted(stat_max.iteritems()):
@@ -246,11 +270,11 @@ class PipelineSummarizer():
         for summarizer in self.summarizers.itervalues():
             summarizer.run()
 
-    def report(self):
+    def text_report(self):
         txt = ''
         for cname, summarizer in self.summarizers.iteritems():
             txt += '### Summary for {} ({})\n'.format(
                 cname, summarizer.job['uuid'])
-            txt += summarizer.report()
+            txt += summarizer.text_report()
             txt += '\n'
         return txt
diff --git a/tools/crunchstat-summary/tests/test_examples.py b/tools/crunchstat-summary/tests/test_examples.py
index cf810ae..09e21a1 100644
--- a/tools/crunchstat-summary/tests/test_examples.py
+++ b/tools/crunchstat-summary/tests/test_examples.py
@@ -1,7 +1,6 @@
 import arvados
 import collections
 import crunchstat_summary.command
-import crunchstat_summary.summarizer
 import difflib
 import glob
 import gzip
@@ -9,17 +8,17 @@ import mock
 import os
 import unittest
 
-
 TESTS_DIR = os.path.dirname(os.path.abspath(__file__))
 
+
 class ReportDiff(unittest.TestCase):
-    def diff_known_report(self, logfile, summarizer):
+    def diff_known_report(self, logfile, cmd):
         expectfile = logfile+'.report'
         expect = open(expectfile).readlines()
-        self.diff_report(summarizer, expect, expectfile=expectfile)
+        self.diff_report(cmd, expect, expectfile=expectfile)
 
-    def diff_report(self, summarizer, expect, expectfile=None):
-        got = [x+"\n" for x in summarizer.report().strip("\n").split("\n")]
+    def diff_report(self, cmd, expect, expectfile=None):
+        got = [x+"\n" for x in cmd.report().strip("\n").split("\n")]
         self.assertEqual(got, expect, "\n"+"".join(difflib.context_diff(
             expect, got, fromfile=expectfile, tofile="(generated)")))
 
@@ -30,9 +29,9 @@ class SummarizeFile(ReportDiff):
             logfile = os.path.join(TESTS_DIR, fnm)
             args = crunchstat_summary.command.ArgumentParser().parse_args(
                 ['--log-file', logfile])
-            summarizer = crunchstat_summary.command.Command(args).summarizer()
-            summarizer.run()
-            self.diff_known_report(logfile, summarizer)
+            cmd = crunchstat_summary.command.Command(args)
+            cmd.run()
+            self.diff_known_report(logfile, cmd)
 
 
 class SummarizeJob(ReportDiff):
@@ -52,9 +51,9 @@ class SummarizeJob(ReportDiff):
         mock_cr().open.return_value = gzip.open(self.logfile)
         args = crunchstat_summary.command.ArgumentParser().parse_args(
             ['--job', self.fake_job_uuid])
-        summarizer = crunchstat_summary.command.Command(args).summarizer()
-        summarizer.run()
-        self.diff_known_report(self.logfile, summarizer)
+        cmd = crunchstat_summary.command.Command(args)
+        cmd.run()
+        self.diff_known_report(self.logfile, cmd)
         mock_api().jobs().get.assert_called_with(uuid=self.fake_job_uuid)
         mock_cr.assert_called_with(self.fake_log_id)
         mock_cr().open.assert_called_with('fake-logfile.txt')
@@ -113,8 +112,8 @@ class SummarizePipeline(ReportDiff):
         mock_cr().open.side_effect = [gzip.open(logfile) for _ in range(3)]
         args = crunchstat_summary.command.ArgumentParser().parse_args(
             ['--pipeline-instance', self.fake_instance['uuid']])
-        summarizer = crunchstat_summary.command.Command(args).summarizer()
-        summarizer.run()
+        cmd = crunchstat_summary.command.Command(args)
+        cmd.run()
 
         job_report = [
             line for line in open(logfile+'.report').readlines()
@@ -126,7 +125,7 @@ class SummarizePipeline(ReportDiff):
             job_report + ['\n'] +
             ['### Summary for baz (zzzzz-8i9sb-000000000000002)\n'] +
             job_report)
-        self.diff_report(summarizer, expect)
+        self.diff_report(cmd, expect)
         mock_cr.assert_has_calls(
             [
                 mock.call('fake-log-pdh-0'),

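For context, the bin/crunchstat-summary change above reduces the entry
point to building a Command, running it, and printing its report. A
minimal sketch of the same flow, assuming a hypothetical local log file:

    import crunchstat_summary.command

    args = crunchstat_summary.command.ArgumentParser().parse_args(
        ['--log-file', '/tmp/crunchstat.log', '--format', 'html'])
    cmd = crunchstat_summary.command.Command(args)
    cmd.run()            # builds and runs the appropriate Summarizer
    print(cmd.report())  # html_report() or text_report(), per --format
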
-----------------------------------------------------------------------


hooks/post-receive
-- 
