[ARVADOS] updated: 6e704b98eb77e0a35dcfef1c8302f9ac14b4d98e
git at public.curoverse.com
git at public.curoverse.com
Wed Dec 23 14:09:46 EST 2015
Summary of changes:
tools/crunchstat-summary/bin/crunchstat-summary | 6 +--
.../crunchstat_summary/__init__.py | 4 ++
.../crunchstat_summary/chartjs.js | 16 ++++++
.../crunchstat_summary/chartjs.py | 56 +++++++++++++++++++
.../crunchstat_summary/command.py | 34 +++++++++---
.../crunchstat_summary/summarizer.py | 63 +++++++++++++++++-----
tools/crunchstat-summary/tests/test_examples.py | 29 +++++-----
7 files changed, 171 insertions(+), 37 deletions(-)
create mode 100644 tools/crunchstat-summary/crunchstat_summary/chartjs.js
create mode 100644 tools/crunchstat-summary/crunchstat_summary/chartjs.py
via 6e704b98eb77e0a35dcfef1c8302f9ac14b4d98e (commit)
via e8344fa1f4b54edbfe181596511bcdddc1194991 (commit)
from be854bfb8bab5849acb09fcb5acbeeac7300ca5c (commit)
The revisions listed above that are new to this repository have
not appeared in any other notification email, so they are listed
in full below.
commit 6e704b98eb77e0a35dcfef1c8302f9ac14b4d98e
Author: Tom Clegg <tom at curoverse.com>
Date: Wed Dec 23 14:09:37 2015 -0500
7883: Charts for pipeline instance
diff --git a/tools/crunchstat-summary/crunchstat_summary/__init__.py b/tools/crunchstat-summary/crunchstat_summary/__init__.py
index e69de29..c10988d 100644
--- a/tools/crunchstat-summary/crunchstat_summary/__init__.py
+++ b/tools/crunchstat-summary/crunchstat_summary/__init__.py
@@ -0,0 +1,4 @@
+import logging
+
+logger = logging.getLogger(__name__)
+logger.addHandler(logging.NullHandler())
diff --git a/tools/crunchstat-summary/crunchstat_summary/chartjs.js b/tools/crunchstat-summary/crunchstat_summary/chartjs.js
index 3106e62..6e1b8dc 100644
--- a/tools/crunchstat-summary/crunchstat_summary/chartjs.js
+++ b/tools/crunchstat-summary/crunchstat_summary/chartjs.js
@@ -1,11 +1,16 @@
window.onload = function() {
- var options = {};
- chartData.forEach(function(data, idx) {
- var div = document.createElement('div');
- div.setAttribute('id', 'chart-'+idx);
- div.setAttribute('style', 'width: 100%; height: 150px');
- document.body.appendChild(div);
- var chart = new CanvasJS.Chart('chart-'+idx, data);
- chart.render();
+ sections.forEach(function(section, section_idx) {
+ var h1 = document.createElement('h1');
+ h1.appendChild(document.createTextNode(section.label));
+ document.body.appendChild(h1);
+ section.charts.forEach(function(data, chart_idx) {
+ var id = 'chart-'+section_idx+'-'+chart_idx;
+ var div = document.createElement('div');
+ div.setAttribute('id', id);
+ div.setAttribute('style', 'width: 100%; height: 150px');
+ document.body.appendChild(div);
+ var chart = new CanvasJS.Chart(id, data);
+ chart.render();
+ });
});
-}
+};
diff --git a/tools/crunchstat-summary/crunchstat_summary/chartjs.py b/tools/crunchstat-summary/crunchstat_summary/chartjs.py
index 85b49b8..b043710 100644
--- a/tools/crunchstat-summary/crunchstat_summary/chartjs.py
+++ b/tools/crunchstat-summary/crunchstat_summary/chartjs.py
@@ -3,13 +3,15 @@ from __future__ import print_function
import json
import pkg_resources
+from crunchstat_summary import logger
+
class ChartJS(object):
JSLIB = 'https://cdnjs.cloudflare.com/ajax/libs/canvasjs/1.7.0/canvasjs.js'
- def __init__(self, label, tasks):
+ def __init__(self, label, summarizers):
self.label = label
- self.tasks = tasks
+ self.summarizers = summarizers
def html(self):
return '''<!doctype html><html><head>
@@ -20,20 +22,25 @@ class ChartJS(object):
'''.format(self.label, self.JSLIB, self.js())
def js(self):
- return 'var chartData = {};\n{}'.format(
- json.dumps(self.chartData()),
+ return 'var sections = {};\n{}'.format(
+ json.dumps(self.sections()),
pkg_resources.resource_string('crunchstat_summary', 'chartjs.js'))
- def chartData(self):
- maxpts = 0
- for task in self.tasks.itervalues():
- for series in task.series.itervalues():
- maxpts = max(maxpts, len(series))
+ def sections(self):
+ return [
+ {
+ 'label': s.label,
+ 'charts': self.charts(s.label, s.tasks),
+ }
+ for s in self.summarizers]
+
+ def charts(self, label, tasks):
return [
{
'title': {
- 'text': '{}: {} {}'.format(self.label, stat[0], stat[1]),
+ 'text': '{}: {} {}'.format(label, stat[0], stat[1]),
},
+ 'markerType': 'none',
'data': [
{
'type': 'line',
@@ -41,10 +48,9 @@ class ChartJS(object):
{'x': pt[0].total_seconds(), 'y': pt[1]}
for pt in task.series[stat]]
}
- for label, task in self.tasks.iteritems()
+ for task in tasks.itervalues()
],
}
for stat in (('cpu', 'user+sys__rate'),
('net:eth0', 'tx+rx__rate'),
- ('mem', 'rss'))
- ]
+ ('mem', 'rss'))]
diff --git a/tools/crunchstat-summary/crunchstat_summary/command.py b/tools/crunchstat-summary/crunchstat_summary/command.py
index ab95108..056d8df 100644
--- a/tools/crunchstat-summary/crunchstat_summary/command.py
+++ b/tools/crunchstat-summary/crunchstat_summary/command.py
@@ -1,8 +1,9 @@
import argparse
import gzip
+import logging
import sys
-from crunchstat_summary import summarizer
+from crunchstat_summary import logger, summarizer
class ArgumentParser(argparse.ArgumentParser):
@@ -23,11 +24,21 @@ class ArgumentParser(argparse.ArgumentParser):
fmt.add_argument(
'--format', type=str, choices=('html', 'text'), default='text',
help='Report format')
+ self.add_argument(
+ '--verbose', action='store_true',
+ help='Write progress messages to stderr')
+ self.add_argument(
+ '--debug', action='store_true',
+ help='Write debug messages to stderr')
class Command(object):
def __init__(self, args):
self.args = args
+ if args.debug:
+ logger.setLevel(logging.DEBUG)
+ elif args.verbose:
+ logger.setLevel(logging.INFO)
def run(self):
if self.args.pipeline_instance:
diff --git a/tools/crunchstat-summary/crunchstat_summary/summarizer.py b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
index b630a0c..3cca66b 100644
--- a/tools/crunchstat-summary/crunchstat_summary/summarizer.py
+++ b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
@@ -6,13 +6,12 @@ import crunchstat_summary.chartjs
import datetime
import functools
import itertools
-import logging
import math
import re
import sys
-logger = logging.getLogger(__name__)
-logger.addHandler(logging.NullHandler())
+from arvados.api import OrderedJsonModel
+from crunchstat_summary import logger
# Recommend memory constraints that are this multiple of an integral
# number of GiB. (Actual nodes tend to be sold in sizes like 8 GiB
@@ -29,11 +28,13 @@ class Task(object):
class Summarizer(object):
existing_constraints = {}
- def __init__(self, logdata, label='job'):
+ def __init__(self, logdata, label=None):
self._logdata = logdata
self.label = label
+ logger.debug("%s: logdata %s", self.label, repr(logdata))
def run(self):
+ logger.debug("%s: parsing log data", self.label)
# stats_max: {category: {stat: val}}
self.stats_max = collections.defaultdict(
functools.partial(collections.defaultdict,
@@ -51,9 +52,12 @@ class Summarizer(object):
if elapsed > self.stats_max['time']['elapsed']:
self.stats_max['time']['elapsed'] = elapsed
continue
- m = re.search(r'^(?P<timestamp>\S+) \S+ \d+ (?P<seq>\d+) stderr crunchstat: (?P<category>\S+) (?P<current>.*?)( -- interval (?P<interval>.*))?\n', line)
+ m = re.search(r'^(?P<timestamp>\S+) (?P<job_uuid>\S+) \d+ (?P<seq>\d+) stderr crunchstat: (?P<category>\S+) (?P<current>.*?)( -- interval (?P<interval>.*))?\n', line)
if not m:
continue
+ if self.label is None:
+ self.label = m.group('job_uuid')
+ logger.debug('%s: using job uuid as label', self.label)
if m.group('category').endswith(':'):
# "notice:" etc.
continue
@@ -65,6 +69,8 @@ class Summarizer(object):
task = self.tasks[task_id]
if not task.starttime:
task.starttime = timestamp
+ logger.debug('%s: task %s starttime %s',
+ self.label, task_id, timestamp)
this_interval_s = None
for group in ['current', 'interval']:
if not m.group(group):
@@ -125,7 +131,7 @@ class Summarizer(object):
self._recommend_gen())) + "\n"
def html_report(self):
- return crunchstat_summary.chartjs.ChartJS(self.label, self.tasks).html()
+ return crunchstat_summary.chartjs.ChartJS(self.label, [self]).html()
def _text_report_gen(self):
yield "\t".join(['category', 'metric', 'task_max', 'task_max_rate', 'job_total'])
@@ -227,6 +233,7 @@ class CollectionSummarizer(Summarizer):
collection_id, len(filenames)))
super(CollectionSummarizer, self).__init__(
collection.open(filenames[0]))
+ self.label = collection_id
class JobSummarizer(CollectionSummarizer):
@@ -243,11 +250,12 @@ class JobSummarizer(CollectionSummarizer):
"job {} has no log; live summary not implemented".format(
self.job['uuid']))
super(JobSummarizer, self).__init__(self.job['log'])
+ self.label = self.job['uuid']
class PipelineSummarizer():
def __init__(self, pipeline_instance_uuid):
- arv = arvados.api('v1')
+ arv = arvados.api('v1', model=OrderedJsonModel())
instance = arv.pipeline_instances().get(
uuid=pipeline_instance_uuid).execute()
self.summarizers = collections.OrderedDict()
@@ -260,11 +268,12 @@ class PipelineSummarizer():
"%s: skipping job %s with no log available",
cname, component['job'].get('uuid'))
else:
- logger.debug(
- "%s: reading log from %s", cname, component['job']['log'])
+ logger.info(
+ "%s: logdata %s", cname, component['job']['log'])
summarizer = JobSummarizer(component['job'])
summarizer.label = cname
self.summarizers[cname] = summarizer
+ self.label = pipeline_instance_uuid
def run(self):
for summarizer in self.summarizers.itervalues():
@@ -278,3 +287,7 @@ class PipelineSummarizer():
txt += summarizer.text_report()
txt += '\n'
return txt
+
+ def html_report(self):
+ return crunchstat_summary.chartjs.ChartJS(
+ self.label, self.summarizers.itervalues()).html()
commit e8344fa1f4b54edbfe181596511bcdddc1194991
Author: Tom Clegg <tom at curoverse.com>
Date: Wed Dec 23 11:39:14 2015 -0500
7883: Make charts
diff --git a/tools/crunchstat-summary/bin/crunchstat-summary b/tools/crunchstat-summary/bin/crunchstat-summary
index c32b50e..e16bd8e 100755
--- a/tools/crunchstat-summary/bin/crunchstat-summary
+++ b/tools/crunchstat-summary/bin/crunchstat-summary
@@ -10,6 +10,6 @@ import sys
logging.getLogger().addHandler(logging.StreamHandler())
args = crunchstat_summary.command.ArgumentParser().parse_args(sys.argv[1:])
-s = crunchstat_summary.command.Command(args).summarizer()
-s.run()
-print(s.report(), end='')
+cmd = crunchstat_summary.command.Command(args)
+cmd.run()
+print(cmd.report(), end='')
diff --git a/tools/crunchstat-summary/crunchstat_summary/chartjs.js b/tools/crunchstat-summary/crunchstat_summary/chartjs.js
new file mode 100644
index 0000000..3106e62
--- /dev/null
+++ b/tools/crunchstat-summary/crunchstat_summary/chartjs.js
@@ -0,0 +1,11 @@
+window.onload = function() {
+ var options = {};
+ chartData.forEach(function(data, idx) {
+ var div = document.createElement('div');
+ div.setAttribute('id', 'chart-'+idx);
+ div.setAttribute('style', 'width: 100%; height: 150px');
+ document.body.appendChild(div);
+ var chart = new CanvasJS.Chart('chart-'+idx, data);
+ chart.render();
+ });
+}
diff --git a/tools/crunchstat-summary/crunchstat_summary/chartjs.py b/tools/crunchstat-summary/crunchstat_summary/chartjs.py
new file mode 100644
index 0000000..85b49b8
--- /dev/null
+++ b/tools/crunchstat-summary/crunchstat_summary/chartjs.py
@@ -0,0 +1,50 @@
+from __future__ import print_function
+
+import json
+import pkg_resources
+
+
+class ChartJS(object):
+ JSLIB = 'https://cdnjs.cloudflare.com/ajax/libs/canvasjs/1.7.0/canvasjs.js'
+
+ def __init__(self, label, tasks):
+ self.label = label
+ self.tasks = tasks
+
+ def html(self):
+ return '''<!doctype html><html><head>
+ <title>{} stats</title>
+ <script type="text/javascript" src="{}"></script>
+ <script type="text/javascript">{}</script>
+ </head><body></body></html>
+ '''.format(self.label, self.JSLIB, self.js())
+
+ def js(self):
+ return 'var chartData = {};\n{}'.format(
+ json.dumps(self.chartData()),
+ pkg_resources.resource_string('crunchstat_summary', 'chartjs.js'))
+
+ def chartData(self):
+ maxpts = 0
+ for task in self.tasks.itervalues():
+ for series in task.series.itervalues():
+ maxpts = max(maxpts, len(series))
+ return [
+ {
+ 'title': {
+ 'text': '{}: {} {}'.format(self.label, stat[0], stat[1]),
+ },
+ 'data': [
+ {
+ 'type': 'line',
+ 'dataPoints': [
+ {'x': pt[0].total_seconds(), 'y': pt[1]}
+ for pt in task.series[stat]]
+ }
+ for label, task in self.tasks.iteritems()
+ ],
+ }
+ for stat in (('cpu', 'user+sys__rate'),
+ ('net:eth0', 'tx+rx__rate'),
+ ('mem', 'rss'))
+ ]
diff --git a/tools/crunchstat-summary/crunchstat_summary/command.py b/tools/crunchstat-summary/crunchstat_summary/command.py
index fc37190..ab95108 100644
--- a/tools/crunchstat-summary/crunchstat_summary/command.py
+++ b/tools/crunchstat-summary/crunchstat_summary/command.py
@@ -19,22 +19,33 @@ class ArgumentParser(argparse.ArgumentParser):
src.add_argument(
'--log-file', type=str,
help='Read log data from a regular file')
+ fmt = self.add_mutually_exclusive_group()
+ fmt.add_argument(
+ '--format', type=str, choices=('html', 'text'), default='text',
+ help='Report format')
class Command(object):
def __init__(self, args):
self.args = args
- def summarizer(self):
+ def run(self):
if self.args.pipeline_instance:
- return summarizer.PipelineSummarizer(self.args.pipeline_instance)
+ self.summer = summarizer.PipelineSummarizer(self.args.pipeline_instance)
elif self.args.job:
- return summarizer.JobSummarizer(self.args.job)
+ self.summer = summarizer.JobSummarizer(self.args.job)
elif self.args.log_file:
if self.args.log_file.endswith('.gz'):
fh = gzip.open(self.args.log_file)
else:
fh = open(self.args.log_file)
- return summarizer.Summarizer(fh)
+ self.summer = summarizer.Summarizer(fh)
else:
- return summarizer.Summarizer(sys.stdin)
+ self.summer = summarizer.Summarizer(sys.stdin)
+ return self.summer.run()
+
+ def report(self):
+ if self.args.format == 'html':
+ return self.summer.html_report()
+ elif self.args.format == 'text':
+ return self.summer.text_report()
diff --git a/tools/crunchstat-summary/crunchstat_summary/summarizer.py b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
index f5d27d4..b630a0c 100644
--- a/tools/crunchstat-summary/crunchstat_summary/summarizer.py
+++ b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
@@ -2,6 +2,8 @@ from __future__ import print_function
import arvados
import collections
+import crunchstat_summary.chartjs
+import datetime
import functools
import itertools
import logging
@@ -17,6 +19,13 @@ logger.addHandler(logging.NullHandler())
# that have amounts like 7.5 GiB according to the kernel.)
AVAILABLE_RAM_RATIO = 0.95
+
+class Task(object):
+ def __init__(self):
+ self.starttime = None
+ self.series = collections.defaultdict(list)
+
+
class Summarizer(object):
existing_constraints = {}
@@ -32,6 +41,7 @@ class Summarizer(object):
# task_stats: {task_id: {category: {stat: val}}}
self.task_stats = collections.defaultdict(
functools.partial(collections.defaultdict, dict))
+ self.tasks = collections.defaultdict(Task)
for line in self._logdata:
m = re.search(r'^\S+ \S+ \d+ (?P<seq>\d+) success in (?P<elapsed>\d+) seconds', line)
if m:
@@ -41,7 +51,7 @@ class Summarizer(object):
if elapsed > self.stats_max['time']['elapsed']:
self.stats_max['time']['elapsed'] = elapsed
continue
- m = re.search(r'^\S+ \S+ \d+ (?P<seq>\d+) stderr crunchstat: (?P<category>\S+) (?P<current>.*?)( -- interval (?P<interval>.*))?\n', line)
+ m = re.search(r'^(?P<timestamp>\S+) \S+ \d+ (?P<seq>\d+) stderr crunchstat: (?P<category>\S+) (?P<current>.*?)( -- interval (?P<interval>.*))?\n', line)
if not m:
continue
if m.group('category').endswith(':'):
@@ -50,6 +60,11 @@ class Summarizer(object):
elif m.group('category') == 'error':
continue
task_id = m.group('seq')
+ timestamp = datetime.datetime.strptime(
+ m.group('timestamp'), '%Y-%m-%d_%H:%M:%S')
+ task = self.tasks[task_id]
+ if not task.starttime:
+ task.starttime = timestamp
this_interval_s = None
for group in ['current', 'interval']:
if not m.group(group):
@@ -84,7 +99,13 @@ class Summarizer(object):
else:
stat = stat + '__rate'
val = val / this_interval_s
+ if stat in ['user+sys__rate', 'tx+rx__rate']:
+ task.series[category, stat].append(
+ (timestamp - task.starttime, val))
else:
+ if stat in ['rss']:
+ task.series[category, stat].append(
+ (timestamp - task.starttime, val))
self.task_stats[task_id][category][stat] = val
if val > self.stats_max[category][stat]:
self.stats_max[category][stat] = val
@@ -98,12 +119,15 @@ class Summarizer(object):
continue
self.job_tot[category][stat] += val
- def report(self):
+ def text_report(self):
return "\n".join(itertools.chain(
- self._report_gen(),
+ self._text_report_gen(),
self._recommend_gen())) + "\n"
- def _report_gen(self):
+ def html_report(self):
+ return crunchstat_summary.chartjs.ChartJS(self.label, self.tasks).html()
+
+ def _text_report_gen(self):
yield "\t".join(['category', 'metric', 'task_max', 'task_max_rate', 'job_total'])
for category, stat_max in sorted(self.stats_max.iteritems()):
for stat, val in sorted(stat_max.iteritems()):
@@ -246,11 +270,11 @@ class PipelineSummarizer():
for summarizer in self.summarizers.itervalues():
summarizer.run()
- def report(self):
+ def text_report(self):
txt = ''
for cname, summarizer in self.summarizers.iteritems():
txt += '### Summary for {} ({})\n'.format(
cname, summarizer.job['uuid'])
- txt += summarizer.report()
+ txt += summarizer.text_report()
txt += '\n'
return txt
diff --git a/tools/crunchstat-summary/tests/test_examples.py b/tools/crunchstat-summary/tests/test_examples.py
index a19d7ad..3abf344 100644
--- a/tools/crunchstat-summary/tests/test_examples.py
+++ b/tools/crunchstat-summary/tests/test_examples.py
@@ -1,7 +1,6 @@
import arvados
import collections
import crunchstat_summary.command
-import crunchstat_summary.summarizer
import difflib
import glob
import gzip
@@ -9,17 +8,17 @@ import mock
import os
import unittest
-
TESTS_DIR = os.path.dirname(os.path.abspath(__file__))
+
class ReportDiff(unittest.TestCase):
- def diff_known_report(self, logfile, summarizer):
+ def diff_known_report(self, logfile, cmd):
expectfile = logfile+'.report'
expect = open(expectfile).readlines()
- self.diff_report(summarizer, expect, expectfile=expectfile)
+ self.diff_report(cmd, expect, expectfile=expectfile)
- def diff_report(self, summarizer, expect, expectfile=None):
- got = [x+"\n" for x in summarizer.report().strip("\n").split("\n")]
+ def diff_report(self, cmd, expect, expectfile=None):
+ got = [x+"\n" for x in cmd.report().strip("\n").split("\n")]
self.assertEqual(got, expect, "\n"+"".join(difflib.context_diff(
expect, got, fromfile=expectfile, tofile="(generated)")))
@@ -30,9 +29,9 @@ class SummarizeFile(ReportDiff):
logfile = os.path.join(TESTS_DIR, fnm)
args = crunchstat_summary.command.ArgumentParser().parse_args(
['--log-file', logfile])
- summarizer = crunchstat_summary.command.Command(args).summarizer()
- summarizer.run()
- self.diff_known_report(logfile, summarizer)
+ cmd = crunchstat_summary.command.Command(args)
+ cmd.run()
+ self.diff_known_report(logfile, cmd)
class SummarizeJob(ReportDiff):
@@ -52,9 +51,9 @@ class SummarizeJob(ReportDiff):
mock_cr().open.return_value = gzip.open(self.logfile)
args = crunchstat_summary.command.ArgumentParser().parse_args(
['--job', self.fake_job_uuid])
- summarizer = crunchstat_summary.command.Command(args).summarizer()
- summarizer.run()
- self.diff_known_report(self.logfile, summarizer)
+ cmd = crunchstat_summary.command.Command(args)
+ cmd.run()
+ self.diff_known_report(self.logfile, cmd)
mock_api().jobs().get.assert_called_with(uuid=self.fake_job_uuid)
mock_cr.assert_called_with(self.fake_log_id)
mock_cr().open.assert_called_with('fake-logfile.txt')
@@ -113,8 +112,8 @@ class SummarizePipeline(ReportDiff):
mock_cr().open.side_effect = [gzip.open(logfile) for _ in range(3)]
args = crunchstat_summary.command.ArgumentParser().parse_args(
['--pipeline-instance', self.fake_instance['uuid']])
- summarizer = crunchstat_summary.command.Command(args).summarizer()
- summarizer.run()
+ cmd = crunchstat_summary.command.Command(args)
+ cmd.run()
job_report = [
line for line in open(logfile+'.report').readlines()
@@ -126,7 +125,7 @@ class SummarizePipeline(ReportDiff):
job_report + ['\n'] +
['### Summary for baz (zzzzz-8i9sb-000000000000002)\n'] +
job_report)
- self.diff_report(summarizer, expect)
+ self.diff_report(cmd, expect)
mock_cr.assert_has_calls(
[
mock.call('fake-log-pdh-0'),
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list