[ARVADOS] updated: 423aa837838de86356440127a107653cf1c69ca3
git at public.curoverse.com
Mon Jan 4 09:42:25 EST 2016
Summary of changes:
tools/crunchstat-summary/crunchstat_summary/chartjs.py | 4 ++--
tools/crunchstat-summary/crunchstat_summary/command.py | 9 ++++-----
2 files changed, 6 insertions(+), 7 deletions(-)
discards 73deed377f62fe459c55c71c0602936ca55a3c28 (commit)
discards 7fa204d6bcf35d36ec8be12d0e80de6a6e701f86 (commit)
discards 5271b2180f18715d7fd09c3d4d4289fbcd2287aa (commit)
via 423aa837838de86356440127a107653cf1c69ca3 (commit)
via 45e33a52bfdc8677e3eaf5561decf71100a474d7 (commit)
via d85fc3bcf31f146d93250587ea24d2afd53044ac (commit)
This update added new revisions after undoing existing revisions. That is
to say, the old revision is not a strict subset of the new revision. This
situation occurs when you --force push a change and generate a repository
containing something like this:
* -- * -- B -- O -- O -- O (73deed377f62fe459c55c71c0602936ca55a3c28)
\
N -- N -- N (423aa837838de86356440127a107653cf1c69ca3)
When this happens we assume that you've already had alert emails for all
of the O revisions, and so we here report only the revisions in the N
branch from the common base, B.
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
commit 423aa837838de86356440127a107653cf1c69ca3
Author: Tom Clegg <tom at curoverse.com>
Date: Thu Dec 24 13:51:35 2015 -0500
7883: Add option to include stats from child jobs.
diff --git a/tools/crunchstat-summary/crunchstat_summary/command.py b/tools/crunchstat-summary/crunchstat_summary/command.py
index 2c7574c..a5339dd 100644
--- a/tools/crunchstat-summary/crunchstat_summary/command.py
+++ b/tools/crunchstat-summary/crunchstat_summary/command.py
@@ -21,6 +21,9 @@ class ArgumentParser(argparse.ArgumentParser):
'--log-file', type=str,
help='Read log data from a regular file')
self.add_argument(
+ '--include-child-jobs', action='store_true',
+ help='Include stats from child jobs')
+ self.add_argument(
'--format', type=str, choices=('html', 'text'), default='text',
help='Report format')
self.add_argument(
@@ -40,18 +43,21 @@ class Command(object):
logger.setLevel(logging.INFO)
def run(self):
+ kwargs = {
+ 'include_child_jobs': self.args.include_child_jobs,
+ }
if self.args.pipeline_instance:
- self.summer = summarizer.PipelineSummarizer(self.args.pipeline_instance)
+ self.summer = summarizer.PipelineSummarizer(self.args.pipeline_instance, **kwargs)
elif self.args.job:
- self.summer = summarizer.JobSummarizer(self.args.job)
+ self.summer = summarizer.JobSummarizer(self.args.job, **kwargs)
elif self.args.log_file:
if self.args.log_file.endswith('.gz'):
fh = gzip.open(self.args.log_file)
else:
fh = open(self.args.log_file)
- self.summer = summarizer.Summarizer(fh)
+ self.summer = summarizer.Summarizer(fh, **kwargs)
else:
- self.summer = summarizer.Summarizer(sys.stdin)
+ self.summer = summarizer.Summarizer(sys.stdin, **kwargs)
return self.summer.run()
def report(self):
diff --git a/tools/crunchstat-summary/crunchstat_summary/summarizer.py b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
index 6396b5d..d6c1916 100644
--- a/tools/crunchstat-summary/crunchstat_summary/summarizer.py
+++ b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
@@ -28,15 +28,14 @@ class Task(object):
class Summarizer(object):
existing_constraints = {}
- def __init__(self, logdata, label=None):
+ def __init__(self, logdata, label=None, include_child_jobs=True):
self._logdata = logdata
+
self.label = label
self.starttime = None
self.finishtime = None
- logger.debug("%s: logdata %s", self.label, repr(logdata))
+ self._include_child_jobs = include_child_jobs
- def run(self):
- logger.debug("%s: parsing log data", self.label)
# stats_max: {category: {stat: val}}
self.stats_max = collections.defaultdict(
functools.partial(collections.defaultdict,
@@ -44,19 +43,52 @@ class Summarizer(object):
# task_stats: {task_id: {category: {stat: val}}}
self.task_stats = collections.defaultdict(
functools.partial(collections.defaultdict, dict))
+
+ self.seq_to_uuid = {}
self.tasks = collections.defaultdict(Task)
+
+ logger.debug("%s: logdata %s", self.label, repr(logdata))
+
+ def run(self):
+ logger.debug("%s: parsing log data", self.label)
for line in self._logdata:
+ m = re.search(r'^\S+ \S+ \d+ (?P<seq>\d+) job_task (?P<task_uuid>\S+)$', line)
+ if m:
+ seq = int(m.group('seq'))
+ uuid = m.group('task_uuid')
+ self.seq_to_uuid[seq] = uuid
+ logger.debug('%s: seq %d is task %s', self.label, seq, uuid)
+ continue
+
m = re.search(r'^\S+ \S+ \d+ (?P<seq>\d+) success in (?P<elapsed>\d+) seconds', line)
if m:
- task_id = m.group('seq')
+ task_id = self.seq_to_uuid[int(m.group('seq'))]
elapsed = int(m.group('elapsed'))
self.task_stats[task_id]['time'] = {'elapsed': elapsed}
if elapsed > self.stats_max['time']['elapsed']:
self.stats_max['time']['elapsed'] = elapsed
continue
+
+ m = re.search(r'^\S+ \S+ \d+ (?P<seq>\d+) stderr Queued job (?P<uuid>\S+)$', line)
+ if m:
+ uuid = m.group('uuid')
+ if not self._include_child_jobs:
+ logger.warning('%s: omitting %s (try --include-child-jobs)',
+ self.label, uuid)
+ continue
+ logger.debug('%s: follow %s', self.label, uuid)
+ child_summarizer = JobSummarizer(uuid)
+ child_summarizer.stats_max = self.stats_max
+ child_summarizer.task_stats = self.task_stats
+ child_summarizer.tasks = self.tasks
+ child_summarizer.run()
+ logger.debug('%s: done %s', self.label, uuid)
+ continue
+
m = re.search(r'^(?P<timestamp>\S+) (?P<job_uuid>\S+) \d+ (?P<seq>\d+) stderr crunchstat: (?P<category>\S+) (?P<current>.*?)( -- interval (?P<interval>.*))?\n', line)
if not m:
continue
+
if self.label is None:
self.label = m.group('job_uuid')
logger.debug('%s: using job uuid as label', self.label)
@@ -65,7 +97,7 @@ class Summarizer(object):
continue
elif m.group('category') == 'error':
continue
- task_id = m.group('seq')
+ task_id = self.seq_to_uuid[int(m.group('seq'))]
task = self.tasks[task_id]
# Use the first and last crunchstat timestamps as
@@ -126,6 +158,8 @@ class Summarizer(object):
self.task_stats[task_id][category][stat] = val
if val > self.stats_max[category][stat]:
self.stats_max[category][stat] = val
+ logger.debug('%s: done parsing', self.label)
+
self.job_tot = collections.defaultdict(
functools.partial(collections.defaultdict, int))
for task_id, task_stat in self.task_stats.iteritems():
@@ -135,6 +169,7 @@ class Summarizer(object):
# meaningless stats like 16 cpu cores x 5 tasks = 80
continue
self.job_tot[category][stat] += val
+ logger.debug('%s: done totals', self.label)
def long_label(self):
label = self.label
@@ -169,6 +204,9 @@ class Summarizer(object):
tot = self._format(self.job_tot[category].get(stat, '-'))
yield "\t".join([category, stat, str(val), max_rate, tot])
for args in (
+ ('Number of tasks: {}',
+ len(self.tasks),
+ None),
('Max CPU time spent by a single task: {}s',
self.stats_max['cpu']['user+sys'],
None),
@@ -249,7 +287,8 @@ class Summarizer(object):
class CollectionSummarizer(Summarizer):
- def __init__(self, collection_id):
+ def __init__(self, collection_id, **kwargs):
+ logger.debug('load collection %s', collection_id)
collection = arvados.collection.CollectionReader(collection_id)
filenames = [filename for filename in collection]
if len(filenames) != 1:
@@ -257,12 +296,12 @@ class CollectionSummarizer(Summarizer):
"collection {} has {} files; need exactly one".format(
collection_id, len(filenames)))
super(CollectionSummarizer, self).__init__(
- collection.open(filenames[0]))
+ collection.open(filenames[0]), **kwargs)
self.label = collection_id
class JobSummarizer(CollectionSummarizer):
- def __init__(self, job):
+ def __init__(self, job, **kwargs):
arv = arvados.api('v1')
if isinstance(job, str):
self.job = arv.jobs().get(uuid=job).execute()
@@ -274,12 +313,12 @@ class JobSummarizer(CollectionSummarizer):
raise ValueError(
"job {} has no log; live summary not implemented".format(
self.job['uuid']))
- super(JobSummarizer, self).__init__(self.job['log'])
+ super(JobSummarizer, self).__init__(self.job['log'], **kwargs)
self.label = self.job['uuid']
class PipelineSummarizer():
- def __init__(self, pipeline_instance_uuid):
+ def __init__(self, pipeline_instance_uuid, **kwargs):
arv = arvados.api('v1', model=OrderedJsonModel())
instance = arv.pipeline_instances().get(
uuid=pipeline_instance_uuid).execute()
@@ -295,7 +334,7 @@ class PipelineSummarizer():
else:
logger.info(
"%s: logdata %s", cname, component['job']['log'])
- summarizer = JobSummarizer(component['job'])
+ summarizer = JobSummarizer(component['job'], **kwargs)
summarizer.label = cname
self.summarizers[cname] = summarizer
self.label = pipeline_instance_uuid
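For illustration, a toy sketch of the new seq-to-UUID mapping; the log line
below is invented to match the regex rather than copied from a real job:

    import re

    seq_to_uuid = {}
    # Crunch log format assumed here: timestamp, job uuid, pid, seq, message.
    line = ('2015-12-24_13:51:35 4xphq-8i9sb-jq0ekny1xou3zoh 27533 0 '
            'job_task 4xphq-ot0gb-aaaaaaaaaaaaaaa')
    m = re.search(r'^\S+ \S+ \d+ (?P<seq>\d+) job_task (?P<task_uuid>\S+)$', line)
    if m:
        seq_to_uuid[int(m.group('seq'))] = m.group('task_uuid')
    # A later "success in N seconds" line for seq 0 now resolves to a task
    # UUID instead of a bare sequence number:
    print(seq_to_uuid[0])  # 4xphq-ot0gb-aaaaaaaaaaaaaaa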
diff --git a/tools/crunchstat-summary/tests/logfile_20151204190335.txt.gz.report b/tools/crunchstat-summary/tests/logfile_20151204190335.txt.gz.report
index b12e931..0ba0181 100644
--- a/tools/crunchstat-summary/tests/logfile_20151204190335.txt.gz.report
+++ b/tools/crunchstat-summary/tests/logfile_20151204190335.txt.gz.report
@@ -22,11 +22,12 @@ net:keep0 rx 0 0.00 0
net:keep0 tx 0 0.00 0
net:keep0 tx+rx 0 0.00 0
time elapsed 80 - 80
+# Number of tasks: 1
# Max CPU time spent by a single task: 5.75s
# Max CPU usage in a single interval: 13.00%
# Overall CPU usage: 7.19%
# Max memory used by a single task: 0.35GB
# Max network traffic in a single task: 1.79GB
# Max network speed in a single interval: 42.58MB/s
-#!! job max CPU usage was 13% -- try runtime_constraints "min_cores_per_node":1
-#!! job max RSS was 334 MiB -- try runtime_constraints "min_ram_mb_per_node":972
+#!! 4xphq-8i9sb-jq0ekny1xou3zoh max CPU usage was 13% -- try runtime_constraints "min_cores_per_node":1
+#!! 4xphq-8i9sb-jq0ekny1xou3zoh max RSS was 334 MiB -- try runtime_constraints "min_ram_mb_per_node":972
diff --git a/tools/crunchstat-summary/tests/logfile_20151210063411.txt.gz.report b/tools/crunchstat-summary/tests/logfile_20151210063411.txt.gz.report
index 8e1a2d8..0641bba 100644
--- a/tools/crunchstat-summary/tests/logfile_20151210063411.txt.gz.report
+++ b/tools/crunchstat-summary/tests/logfile_20151210063411.txt.gz.report
@@ -11,8 +11,9 @@ net:eth0 rx 90 - 90
net:eth0 tx 90 - 90
net:eth0 tx+rx 180 - 180
time elapsed 2 - 4
+# Number of tasks: 2
# Max CPU time spent by a single task: 0.00s
# Overall CPU usage: 0.00%
# Max memory used by a single task: 0.00GB
# Max network traffic in a single task: 0.00GB
-#!! job max RSS was 1 MiB -- try runtime_constraints "min_ram_mb_per_node":972
+#!! 4xphq-8i9sb-zvb2ocfycpomrup max RSS was 1 MiB -- try runtime_constraints "min_ram_mb_per_node":972
diff --git a/tools/crunchstat-summary/tests/logfile_20151210063439.txt.gz.report b/tools/crunchstat-summary/tests/logfile_20151210063439.txt.gz.report
index dbe9321..19fe0ed 100644
--- a/tools/crunchstat-summary/tests/logfile_20151210063439.txt.gz.report
+++ b/tools/crunchstat-summary/tests/logfile_20151210063439.txt.gz.report
@@ -11,8 +11,9 @@ net:eth0 rx 90 - 90
net:eth0 tx 90 - 90
net:eth0 tx+rx 180 - 180
time elapsed 2 - 3
+# Number of tasks: 2
# Max CPU time spent by a single task: 0.00s
# Overall CPU usage: 0.00%
# Max memory used by a single task: 0.00GB
# Max network traffic in a single task: 0.00GB
-#!! job max RSS was 1 MiB -- try runtime_constraints "min_ram_mb_per_node":972
+#!! 4xphq-8i9sb-v831jm2uq0g2g9x max RSS was 1 MiB -- try runtime_constraints "min_ram_mb_per_node":972
diff --git a/tools/crunchstat-summary/tests/test_examples.py b/tools/crunchstat-summary/tests/test_examples.py
index 09e21a1..d35e81e 100644
--- a/tools/crunchstat-summary/tests/test_examples.py
+++ b/tools/crunchstat-summary/tests/test_examples.py
@@ -35,7 +35,7 @@ class SummarizeFile(ReportDiff):
class SummarizeJob(ReportDiff):
- fake_job_uuid = 'zzzzz-8i9sb-jjjjjjjjjjjjjjj'
+ fake_job_uuid = '4xphq-8i9sb-jq0ekny1xou3zoh'
fake_log_id = 'fake-log-collection-id'
fake_job = {
'uuid': fake_job_uuid,
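A minimal usage sketch of the new option (assuming ARVADOS_API_HOST and
ARVADOS_API_TOKEN are configured; the job UUID is the fake one from the test
suite):

    from crunchstat_summary import summarizer

    # Follow "Queued job <uuid>" log lines into child jobs and fold their
    # stats into this summary -- the same behavior command.py now enables
    # via the --include-child-jobs flag.
    s = summarizer.JobSummarizer('4xphq-8i9sb-jq0ekny1xou3zoh',
                                 include_child_jobs=True)
    s.run()
    print(s.text_report())

Note the asymmetric defaults: the Summarizer constructor defaults
include_child_jobs to True, while the CLI flag is store_true and so defaults
to False.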
commit 45e33a52bfdc8677e3eaf5561decf71100a474d7
Author: Tom Clegg <tom at curoverse.com>
Date: Wed Dec 23 14:09:37 2015 -0500
7883: Generate multiple sets of charts when data source is a pipeline instance.
diff --git a/tools/crunchstat-summary/crunchstat_summary/__init__.py b/tools/crunchstat-summary/crunchstat_summary/__init__.py
index e69de29..c10988d 100644
--- a/tools/crunchstat-summary/crunchstat_summary/__init__.py
+++ b/tools/crunchstat-summary/crunchstat_summary/__init__.py
@@ -0,0 +1,4 @@
+import logging
+
+logger = logging.getLogger(__name__)
+logger.addHandler(logging.NullHandler())
diff --git a/tools/crunchstat-summary/crunchstat_summary/chartjs.js b/tools/crunchstat-summary/crunchstat_summary/chartjs.js
index 3106e62..72ff7a4 100644
--- a/tools/crunchstat-summary/crunchstat_summary/chartjs.js
+++ b/tools/crunchstat-summary/crunchstat_summary/chartjs.js
@@ -1,11 +1,27 @@
window.onload = function() {
- var options = {};
- chartData.forEach(function(data, idx) {
- var div = document.createElement('div');
- div.setAttribute('id', 'chart-'+idx);
- div.setAttribute('style', 'width: 100%; height: 150px');
- document.body.appendChild(div);
- var chart = new CanvasJS.Chart('chart-'+idx, data);
- chart.render();
+ var charts = {};
+ sections.forEach(function(section, section_idx) {
+ var h1 = document.createElement('h1');
+ h1.appendChild(document.createTextNode(section.label));
+ document.body.appendChild(h1);
+ section.charts.forEach(function(data, chart_idx) {
+ // Skip chart if every series has zero data points
+ if (0 == data.data.reduce(function(len, series) {
+ return len + series.dataPoints.length;
+ }, 0)) {
+ return;
+ }
+ var id = 'chart-'+section_idx+'-'+chart_idx;
+ var div = document.createElement('div');
+ div.setAttribute('id', id);
+ div.setAttribute('style', 'width: 100%; height: 150px');
+ document.body.appendChild(div);
+ charts[id] = new CanvasJS.Chart(id, data);
+ charts[id].render();
+ });
});
-}
+
+ if (typeof window.debug === 'undefined')
+ window.debug = {};
+ window.debug.charts = charts;
+};
diff --git a/tools/crunchstat-summary/crunchstat_summary/chartjs.py b/tools/crunchstat-summary/crunchstat_summary/chartjs.py
index 85b49b8..590df27 100644
--- a/tools/crunchstat-summary/crunchstat_summary/chartjs.py
+++ b/tools/crunchstat-summary/crunchstat_summary/chartjs.py
@@ -3,13 +3,15 @@ from __future__ import print_function
import json
import pkg_resources
+from crunchstat_summary import logger
+
class ChartJS(object):
- JSLIB = 'https://cdnjs.cloudflare.com/ajax/libs/canvasjs/1.7.0/canvasjs.js'
+ JSLIB = 'https://cdnjs.cloudflare.com/ajax/libs/canvasjs/1.7.0/canvasjs.min.js'
- def __init__(self, label, tasks):
+ def __init__(self, label, summarizers):
self.label = label
- self.tasks = tasks
+ self.summarizers = summarizers
def html(self):
return '''<!doctype html><html><head>
@@ -20,31 +22,48 @@ class ChartJS(object):
'''.format(self.label, self.JSLIB, self.js())
def js(self):
- return 'var chartData = {};\n{}'.format(
- json.dumps(self.chartData()),
+ return 'var sections = {};\n{}'.format(
+ json.dumps(self.sections()),
pkg_resources.resource_string('crunchstat_summary', 'chartjs.js'))
- def chartData(self):
- maxpts = 0
- for task in self.tasks.itervalues():
- for series in task.series.itervalues():
- maxpts = max(maxpts, len(series))
+ def sections(self):
return [
{
- 'title': {
- 'text': '{}: {} {}'.format(self.label, stat[0], stat[1]),
+ 'label': s.long_label(),
+ 'charts': self.charts(s.label, s.tasks),
+ }
+ for s in self.summarizers]
+
+ def charts(self, label, tasks):
+ return [
+ {
+ 'axisY': {
+ 'minimum': 0,
},
'data': [
{
'type': 'line',
- 'dataPoints': [
- {'x': pt[0].total_seconds(), 'y': pt[1]}
- for pt in task.series[stat]]
+ 'markerType': 'none',
+ 'dataPoints': self._datapoints(
+ label=uuid, task=task, series=task.series[stat]),
}
- for label, task in self.tasks.iteritems()
+ for uuid, task in tasks.iteritems()
],
+ 'title': {
+ 'text': '{}: {} {}'.format(label, stat[0], stat[1]),
+ },
+ 'zoomEnabled': True,
}
for stat in (('cpu', 'user+sys__rate'),
+ ('mem', 'rss'),
('net:eth0', 'tx+rx__rate'),
- ('mem', 'rss'))
- ]
+ ('net:keep0', 'tx+rx__rate'))]
+
+ def _datapoints(self, label, task, series):
+ points = [
+ {'x': pt[0].total_seconds(), 'y': pt[1]}
+ for pt in series]
+ if len(points) > 0:
+ points[-1]['markerType'] = 'cross'
+ points[-1]['markerSize'] = 12
+ return points
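For reference, the approximate shape of the data sections() now hands to
chartjs.js, shown for one summarizer with a single chart (labels and values
invented):

    sections = [
        {
            'label': '4xphq-8i9sb-jq0ekny1xou3zoh -- elapsed time 1m20s',
            'charts': [
                {
                    'axisY': {'minimum': 0},
                    'data': [{'type': 'line',
                              'markerType': 'none',
                              'dataPoints': [{'x': 0.0, 'y': 0.0},
                                             # _datapoints() marks the last
                                             # point of each series:
                                             {'x': 10.0, 'y': 0.5,
                                              'markerType': 'cross',
                                              'markerSize': 12}]}],
                    'title': {'text': '...: cpu user+sys__rate'},
                    'zoomEnabled': True,
                },
            ],
        },
    ]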
diff --git a/tools/crunchstat-summary/crunchstat_summary/command.py b/tools/crunchstat-summary/crunchstat_summary/command.py
index c28b00f..2c7574c 100644
--- a/tools/crunchstat-summary/crunchstat_summary/command.py
+++ b/tools/crunchstat-summary/crunchstat_summary/command.py
@@ -1,8 +1,9 @@
import argparse
import gzip
+import logging
import sys
-from crunchstat_summary import summarizer
+from crunchstat_summary import logger, summarizer
class ArgumentParser(argparse.ArgumentParser):
@@ -22,11 +23,21 @@ class ArgumentParser(argparse.ArgumentParser):
self.add_argument(
'--format', type=str, choices=('html', 'text'), default='text',
help='Report format')
+ self.add_argument(
+ '--verbose', action='store_true',
+ help='Write progress messages to stderr')
+ self.add_argument(
+ '--debug', action='store_true',
+ help='Write debug messages to stderr')
class Command(object):
def __init__(self, args):
self.args = args
+ if args.debug:
+ logger.setLevel(logging.DEBUG)
+ elif args.verbose:
+ logger.setLevel(logging.INFO)
def run(self):
if self.args.pipeline_instance:
diff --git a/tools/crunchstat-summary/crunchstat_summary/summarizer.py b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
index b630a0c..6396b5d 100644
--- a/tools/crunchstat-summary/crunchstat_summary/summarizer.py
+++ b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
@@ -6,13 +6,12 @@ import crunchstat_summary.chartjs
import datetime
import functools
import itertools
-import logging
import math
import re
import sys
-logger = logging.getLogger(__name__)
-logger.addHandler(logging.NullHandler())
+from arvados.api import OrderedJsonModel
+from crunchstat_summary import logger
# Recommend memory constraints that are this multiple of an integral
# number of GiB. (Actual nodes tend to be sold in sizes like 8 GiB
@@ -29,11 +28,15 @@ class Task(object):
class Summarizer(object):
existing_constraints = {}
- def __init__(self, logdata, label='job'):
+ def __init__(self, logdata, label=None):
self._logdata = logdata
self.label = label
+ self.starttime = None
+ self.finishtime = None
+ logger.debug("%s: logdata %s", self.label, repr(logdata))
def run(self):
+ logger.debug("%s: parsing log data", self.label)
# stats_max: {category: {stat: val}}
self.stats_max = collections.defaultdict(
functools.partial(collections.defaultdict,
@@ -51,20 +54,34 @@ class Summarizer(object):
if elapsed > self.stats_max['time']['elapsed']:
self.stats_max['time']['elapsed'] = elapsed
continue
- m = re.search(r'^(?P<timestamp>\S+) \S+ \d+ (?P<seq>\d+) stderr crunchstat: (?P<category>\S+) (?P<current>.*?)( -- interval (?P<interval>.*))?\n', line)
+ m = re.search(r'^(?P<timestamp>\S+) (?P<job_uuid>\S+) \d+ (?P<seq>\d+) stderr crunchstat: (?P<category>\S+) (?P<current>.*?)( -- interval (?P<interval>.*))?\n', line)
if not m:
continue
+ if self.label is None:
+ self.label = m.group('job_uuid')
+ logger.debug('%s: using job uuid as label', self.label)
if m.group('category').endswith(':'):
# "notice:" etc.
continue
elif m.group('category') == 'error':
continue
task_id = m.group('seq')
+ task = self.tasks[task_id]
+
+ # Use the first and last crunchstat timestamps as
+ # approximations of starttime and finishtime.
timestamp = datetime.datetime.strptime(
m.group('timestamp'), '%Y-%m-%d_%H:%M:%S')
- task = self.tasks[task_id]
if not task.starttime:
task.starttime = timestamp
+ logger.debug('%s: task %s starttime %s',
+ self.label, task_id, timestamp)
+ task.finishtime = timestamp
+
+ if not self.starttime:
+ self.starttime = timestamp
+ self.finishtime = timestamp
+
this_interval_s = None
for group in ['current', 'interval']:
if not m.group(group):
@@ -119,13 +136,27 @@ class Summarizer(object):
continue
self.job_tot[category][stat] += val
+ def long_label(self):
+ label = self.label
+ if self.finishtime:
+ label += ' -- elapsed time '
+ s = (self.finishtime - self.starttime).total_seconds()
+ if s > 86400:
+ label += '{}d'.format(int(s/86400))
+ if s > 3600:
+ label += '{}h'.format(int(s/3600) % 24)
+ if s > 60:
+ label += '{}m'.format(int(s/60) % 60)
+ label += '{}s'.format(int(s) % 60)
+ return label
+
def text_report(self):
return "\n".join(itertools.chain(
self._text_report_gen(),
self._recommend_gen())) + "\n"
def html_report(self):
- return crunchstat_summary.chartjs.ChartJS(self.label, self.tasks).html()
+ return crunchstat_summary.chartjs.ChartJS(self.label, [self]).html()
def _text_report_gen(self):
yield "\t".join(['category', 'metric', 'task_max', 'task_max_rate', 'job_total'])
@@ -227,6 +258,7 @@ class CollectionSummarizer(Summarizer):
collection_id, len(filenames)))
super(CollectionSummarizer, self).__init__(
collection.open(filenames[0]))
+ self.label = collection_id
class JobSummarizer(CollectionSummarizer):
@@ -243,11 +275,12 @@ class JobSummarizer(CollectionSummarizer):
"job {} has no log; live summary not implemented".format(
self.job['uuid']))
super(JobSummarizer, self).__init__(self.job['log'])
+ self.label = self.job['uuid']
class PipelineSummarizer():
def __init__(self, pipeline_instance_uuid):
- arv = arvados.api('v1')
+ arv = arvados.api('v1', model=OrderedJsonModel())
instance = arv.pipeline_instances().get(
uuid=pipeline_instance_uuid).execute()
self.summarizers = collections.OrderedDict()
@@ -260,11 +293,12 @@ class PipelineSummarizer():
"%s: skipping job %s with no log available",
cname, component['job'].get('uuid'))
else:
- logger.debug(
- "%s: reading log from %s", cname, component['job']['log'])
+ logger.info(
+ "%s: logdata %s", cname, component['job']['log'])
summarizer = JobSummarizer(component['job'])
summarizer.label = cname
self.summarizers[cname] = summarizer
+ self.label = pipeline_instance_uuid
def run(self):
for summarizer in self.summarizers.itervalues():
@@ -278,3 +312,7 @@ class PipelineSummarizer():
txt += summarizer.text_report()
txt += '\n'
return txt
+
+ def html_report(self):
+ return crunchstat_summary.chartjs.ChartJS(
+ self.label, self.summarizers.itervalues()).html()
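A short sketch of the resulting multi-section HTML report (the pipeline
instance UUID here is hypothetical):

    from crunchstat_summary import summarizer

    # Each pipeline component becomes its own labelled section of charts,
    # with the heading text coming from each JobSummarizer's long_label().
    ps = summarizer.PipelineSummarizer('zzzzz-d1hrv-zzzzzzzzzzzzzzz')
    ps.run()
    with open('pipeline.html', 'w') as f:
        f.write(ps.html_report())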
commit d85fc3bcf31f146d93250587ea24d2afd53044ac
Author: Tom Clegg <tom at curoverse.com>
Date: Wed Dec 23 11:39:14 2015 -0500
7883: Add option (--format html) to generate canvasjs charts.
diff --git a/tools/crunchstat-summary/bin/crunchstat-summary b/tools/crunchstat-summary/bin/crunchstat-summary
index c32b50e..e16bd8e 100755
--- a/tools/crunchstat-summary/bin/crunchstat-summary
+++ b/tools/crunchstat-summary/bin/crunchstat-summary
@@ -10,6 +10,6 @@ import sys
logging.getLogger().addHandler(logging.StreamHandler())
args = crunchstat_summary.command.ArgumentParser().parse_args(sys.argv[1:])
-s = crunchstat_summary.command.Command(args).summarizer()
-s.run()
-print(s.report(), end='')
+cmd = crunchstat_summary.command.Command(args)
+cmd.run()
+print(cmd.report(), end='')
diff --git a/tools/crunchstat-summary/crunchstat_summary/chartjs.js b/tools/crunchstat-summary/crunchstat_summary/chartjs.js
new file mode 100644
index 0000000..3106e62
--- /dev/null
+++ b/tools/crunchstat-summary/crunchstat_summary/chartjs.js
@@ -0,0 +1,11 @@
+window.onload = function() {
+ var options = {};
+ chartData.forEach(function(data, idx) {
+ var div = document.createElement('div');
+ div.setAttribute('id', 'chart-'+idx);
+ div.setAttribute('style', 'width: 100%; height: 150px');
+ document.body.appendChild(div);
+ var chart = new CanvasJS.Chart('chart-'+idx, data);
+ chart.render();
+ });
+}
diff --git a/tools/crunchstat-summary/crunchstat_summary/chartjs.py b/tools/crunchstat-summary/crunchstat_summary/chartjs.py
new file mode 100644
index 0000000..85b49b8
--- /dev/null
+++ b/tools/crunchstat-summary/crunchstat_summary/chartjs.py
@@ -0,0 +1,50 @@
+from __future__ import print_function
+
+import json
+import pkg_resources
+
+
+class ChartJS(object):
+ JSLIB = 'https://cdnjs.cloudflare.com/ajax/libs/canvasjs/1.7.0/canvasjs.js'
+
+ def __init__(self, label, tasks):
+ self.label = label
+ self.tasks = tasks
+
+ def html(self):
+ return '''<!doctype html><html><head>
+ <title>{} stats</title>
+ <script type="text/javascript" src="{}"></script>
+ <script type="text/javascript">{}</script>
+ </head><body></body></html>
+ '''.format(self.label, self.JSLIB, self.js())
+
+ def js(self):
+ return 'var chartData = {};\n{}'.format(
+ json.dumps(self.chartData()),
+ pkg_resources.resource_string('crunchstat_summary', 'chartjs.js'))
+
+ def chartData(self):
+ maxpts = 0
+ for task in self.tasks.itervalues():
+ for series in task.series.itervalues():
+ maxpts = max(maxpts, len(series))
+ return [
+ {
+ 'title': {
+ 'text': '{}: {} {}'.format(self.label, stat[0], stat[1]),
+ },
+ 'data': [
+ {
+ 'type': 'line',
+ 'dataPoints': [
+ {'x': pt[0].total_seconds(), 'y': pt[1]}
+ for pt in task.series[stat]]
+ }
+ for label, task in self.tasks.iteritems()
+ ],
+ }
+ for stat in (('cpu', 'user+sys__rate'),
+ ('net:eth0', 'tx+rx__rate'),
+ ('mem', 'rss'))
+ ]
diff --git a/tools/crunchstat-summary/crunchstat_summary/command.py b/tools/crunchstat-summary/crunchstat_summary/command.py
index fc37190..c28b00f 100644
--- a/tools/crunchstat-summary/crunchstat_summary/command.py
+++ b/tools/crunchstat-summary/crunchstat_summary/command.py
@@ -19,22 +19,32 @@ class ArgumentParser(argparse.ArgumentParser):
src.add_argument(
'--log-file', type=str,
help='Read log data from a regular file')
+ self.add_argument(
+ '--format', type=str, choices=('html', 'text'), default='text',
+ help='Report format')
class Command(object):
def __init__(self, args):
self.args = args
- def summarizer(self):
+ def run(self):
if self.args.pipeline_instance:
- return summarizer.PipelineSummarizer(self.args.pipeline_instance)
+ self.summer = summarizer.PipelineSummarizer(self.args.pipeline_instance)
elif self.args.job:
- return summarizer.JobSummarizer(self.args.job)
+ self.summer = summarizer.JobSummarizer(self.args.job)
elif self.args.log_file:
if self.args.log_file.endswith('.gz'):
fh = gzip.open(self.args.log_file)
else:
fh = open(self.args.log_file)
- return summarizer.Summarizer(fh)
+ self.summer = summarizer.Summarizer(fh)
else:
- return summarizer.Summarizer(sys.stdin)
+ self.summer = summarizer.Summarizer(sys.stdin)
+ return self.summer.run()
+
+ def report(self):
+ if self.args.format == 'html':
+ return self.summer.html_report()
+ elif self.args.format == 'text':
+ return self.summer.text_report()
diff --git a/tools/crunchstat-summary/crunchstat_summary/summarizer.py b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
index f5d27d4..b630a0c 100644
--- a/tools/crunchstat-summary/crunchstat_summary/summarizer.py
+++ b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
@@ -2,6 +2,8 @@ from __future__ import print_function
import arvados
import collections
+import crunchstat_summary.chartjs
+import datetime
import functools
import itertools
import logging
@@ -17,6 +19,13 @@ logger.addHandler(logging.NullHandler())
# that have amounts like 7.5 GiB according to the kernel.)
AVAILABLE_RAM_RATIO = 0.95
+
+class Task(object):
+ def __init__(self):
+ self.starttime = None
+ self.series = collections.defaultdict(list)
+
+
class Summarizer(object):
existing_constraints = {}
@@ -32,6 +41,7 @@ class Summarizer(object):
# task_stats: {task_id: {category: {stat: val}}}
self.task_stats = collections.defaultdict(
functools.partial(collections.defaultdict, dict))
+ self.tasks = collections.defaultdict(Task)
for line in self._logdata:
m = re.search(r'^\S+ \S+ \d+ (?P<seq>\d+) success in (?P<elapsed>\d+) seconds', line)
if m:
@@ -41,7 +51,7 @@ class Summarizer(object):
if elapsed > self.stats_max['time']['elapsed']:
self.stats_max['time']['elapsed'] = elapsed
continue
- m = re.search(r'^\S+ \S+ \d+ (?P<seq>\d+) stderr crunchstat: (?P<category>\S+) (?P<current>.*?)( -- interval (?P<interval>.*))?\n', line)
+ m = re.search(r'^(?P<timestamp>\S+) \S+ \d+ (?P<seq>\d+) stderr crunchstat: (?P<category>\S+) (?P<current>.*?)( -- interval (?P<interval>.*))?\n', line)
if not m:
continue
if m.group('category').endswith(':'):
@@ -50,6 +60,11 @@ class Summarizer(object):
elif m.group('category') == 'error':
continue
task_id = m.group('seq')
+ timestamp = datetime.datetime.strptime(
+ m.group('timestamp'), '%Y-%m-%d_%H:%M:%S')
+ task = self.tasks[task_id]
+ if not task.starttime:
+ task.starttime = timestamp
this_interval_s = None
for group in ['current', 'interval']:
if not m.group(group):
@@ -84,7 +99,13 @@ class Summarizer(object):
else:
stat = stat + '__rate'
val = val / this_interval_s
+ if stat in ['user+sys__rate', 'tx+rx__rate']:
+ task.series[category, stat].append(
+ (timestamp - task.starttime, val))
else:
+ if stat in ['rss']:
+ task.series[category, stat].append(
+ (timestamp - task.starttime, val))
self.task_stats[task_id][category][stat] = val
if val > self.stats_max[category][stat]:
self.stats_max[category][stat] = val
@@ -98,12 +119,15 @@ class Summarizer(object):
continue
self.job_tot[category][stat] += val
- def report(self):
+ def text_report(self):
return "\n".join(itertools.chain(
- self._report_gen(),
+ self._text_report_gen(),
self._recommend_gen())) + "\n"
- def _report_gen(self):
+ def html_report(self):
+ return crunchstat_summary.chartjs.ChartJS(self.label, self.tasks).html()
+
+ def _text_report_gen(self):
yield "\t".join(['category', 'metric', 'task_max', 'task_max_rate', 'job_total'])
for category, stat_max in sorted(self.stats_max.iteritems()):
for stat, val in sorted(stat_max.iteritems()):
@@ -246,11 +270,11 @@ class PipelineSummarizer():
for summarizer in self.summarizers.itervalues():
summarizer.run()
- def report(self):
+ def text_report(self):
txt = ''
for cname, summarizer in self.summarizers.iteritems():
txt += '### Summary for {} ({})\n'.format(
cname, summarizer.job['uuid'])
- txt += summarizer.report()
+ txt += summarizer.text_report()
txt += '\n'
return txt
diff --git a/tools/crunchstat-summary/tests/test_examples.py b/tools/crunchstat-summary/tests/test_examples.py
index cf810ae..09e21a1 100644
--- a/tools/crunchstat-summary/tests/test_examples.py
+++ b/tools/crunchstat-summary/tests/test_examples.py
@@ -1,7 +1,6 @@
import arvados
import collections
import crunchstat_summary.command
-import crunchstat_summary.summarizer
import difflib
import glob
import gzip
@@ -9,17 +8,17 @@ import mock
import os
import unittest
-
TESTS_DIR = os.path.dirname(os.path.abspath(__file__))
+
class ReportDiff(unittest.TestCase):
- def diff_known_report(self, logfile, summarizer):
+ def diff_known_report(self, logfile, cmd):
expectfile = logfile+'.report'
expect = open(expectfile).readlines()
- self.diff_report(summarizer, expect, expectfile=expectfile)
+ self.diff_report(cmd, expect, expectfile=expectfile)
- def diff_report(self, summarizer, expect, expectfile=None):
- got = [x+"\n" for x in summarizer.report().strip("\n").split("\n")]
+ def diff_report(self, cmd, expect, expectfile=None):
+ got = [x+"\n" for x in cmd.report().strip("\n").split("\n")]
self.assertEqual(got, expect, "\n"+"".join(difflib.context_diff(
expect, got, fromfile=expectfile, tofile="(generated)")))
@@ -30,9 +29,9 @@ class SummarizeFile(ReportDiff):
logfile = os.path.join(TESTS_DIR, fnm)
args = crunchstat_summary.command.ArgumentParser().parse_args(
['--log-file', logfile])
- summarizer = crunchstat_summary.command.Command(args).summarizer()
- summarizer.run()
- self.diff_known_report(logfile, summarizer)
+ cmd = crunchstat_summary.command.Command(args)
+ cmd.run()
+ self.diff_known_report(logfile, cmd)
class SummarizeJob(ReportDiff):
@@ -52,9 +51,9 @@ class SummarizeJob(ReportDiff):
mock_cr().open.return_value = gzip.open(self.logfile)
args = crunchstat_summary.command.ArgumentParser().parse_args(
['--job', self.fake_job_uuid])
- summarizer = crunchstat_summary.command.Command(args).summarizer()
- summarizer.run()
- self.diff_known_report(self.logfile, summarizer)
+ cmd = crunchstat_summary.command.Command(args)
+ cmd.run()
+ self.diff_known_report(self.logfile, cmd)
mock_api().jobs().get.assert_called_with(uuid=self.fake_job_uuid)
mock_cr.assert_called_with(self.fake_log_id)
mock_cr().open.assert_called_with('fake-logfile.txt')
@@ -113,8 +112,8 @@ class SummarizePipeline(ReportDiff):
mock_cr().open.side_effect = [gzip.open(logfile) for _ in range(3)]
args = crunchstat_summary.command.ArgumentParser().parse_args(
['--pipeline-instance', self.fake_instance['uuid']])
- summarizer = crunchstat_summary.command.Command(args).summarizer()
- summarizer.run()
+ cmd = crunchstat_summary.command.Command(args)
+ cmd.run()
job_report = [
line for line in open(logfile+'.report').readlines()
@@ -126,7 +125,7 @@ class SummarizePipeline(ReportDiff):
job_report + ['\n'] +
['### Summary for baz (zzzzz-8i9sb-000000000000002)\n'] +
job_report)
- self.diff_report(summarizer, expect)
+ self.diff_report(cmd, expect)
mock_cr.assert_has_calls(
[
mock.call('fake-log-pdh-0'),
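Equivalent to running "crunchstat-summary --log-file ... --format html" from
the shell, a sketch of the new HTML path through the Command API (the log
file name is a placeholder):

    from __future__ import print_function
    import crunchstat_summary.command

    args = crunchstat_summary.command.ArgumentParser().parse_args(
        ['--log-file', 'crunchstat.txt', '--format', 'html'])
    cmd = crunchstat_summary.command.Command(args)
    cmd.run()
    print(cmd.report(), end='')  # emits the canvasjs HTML document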
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits mailing list