[ARVADOS] updated: e1385430ac8a650aca04e95abc451d81c2c88cf5

git at public.curoverse.com git at public.curoverse.com
Wed Feb 10 11:05:07 EST 2016


Summary of changes:
 apps/workbench/Gemfile.lock         |  3 ---
 crunch_scripts/crunchutil/subst.py  |  2 +-
 crunch_scripts/run-command          | 17 +++++++++-----
 sdk/cli/bin/crunch-job              | 44 +++++++++++++++++++++++--------------
 services/api/lib/crunch_dispatch.rb |  5 ++---
 5 files changed, 43 insertions(+), 28 deletions(-)

  discards  741f862e9efd61320b0375076feb41ad49a2d1c8 (commit)
  discards  46dc751a9286801b556875077f8f7c4fff59f469 (commit)
  discards  1801e182be76647235bf237f614f80ad033528e1 (commit)
  discards  b04314022bee0447dee03457a453920c505008b1 (commit)
  discards  cacf0b4e45542288adcc83a173f6d48f04b32ab6 (commit)
  discards  4800c10a866d7576410beca411234001c93b858b (commit)
       via  e1385430ac8a650aca04e95abc451d81c2c88cf5 (commit)
       via  aabdbefd374565d5203d414f46eadc8345c022bf (commit)
       via  49dbada3eb269212cdfb38bcae07781e141453fc (commit)
       via  5522d7db6279de7b48fbb734107f7fcd4fed5152 (commit)
       via  93fbce38ba033404b86236101e5491fa89e6abd1 (commit)
       via  3360352ddbcbfa6d89e80f4aae0e20eddd96daf8 (commit)
       via  fdc9a9308c646d23ec50073833f141ceebf78613 (commit)
       via  2b324ca45515fbf079afb15db821f437ee6b64a5 (commit)
       via  74bfe6e5794b42d3158d9358245802e73942b909 (commit)
       via  d1c72b300d50f071106a320a807f465c251a812e (commit)
       via  baeb7dbe5929012dea22985b11ae4c5584f76891 (commit)
       via  19199a75e41004ea776622c305c3ca43e5367bf2 (commit)
       via  ddd02b0a536cbc0e80b77df43939e30f294126b5 (commit)
       via  090b515aa10dba597b20a96797f17688f582a529 (commit)
       via  fcbb743e3de63e93280f2fbeedea49f98430d26f (commit)
       via  e8b7fbbd711836c59824327364e15ef2253a5848 (commit)
       via  1fc6d7713baabfe85b49191e156b6c093d22b69f (commit)
       via  d4807ea755bcd122388189815b0b6d5aaa295509 (commit)
       via  3f5b70e78c4008e0de876e53fc7fbbc0671a6937 (commit)
       via  3a8714e6fcf41c46d1fde0a6a3e4beb1367d181d (commit)

This update added new revisions after undoing existing revisions.  That is
to say, the old revision is not a strict subset of the new revision.  This
situation occurs when you --force push a change and generate a repository
containing something like this:

 * -- * -- B -- O -- O -- O (741f862e9efd61320b0375076feb41ad49a2d1c8)
            \
             N -- N -- N (e1385430ac8a650aca04e95abc451d81c2c88cf5)

When this happens we assume that you have already received alert emails
for all of the O revisions, so this email reports only the revisions on
the N branch, starting from the common base, B.

The revisions listed above that are new to this repository have not
appeared in any other notification email, so they are listed in full
below.


commit e1385430ac8a650aca04e95abc451d81c2c88cf5
Author: Tom Clegg <tom at curoverse.com>
Date:   Tue Feb 9 22:44:02 2016 -0500

    8341: Retrieve only the log attributes that actually get used.

diff --git a/tools/crunchstat-summary/crunchstat_summary/reader.py b/tools/crunchstat-summary/crunchstat_summary/reader.py
index 564e820..2b6ebce 100644
--- a/tools/crunchstat-summary/crunchstat_summary/reader.py
+++ b/tools/crunchstat-summary/crunchstat_summary/reader.py
@@ -48,6 +48,7 @@ class LiveLogReader(object):
                     limit=1000,
                     order=['id asc'],
                     filters=filters + [['id','>',str(last_id)]],
+                    select=['id', 'properties'],
                 ).execute(num_retries=2)
                 got += len(page['items'])
                 logger.debug(

commit aabdbefd374565d5203d414f46eadc8345c022bf
Author: Tom Clegg <tom at curoverse.com>
Date:   Tue Feb 9 13:53:38 2016 -0500

    8341: In pipeline mode, process all jobs concurrently.

diff --git a/tools/crunchstat-summary/crunchstat_summary/summarizer.py b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
index cf748ff..1a6a41c 100644
--- a/tools/crunchstat-summary/crunchstat_summary/summarizer.py
+++ b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
@@ -10,6 +10,7 @@ import itertools
 import math
 import re
 import sys
+import threading
 
 from arvados.api import OrderedJsonModel
 from crunchstat_summary import logger
@@ -378,8 +379,14 @@ class PipelineSummarizer(object):
         self.label = pipeline_instance_uuid
 
     def run(self):
+        threads = []
         for summarizer in self.summarizers.itervalues():
-            summarizer.run()
+            t = threading.Thread(target=summarizer.run)
+            t.daemon = True
+            t.start()
+            threads.append(t)
+        for t in threads:
+            t.join()
 
     def text_report(self):
         txt = ''

commit 49dbada3eb269212cdfb38bcae07781e141453fc
Author: Tom Clegg <tom at curoverse.com>
Date:   Tue Feb 9 11:49:35 2016 -0500

    8341: Include Keep network activity in net stats.

diff --git a/tools/crunchstat-summary/crunchstat_summary/summarizer.py b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
index a8d3fd0..cf748ff 100644
--- a/tools/crunchstat-summary/crunchstat_summary/summarizer.py
+++ b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
@@ -37,8 +37,7 @@ class Summarizer(object):
 
         # stats_max: {category: {stat: val}}
         self.stats_max = collections.defaultdict(
-            functools.partial(collections.defaultdict,
-                              lambda: float('-Inf')))
+            functools.partial(collections.defaultdict, lambda: 0))
         # task_stats: {task_id: {category: {stat: val}}}
         self.task_stats = collections.defaultdict(
             functools.partial(collections.defaultdict, dict))
@@ -232,10 +231,12 @@ class Summarizer(object):
                  self.stats_max['mem']['rss'],
                  lambda x: x / 1e9),
                 ('Max network traffic in a single task: {}GB',
-                 self.stats_max['net:eth0']['tx+rx'],
+                 self.stats_max['net:eth0']['tx+rx'] +
+                 self.stats_max['net:keep0']['tx+rx'],
                  lambda x: x / 1e9),
                 ('Max network speed in a single interval: {}MB/s',
-                 self.stats_max['net:eth0']['tx+rx__rate'],
+                 self.stats_max['net:eth0']['tx+rx__rate'] +
+                 self.stats_max['net:keep0']['tx+rx__rate'],
                  lambda x: x / 1e6)):
             format_string, val, transform = args
             if val == float('-Inf'):

commit 5522d7db6279de7b48fbb734107f7fcd4fed5152
Author: Tom Clegg <tom at curoverse.com>
Date:   Tue Feb 9 10:27:47 2016 -0500

    8341: Fix up debug labels. Avoid deadlock after exceptions in thread.

diff --git a/tools/crunchstat-summary/crunchstat_summary/reader.py b/tools/crunchstat-summary/crunchstat_summary/reader.py
index 049b48f..564e820 100644
--- a/tools/crunchstat-summary/crunchstat_summary/reader.py
+++ b/tools/crunchstat-summary/crunchstat_summary/reader.py
@@ -17,6 +17,10 @@ class CollectionReader(object):
                 "collection {} has {} files; need exactly one".format(
                     collection_id, len(filenames)))
         self._reader = collection.open(filenames[0])
+        self._label = "{}/{}".format(collection_id, filenames[0])
+
+    def __str__(self):
+        return self._label
 
     def __iter__(self):
         return iter(self._reader)
@@ -27,33 +31,38 @@ class LiveLogReader(object):
 
     def __init__(self, job_uuid):
         logger.debug('load stderr events for job %s', job_uuid)
-        self._filters = [
-            ['object_uuid', '=', job_uuid],
-            ['event_type', '=', 'stderr']]
-        self._label = job_uuid
+        self.job_uuid = job_uuid
+
+    def __str__(self):
+        return self.job_uuid
 
     def _get_all_pages(self):
         got = 0
         last_id = 0
-        while True:
-            page = arvados.api().logs().index(
-                limit=1000,
-                order=['id asc'],
-                filters=self._filters + [['id','>',str(last_id)]],
-            ).execute(num_retries=2)
-            got += len(page['items'])
-            logger.debug(
-                '%s: received %d of %d log events',
-                self._label, got,
-                got + page['items_available'] - len(page['items']))
-            for i in page['items']:
-                for line in i['properties']['text'].split('\n'):
-                    self._queue.put(line+'\n')
-                last_id = i['id']
-            if (len(page['items']) == 0 or
-                len(page['items']) >= page['items_available']):
-                break
-        self._queue.put(self.EOF)
+        filters = [
+            ['object_uuid', '=', self.job_uuid],
+            ['event_type', '=', 'stderr']]
+        try:
+            while True:
+                page = arvados.api().logs().index(
+                    limit=1000,
+                    order=['id asc'],
+                    filters=filters + [['id','>',str(last_id)]],
+                ).execute(num_retries=2)
+                got += len(page['items'])
+                logger.debug(
+                    '%s: received %d of %d log events',
+                    self.job_uuid, got,
+                    got + page['items_available'] - len(page['items']))
+                for i in page['items']:
+                    for line in i['properties']['text'].split('\n'):
+                        self._queue.put(line+'\n')
+                    last_id = i['id']
+                if (len(page['items']) == 0 or
+                    len(page['items']) >= page['items_available']):
+                    break
+        finally:
+            self._queue.put(self.EOF)
 
     def __iter__(self):
         self._queue = Queue.Queue()
@@ -65,5 +74,6 @@ class LiveLogReader(object):
     def next(self):
         line = self._queue.get()
         if line is self.EOF:
+            self._thread.join()
             raise StopIteration
         return line
diff --git a/tools/crunchstat-summary/crunchstat_summary/summarizer.py b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
index 70b85f8..a8d3fd0 100644
--- a/tools/crunchstat-summary/crunchstat_summary/summarizer.py
+++ b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
@@ -52,10 +52,10 @@ class Summarizer(object):
         # constructor will overwrite this with something useful.
         self.existing_constraints = {}
 
-        logger.debug("%s: logdata %s", self.label, repr(logdata))
+        logger.debug("%s: logdata %s", self.label, logdata)
 
     def run(self):
-        logger.debug("%s: parsing log data", self.label)
+        logger.debug("%s: parsing logdata %s", self.label, self._logdata)
         for line in self._logdata:
             m = re.search(r'^\S+ \S+ \d+ (?P<seq>\d+) job_task (?P<task_uuid>\S+)$', line)
             if m:

commit 93fbce38ba033404b86236101e5491fa89e6abd1
Author: Tom Clegg <tom at curoverse.com>
Date:   Mon Feb 8 15:47:02 2016 -0500

    8341: Do not round up Y axis to even numbers, just use max series value.
    
    Remove Y axis labels (so X axis matches other graphs from the same
    job), add grid lines.

diff --git a/tools/crunchstat-summary/crunchstat_summary/chartjs.py b/tools/crunchstat-summary/crunchstat_summary/chartjs.py
index fb30041..03e45e6 100644
--- a/tools/crunchstat-summary/crunchstat_summary/chartjs.py
+++ b/tools/crunchstat-summary/crunchstat_summary/chartjs.py
@@ -2,6 +2,7 @@ from __future__ import print_function
 
 import cgi
 import json
+import math
 import pkg_resources
 
 from crunchstat_summary import logger
@@ -35,12 +36,25 @@ class ChartJS(object):
             }
             for s in self.summarizers]
 
+    def _axisY(self, tasks, stat):
+        ymax = 1
+        for task in tasks.itervalues():
+            for pt in task.series[stat]:
+                ymax = max(ymax, pt[1])
+        ytick = math.exp((1+math.floor(math.log(ymax, 2)))*math.log(2))/4
+        return {
+            'gridColor': '#cccccc',
+            'gridThickness': 1,
+            'interval': ytick,
+            'minimum': 0,
+            'maximum': ymax,
+            'valueFormatString': "''",
+        }
+
     def charts(self, label, tasks):
         return [
             {
-                'axisY': {
-                    'minimum': 0,
-                },
+                'axisY': self._axisY(tasks=tasks, stat=stat),
                 'data': [
                     {
                         'type': 'line',

commit 3360352ddbcbfa6d89e80f4aae0e20eddd96daf8
Author: Tom Clegg <tom at curoverse.com>
Date:   Mon Feb 8 09:56:10 2016 -0500

    8341: Use "time since job start", not "time since task start", as X axis.

diff --git a/tools/crunchstat-summary/crunchstat_summary/summarizer.py b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
index 48bec6a..70b85f8 100644
--- a/tools/crunchstat-summary/crunchstat_summary/summarizer.py
+++ b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
@@ -87,6 +87,7 @@ class Summarizer(object):
                 child_summarizer.stats_max = self.stats_max
                 child_summarizer.task_stats = self.task_stats
                 child_summarizer.tasks = self.tasks
+                child_summarizer.starttime = self.starttime
                 child_summarizer.run()
                 logger.debug('%s: done %s', self.label, uuid)
                 continue
@@ -160,11 +161,11 @@ class Summarizer(object):
                             val = val / this_interval_s
                             if stat in ['user+sys__rate', 'tx+rx__rate']:
                                 task.series[category, stat].append(
-                                    (timestamp - task.starttime, val))
+                                    (timestamp - self.starttime, val))
                     else:
                         if stat in ['rss']:
                             task.series[category, stat].append(
-                                (timestamp - task.starttime, val))
+                                (timestamp - self.starttime, val))
                         self.task_stats[task_id][category][stat] = val
                     if val > self.stats_max[category][stat]:
                         self.stats_max[category][stat] = val

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list