[ARVADOS] created: 1.3.0-1274-gbeb17dc26

Git user git at public.curoverse.com
Tue Jul 9 17:15:09 UTC 2019


        at  beb17dc2618d32e0b48ed5af62a43b2d610a59ec (commit)


commit beb17dc2618d32e0b48ed5af62a43b2d610a59ec
Author: Tom Morris <tfmorris at veritasgenetics.com>
Date:   Wed Jul 3 13:52:34 2019 -0400

    Add a simple temp disk utilization based recommendation.
    
    refs #13913
    
    Arvados-DCO-1.1-Signed-off-by: Tom Morris <tfmorris at veritasgenetics.com>

diff --git a/tools/crunchstat-summary/crunchstat_summary/summarizer.py b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
index a86702ed7..305042b2b 100644
--- a/tools/crunchstat-summary/crunchstat_summary/summarizer.py
+++ b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
@@ -335,7 +335,9 @@ class Summarizer(object):
         return itertools.chain(
             self._recommend_cpu(),
             self._recommend_ram(),
-            self._recommend_keep_cache())
+            self._recommend_keep_cache(),
+            self._recommend_temp_disk(),
+            )
 
     def _recommend_cpu(self):
         """Recommend asking for 4 cores if max CPU usage was 333%"""
@@ -438,6 +440,31 @@ class Summarizer(object):
                 math.ceil(asked_cache * 2 / self._runtime_constraint_mem_unit()))
 
 
+    def _recommend_temp_disk(self):
+        """Recommend decreasing temp disk if utilization < 50%.
+
+        Yields at most one '#!!' recommendation line. Nothing is
+        yielded when no temp disk size was recorded (total of 0),
+        since utilization is undefined in that case.
+        """
+        total = float(self.job_tot['statfs']['total'])
+        if total <= 0:
+            # No usable statfs total; avoid dividing by zero.
+            return
+        # Fraction of the total (not a percentage); it is scaled by
+        # 100 only for display below.
+        utilization = float(self.job_tot['statfs']['used']) / total
+
+        if utilization < 0.5:
+            yield (
+                '#!! {} max temp disk utilization was {:.0f}% of {:.0f} MiB -- '
+                'consider reducing "tmpdirMin" and/or "outdirMin"'
+            ).format(
+                self.label,
+                utilization * 100.0,
+                total / MB)
+
+
     def _format(self, val):
         """Return a string representation of a stat.
 

commit 2d58696f7dcb9c76f3d80d2e94fe5e4f05605eb8
Author: Tom Morris <tfmorris at veritasgenetics.com>
Date:   Mon Apr 22 20:48:53 2019 -0400

    WIP Graph tmp disk usage
    
    Plots temp disk space used as well as total available.
    
    Also adds support for multiple metrics per graph and
    groups CPU and network stats by category.
    
    refs #13913
    
    Arvados-DCO-1.1-Signed-off-by: Tom Morris <tfmorris at veritasgenetics.com>

diff --git a/tools/crunchstat-summary/crunchstat_summary/dygraphs.py b/tools/crunchstat-summary/crunchstat_summary/dygraphs.py
index 6df440a14..14b9ad256 100644
--- a/tools/crunchstat-summary/crunchstat_summary/dygraphs.py
+++ b/tools/crunchstat-summary/crunchstat_summary/dygraphs.py
@@ -2,6 +2,8 @@
 #
 # SPDX-License-Identifier: AGPL-3.0
 
+import sys
+
 import crunchstat_summary.webchart
 
 
@@ -13,21 +15,34 @@ class DygraphsChart(crunchstat_summary.webchart.WebChart):
     def headHTML(self):
         return '<link rel="stylesheet" href="{}">\n'.format(self.CSS)
 
-    def chartdata(self, label, tasks, stat):
+    def chartdata(self, label, tasks, stats):
+        '''For Crunch2, label is the name of the container request,
+        tasks is the top level container, and
+        stats is a tuple of (category, list of metric names).
+        '''
         return {
-            'data': self._collate_data(tasks, stat),
+            'data': self._collate_data(tasks, stats),
             'options': {
+                'legend': 'always',
                 'connectSeparatedPoints': True,
-                'labels': ['elapsed']+[uuid for uuid, _ in tasks.items()],
-                'title': '{}: {} {}'.format(label, stat[0], stat[1]),
+                'labels': ['elapsed'] +  stats[1],
+                'title': '{}: {}'.format(label, stats[0]),
             },
         }
 
-    def _collate_data(self, tasks, stat):
+    def _collate_data(self, tasks, stats):
         data = []
         nulls = []
+        # uuid is category for crunch2
         for uuid, task in tasks.items():
-            for pt in task.series[stat]:
-                data.append([pt[0].total_seconds()] + nulls + [pt[1]])
+            # All stats in a category are assumed to have the same time base and same number of samples
+            category = stats[0]
+            series_names = stats[1]
+            sn0 = series_names[0]
+            series = task.series[(category,sn0)]
+            for i in range(len(series)):
+                pt = series[i]
+                vals = [task.series[(category,stat)][i][1] for stat in series_names[1:]]
+                data.append([pt[0].total_seconds()] + nulls + [pt[1]] + vals)
             nulls.append(None)
         return sorted(data)
diff --git a/tools/crunchstat-summary/crunchstat_summary/reader.py b/tools/crunchstat-summary/crunchstat_summary/reader.py
index 8ccdbc2fc..44fc0c567 100644
--- a/tools/crunchstat-summary/crunchstat_summary/reader.py
+++ b/tools/crunchstat-summary/crunchstat_summary/reader.py
@@ -5,6 +5,7 @@
 import arvados
 import itertools
 import queue
+import sys
 import threading
 
 from crunchstat_summary import logger
@@ -25,7 +26,8 @@ class CollectionReader(object):
         filenames = [filename for filename in collection]
         # Crunch2 has multiple stats files
         if len(filenames) > 1:
-            filenames = ['crunchstat.txt', 'arv-mount.txt']
+            filenames = ['crunchstat.txt', 'hoststat.txt', 'arv-mount.txt']
+            filenames = ['hoststat.txt']
         for filename in filenames:
             try:
                 self._readers.append(collection.open(filename))
diff --git a/tools/crunchstat-summary/crunchstat_summary/summarizer.py b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
index 884f16b4a..a86702ed7 100644
--- a/tools/crunchstat-summary/crunchstat_summary/summarizer.py
+++ b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
@@ -129,13 +129,14 @@ class Summarizer(object):
                 try:
                     self.label = m.group('job_uuid')
                 except IndexError:
-                    self.label = 'container'
-            if m.group('category').endswith(':'):
+                    self.label = 'label #1'
+            category = m.group('category')
+            if category.endswith(':'):
                 # "stderr crunchstat: notice: ..."
                 continue
-            elif m.group('category') in ('error', 'caught'):
+            elif category in ('error', 'caught'):
                 continue
-            elif m.group('category') in ('read', 'open', 'cgroup', 'CID', 'Running'):
+            elif category in ('read', 'open', 'cgroup', 'CID', 'Running'):
                 # "stderr crunchstat: read /proc/1234/net/dev: ..."
                 # (old logs are less careful with unprefixed error messages)
                 continue
@@ -221,11 +222,11 @@ class Summarizer(object):
                     if group == 'interval' and this_interval_s:
                             stat = stat + '__rate'
                             val = val / this_interval_s
-                            if stat in ['user+sys__rate', 'tx+rx__rate']:
+                            if stat in ['user+sys__rate', 'user__rate', 'sys__rate', 'tx+rx__rate', 'rx__rate', 'tx__rate']:
                                 task.series[category, stat].append(
                                     (timestamp - self.starttime, val))
                     else:
-                        if stat in ['rss']:
+                        if stat in ['rss','used','total']:
                             task.series[category, stat].append(
                                 (timestamp - self.starttime, val))
                         self.task_stats[task_id][category][stat] = val
@@ -315,7 +316,13 @@ class Summarizer(object):
                  (float(self.job_tot['blkio:0:0']['read']) /
                  float(self.job_tot['net:keep0']['rx']))
                  if self.job_tot['net:keep0']['rx'] > 0 else 0,
-                 lambda x: x * 100.0)):
+                 lambda x: x * 100.0),
+               ('Temp disk utilization {}%',
+                 (float(self.job_tot['statfs']['used']) /
+                 float(self.job_tot['statfs']['total']))
+                 if self.job_tot['statfs']['total'] > 0 else 0,
+                 lambda x: x * 100.0),
+                ):
             format_string, val, transform = args
             if val == float('-Inf'):
                 continue
diff --git a/tools/crunchstat-summary/crunchstat_summary/webchart.py b/tools/crunchstat-summary/crunchstat_summary/webchart.py
index cf0c1e67a..31afcf64e 100644
--- a/tools/crunchstat-summary/crunchstat_summary/webchart.py
+++ b/tools/crunchstat-summary/crunchstat_summary/webchart.py
@@ -45,10 +45,13 @@ class WebChart(object):
                 'label': s.long_label(),
                 'charts': [
                     self.chartdata(s.label, s.tasks, stat)
-                    for stat in (('cpu', 'user+sys__rate'),
-                                 ('mem', 'rss'),
-                                 ('net:eth0', 'tx+rx__rate'),
-                                 ('net:keep0', 'tx+rx__rate'))],
+                    for stat in (('cpu', ['user+sys__rate', 'user__rate', 'sys__rate']),
+                                 ('mem', ['rss']),
+                                 ('net:eth0', ['tx+rx__rate','rx__rate','tx__rate']),
+                                 ('net:keep0', ['tx+rx__rate','rx__rate','tx__rate']),
+                                 ('statfs', ['used', 'total']),
+                                 )
+                    ],
             }
             for s in self.summarizers]
 

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list