[ARVADOS] created: 1.3.0-532-g678a874b1
Git user
git at public.curoverse.com
Mon Mar 18 15:11:21 UTC 2019
at 678a874b117e509705195567b17a607e7b65ac67 (commit)
commit 678a874b117e509705195567b17a607e7b65ac67
Author: Tom Morris <tfmorris at veritasgenetics.com>
Date: Fri Mar 15 01:11:59 2019 -0400
Improve wording with direction of recommendation change.
No issue.
Arvados-DCO-1.1-Signed-off-by: Tom Morris <tfmorris at veritasgenetics.com>
diff --git a/tools/crunchstat-summary/crunchstat_summary/summarizer.py b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
index 96da98cdd..bf905a394 100644
--- a/tools/crunchstat-summary/crunchstat_summary/summarizer.py
+++ b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
@@ -343,16 +343,18 @@ class Summarizer(object):
asked_cores = self.existing_constraints.get(constraint_key)
if asked_cores is None:
asked_cores = 1
+ # TODO: This should be more nuanced in cases where max >> avg
if used_cores < asked_cores:
yield (
'#!! {} max CPU usage was {}% -- '
- 'try runtime_constraints "{}":{}'
+ 'try reducing runtime_constraints to "{}":{}'
).format(
self.label,
math.ceil(cpu_max_rate*100),
constraint_key,
int(used_cores))
+ # FIXME: This needs to be updated to account for current nodemanager algorithms
def _recommend_ram(self):
"""Recommend an economical RAM constraint for this job.
@@ -400,7 +402,7 @@ class Summarizer(object):
math.ceil(nearlygibs(used_mib)) < nearlygibs(asked_mib))):
yield (
'#!! {} max RSS was {} MiB -- '
- 'try runtime_constraints "{}":{}'
+ 'try reducing runtime_constraints to "{}":{}'
).format(
self.label,
int(used_mib),
@@ -420,7 +422,7 @@ class Summarizer(object):
if utilization < 0.8:
yield (
'#!! {} Keep cache utilization was {:.2f}% -- '
- 'try runtime_constraints "{}":{} (or more)'
+ 'try doubling runtime_constraints to "{}":{} (or more)'
).format(
self.label,
utilization * 100.0,
diff --git a/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-crunchstat.txt.gz.report b/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-crunchstat.txt.gz.report
index 3f53126e6..c64c34c80 100644
--- a/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-crunchstat.txt.gz.report
+++ b/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-crunchstat.txt.gz.report
@@ -23,4 +23,4 @@ time elapsed 20 - 20
# Max network speed in a single interval: 0.00MB/s
# Keep cache miss rate 0.00%
# Keep cache utilization 0.00%
-#!! container max RSS was 67 MiB -- try runtime_constraints "ram":1020054732
+#!! container max RSS was 67 MiB -- try reducing runtime_constraints to "ram":1020054732
diff --git a/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk.txt.gz.report b/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk.txt.gz.report
index 3b0d6a3f7..3075c24b9 100644
--- a/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk.txt.gz.report
+++ b/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk.txt.gz.report
@@ -34,4 +34,4 @@ time elapsed 20 - 20
# Max network speed in a single interval: 0.00MB/s
# Keep cache miss rate 0.00%
# Keep cache utilization 0.00%
-#!! container max RSS was 67 MiB -- try runtime_constraints "ram":1020054732
+#!! container max RSS was 67 MiB -- try reducing runtime_constraints to "ram":1020054732
diff --git a/tools/crunchstat-summary/tests/logfile_20151204190335.txt.gz.report b/tools/crunchstat-summary/tests/logfile_20151204190335.txt.gz.report
index eee771519..5e3ad152f 100644
--- a/tools/crunchstat-summary/tests/logfile_20151204190335.txt.gz.report
+++ b/tools/crunchstat-summary/tests/logfile_20151204190335.txt.gz.report
@@ -31,4 +31,4 @@ time elapsed 80 - 80
# Max network speed in a single interval: 42.58MB/s
# Keep cache miss rate 0.00%
# Keep cache utilization 0.00%
-#!! 4xphq-8i9sb-jq0ekny1xou3zoh max RSS was 334 MiB -- try runtime_constraints "min_ram_mb_per_node":972
+#!! 4xphq-8i9sb-jq0ekny1xou3zoh max RSS was 334 MiB -- try reducing runtime_constraints to "min_ram_mb_per_node":972
diff --git a/tools/crunchstat-summary/tests/logfile_20151210063411.txt.gz.report b/tools/crunchstat-summary/tests/logfile_20151210063411.txt.gz.report
index c8b677450..e260ca5bd 100644
--- a/tools/crunchstat-summary/tests/logfile_20151210063411.txt.gz.report
+++ b/tools/crunchstat-summary/tests/logfile_20151210063411.txt.gz.report
@@ -20,4 +20,4 @@ time elapsed 2 - 4
# Max network speed in a single interval: 0.00MB/s
# Keep cache miss rate 0.00%
# Keep cache utilization 0.00%
-#!! 4xphq-8i9sb-zvb2ocfycpomrup max RSS was 1 MiB -- try runtime_constraints "min_ram_mb_per_node":972
+#!! 4xphq-8i9sb-zvb2ocfycpomrup max RSS was 1 MiB -- try reducing runtime_constraints to "min_ram_mb_per_node":972
diff --git a/tools/crunchstat-summary/tests/logfile_20151210063439.txt.gz.report b/tools/crunchstat-summary/tests/logfile_20151210063439.txt.gz.report
index b138b1893..ffe107225 100644
--- a/tools/crunchstat-summary/tests/logfile_20151210063439.txt.gz.report
+++ b/tools/crunchstat-summary/tests/logfile_20151210063439.txt.gz.report
@@ -20,4 +20,4 @@ time elapsed 2 - 3
# Max network speed in a single interval: 0.00MB/s
# Keep cache miss rate 0.00%
# Keep cache utilization 0.00%
-#!! 4xphq-8i9sb-v831jm2uq0g2g9x max RSS was 1 MiB -- try runtime_constraints "min_ram_mb_per_node":972
+#!! 4xphq-8i9sb-v831jm2uq0g2g9x max RSS was 1 MiB -- try reducing runtime_constraints to "min_ram_mb_per_node":972
commit b8a3b3f1f4f91bcfcd2dbe9aa852499886199f14
Author: Tom Morris <tfmorris at veritasgenetics.com>
Date: Fri Mar 15 00:47:38 2019 -0400
Use default of 1 if no cores requested. refs #12026
Arvados-DCO-1.1-Signed-off-by: Tom Morris <tfmorris at veritasgenetics.com>
diff --git a/tools/crunchstat-summary/crunchstat_summary/summarizer.py b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
index 6d1fd918e..96da98cdd 100644
--- a/tools/crunchstat-summary/crunchstat_summary/summarizer.py
+++ b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
@@ -341,7 +341,9 @@ class Summarizer(object):
# take average CPU usage into account as well or % time at max
used_cores = max(1, int(math.ceil(cpu_max_rate)))
asked_cores = self.existing_constraints.get(constraint_key)
- if asked_cores is None or used_cores < asked_cores:
+ if asked_cores is None:
+ asked_cores = 1
+ if used_cores < asked_cores:
yield (
'#!! {} max CPU usage was {}% -- '
'try runtime_constraints "{}":{}'
diff --git a/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-crunchstat.txt.gz.report b/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-crunchstat.txt.gz.report
index b61da154f..3f53126e6 100644
--- a/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-crunchstat.txt.gz.report
+++ b/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-crunchstat.txt.gz.report
@@ -23,5 +23,4 @@ time elapsed 20 - 20
# Max network speed in a single interval: 0.00MB/s
# Keep cache miss rate 0.00%
# Keep cache utilization 0.00%
-#!! container max CPU usage was 24% -- try runtime_constraints "vcpus":1
#!! container max RSS was 67 MiB -- try runtime_constraints "ram":1020054732
diff --git a/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk.txt.gz.report b/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk.txt.gz.report
index 9d3cd78d3..3b0d6a3f7 100644
--- a/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk.txt.gz.report
+++ b/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk.txt.gz.report
@@ -34,5 +34,4 @@ time elapsed 20 - 20
# Max network speed in a single interval: 0.00MB/s
# Keep cache miss rate 0.00%
# Keep cache utilization 0.00%
-#!! container max CPU usage was 24% -- try runtime_constraints "vcpus":1
#!! container max RSS was 67 MiB -- try runtime_constraints "ram":1020054732
diff --git a/tools/crunchstat-summary/tests/logfile_20151204190335.txt.gz.report b/tools/crunchstat-summary/tests/logfile_20151204190335.txt.gz.report
index f0a60957b..eee771519 100644
--- a/tools/crunchstat-summary/tests/logfile_20151204190335.txt.gz.report
+++ b/tools/crunchstat-summary/tests/logfile_20151204190335.txt.gz.report
@@ -31,5 +31,4 @@ time elapsed 80 - 80
# Max network speed in a single interval: 42.58MB/s
# Keep cache miss rate 0.00%
# Keep cache utilization 0.00%
-#!! 4xphq-8i9sb-jq0ekny1xou3zoh max CPU usage was 13% -- try runtime_constraints "min_cores_per_node":1
#!! 4xphq-8i9sb-jq0ekny1xou3zoh max RSS was 334 MiB -- try runtime_constraints "min_ram_mb_per_node":972
commit 11d52d78ffdaa572f47ace063c20f084a10c9b1e
Author: Tom Morris <tfmorris at veritasgenetics.com>
Date: Fri Mar 15 00:10:06 2019 -0400
Don't recommend RAM or CPU changes based on zero usage. refs #10570
Never recommend 0 MB RAM. If CPU usage is zero, don't make any
CPU recommendations.
Arvados-DCO-1.1-Signed-off-by: Tom Morris <tfmorris at veritasgenetics.com>
diff --git a/tools/crunchstat-summary/crunchstat_summary/summarizer.py b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
index 51f7e43e0..6d1fd918e 100644
--- a/tools/crunchstat-summary/crunchstat_summary/summarizer.py
+++ b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
@@ -323,6 +323,7 @@ class Summarizer(object):
yield "# "+format_string.format(self._format(val))
def _recommend_gen(self):
+ # TODO recommend fixing job granularity if elapsed time is too short
return itertools.chain(
self._recommend_cpu(),
self._recommend_ram(),
@@ -333,9 +334,11 @@ class Summarizer(object):
constraint_key = self._map_runtime_constraint('vcpus')
cpu_max_rate = self.stats_max['cpu']['user+sys__rate']
- if cpu_max_rate == float('-Inf'):
+ if cpu_max_rate == float('-Inf') or cpu_max_rate == 0.0:
logger.warning('%s: no CPU usage data', self.label)
return
+ # TODO Don't necessarily want to recommend on isolated max peak
+ # take average CPU usage into account as well or % time at max
used_cores = max(1, int(math.ceil(cpu_max_rate)))
asked_cores = self.existing_constraints.get(constraint_key)
if asked_cores is None or used_cores < asked_cores:
@@ -391,8 +394,8 @@ class Summarizer(object):
asked_mib = self.existing_constraints.get(constraint_key)
nearlygibs = lambda mebibytes: mebibytes/AVAILABLE_RAM_RATIO/1024
- if asked_mib is None or (
- math.ceil(nearlygibs(used_mib)) < nearlygibs(asked_mib)):
+ if used_mib > 0 and (asked_mib is None or (
+ math.ceil(nearlygibs(used_mib)) < nearlygibs(asked_mib))):
yield (
'#!! {} max RSS was {} MiB -- '
'try runtime_constraints "{}":{}'
diff --git a/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-arv-mount.txt.gz.report b/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-arv-mount.txt.gz.report
index 5148a4523..0691e4f1e 100644
--- a/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-arv-mount.txt.gz.report
+++ b/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-arv-mount.txt.gz.report
@@ -20,5 +20,3 @@ time elapsed 10 - 10
# Max network speed in a single interval: 0.00MB/s
# Keep cache miss rate 0.00%
# Keep cache utilization 0.00%
-#!! container max CPU usage was 0% -- try runtime_constraints "vcpus":1
-#!! container max RSS was 0 MiB -- try runtime_constraints "ram":0
diff --git a/tools/crunchstat-summary/tests/logfile_20151210063411.txt.gz.report b/tools/crunchstat-summary/tests/logfile_20151210063411.txt.gz.report
index f9a34cfb9..c8b677450 100644
--- a/tools/crunchstat-summary/tests/logfile_20151210063411.txt.gz.report
+++ b/tools/crunchstat-summary/tests/logfile_20151210063411.txt.gz.report
@@ -20,5 +20,4 @@ time elapsed 2 - 4
# Max network speed in a single interval: 0.00MB/s
# Keep cache miss rate 0.00%
# Keep cache utilization 0.00%
-#!! 4xphq-8i9sb-zvb2ocfycpomrup max CPU usage was 0% -- try runtime_constraints "min_cores_per_node":1
#!! 4xphq-8i9sb-zvb2ocfycpomrup max RSS was 1 MiB -- try runtime_constraints "min_ram_mb_per_node":972
diff --git a/tools/crunchstat-summary/tests/logfile_20151210063439.txt.gz.report b/tools/crunchstat-summary/tests/logfile_20151210063439.txt.gz.report
index c54102d78..b138b1893 100644
--- a/tools/crunchstat-summary/tests/logfile_20151210063439.txt.gz.report
+++ b/tools/crunchstat-summary/tests/logfile_20151210063439.txt.gz.report
@@ -20,5 +20,4 @@ time elapsed 2 - 3
# Max network speed in a single interval: 0.00MB/s
# Keep cache miss rate 0.00%
# Keep cache utilization 0.00%
-#!! 4xphq-8i9sb-v831jm2uq0g2g9x max CPU usage was 0% -- try runtime_constraints "min_cores_per_node":1
#!! 4xphq-8i9sb-v831jm2uq0g2g9x max RSS was 1 MiB -- try runtime_constraints "min_ram_mb_per_node":972
commit 22bbbebea55236d7d906028c4486914253323d64
Author: Tom Morris <tfmorris at veritasgenetics.com>
Date: Fri Mar 15 00:03:59 2019 -0400
Fix Keep cache recommendation calculation.
Remove extra MB (2**20) factor. Refs #14451
Arvados-DCO-1.1-Signed-off-by: Tom Morris <tfmorris at veritasgenetics.com>
diff --git a/tools/crunchstat-summary/crunchstat_summary/summarizer.py b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
index 28afc9e0d..51f7e43e0 100644
--- a/tools/crunchstat-summary/crunchstat_summary/summarizer.py
+++ b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
@@ -409,7 +409,8 @@ class Summarizer(object):
return
utilization = (float(self.job_tot['blkio:0:0']['read']) /
float(self.job_tot['net:keep0']['rx']))
- asked_cache = self.existing_constraints.get(constraint_key, 256)
+ # FIXME: the default on this get won't work correctly
+ asked_cache = self.existing_constraints.get(constraint_key, 256) * self._runtime_constraint_mem_unit()
if utilization < 0.8:
yield (
@@ -419,7 +420,7 @@ class Summarizer(object):
self.label,
utilization * 100.0,
constraint_key,
- asked_cache*2*(MB)/self._runtime_constraint_mem_unit())
+ math.ceil(asked_cache * 2 / self._runtime_constraint_mem_unit()))
def _format(self, val):
commit f5a65f7d9b7a95dc9d4e865e950f4324fc653606
Author: Tom Morris <tfmorris at veritasgenetics.com>
Date: Thu Mar 14 22:11:45 2019 -0400
Spell MB consistently.
Arvados-DCO-1.1-Signed-off-by: Tom Morris <tfmorris at veritasgenetics.com>
diff --git a/tools/crunchstat-summary/crunchstat_summary/summarizer.py b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
index bcdc871a4..28afc9e0d 100644
--- a/tools/crunchstat-summary/crunchstat_summary/summarizer.py
+++ b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
@@ -22,7 +22,7 @@ from crunchstat_summary import logger
# number of GiB. (Actual nodes tend to be sold in sizes like 8 GiB
# that have amounts like 7.5 GiB according to the kernel.)
AVAILABLE_RAM_RATIO = 0.95
-
+MB=2**20
# Workaround datetime.datetime.strptime() thread-safety bug by calling
# it once before starting threads. https://bugs.python.org/issue7980
@@ -387,7 +387,7 @@ class Summarizer(object):
if used_bytes == float('-Inf'):
logger.warning('%s: no memory usage data', self.label)
return
- used_mib = math.ceil(float(used_bytes) / 1048576)
+ used_mib = math.ceil(float(used_bytes) / MB)
asked_mib = self.existing_constraints.get(constraint_key)
nearlygibs = lambda mebibytes: mebibytes/AVAILABLE_RAM_RATIO/1024
@@ -400,7 +400,7 @@ class Summarizer(object):
self.label,
int(used_mib),
constraint_key,
- int(math.ceil(nearlygibs(used_mib))*AVAILABLE_RAM_RATIO*1024*(2**20)/self._runtime_constraint_mem_unit()))
+ int(math.ceil(nearlygibs(used_mib))*AVAILABLE_RAM_RATIO*1024*(MB)/self._runtime_constraint_mem_unit()))
def _recommend_keep_cache(self):
"""Recommend increasing keep cache if utilization < 80%"""
@@ -409,7 +409,7 @@ class Summarizer(object):
return
utilization = (float(self.job_tot['blkio:0:0']['read']) /
float(self.job_tot['net:keep0']['rx']))
- asked_mib = self.existing_constraints.get(constraint_key, 256)
+ asked_cache = self.existing_constraints.get(constraint_key, 256)
if utilization < 0.8:
yield (
@@ -419,7 +419,7 @@ class Summarizer(object):
self.label,
utilization * 100.0,
constraint_key,
- asked_mib*2*(2**20)/self._runtime_constraint_mem_unit())
+ asked_cache*2*(MB)/self._runtime_constraint_mem_unit())
def _format(self, val):
@@ -511,7 +511,7 @@ class ProcessSummarizer(Summarizer):
class JobSummarizer(ProcessSummarizer):
- runtime_constraint_mem_unit = 1048576
+ runtime_constraint_mem_unit = MB
map_runtime_constraint = {
'keep_cache_ram': 'keep_cache_mb_per_task',
'ram': 'min_ram_mb_per_node',
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list