[arvados] updated: 2.7.0-6020-gd49af56735
git repository hosting
git at public.arvados.org
Wed Feb 28 20:28:23 UTC 2024
Summary of changes:
sdk/cwl/arvados_cwl/arvcontainer.py | 2 +-
sdk/cwl/arvados_cwl/context.py | 1 +
sdk/cwl/arvados_cwl/executor.py | 5 ++++
.../crunchstat_summary/summarizer.py | 29 +++++++++++-----------
4 files changed, 21 insertions(+), 16 deletions(-)
via d49af567353a4597e6a478ff871bdc6d3bd50f08 (commit)
via cd6cc155469fb54cd7d868e5bc331f13805b79c9 (commit)
from 2512a633dac10249c351b474b80807725246144a (commit)
Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.
commit d49af567353a4597e6a478ff871bdc6d3bd50f08
Author: Peter Amstutz <peter.amstutz at curii.com>
Date: Wed Feb 28 14:53:29 2024 -0500
19744: Report steps with low utilization at end of workflow
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>
diff --git a/sdk/cwl/arvados_cwl/arvcontainer.py b/sdk/cwl/arvados_cwl/arvcontainer.py
index f048e505e8..c3b914ba99 100644
--- a/sdk/cwl/arvados_cwl/arvcontainer.py
+++ b/sdk/cwl/arvados_cwl/arvcontainer.py
@@ -548,7 +548,7 @@ class ArvadosContainer(JobBase):
# Post warnings about nodes that are under-utilized.
for rc in summarizer._recommend_gen(lambda x: x):
- self.usage_report_notes.append(rc)
+ self.job_runtime.usage_report_notes.append(rc)
except Exception as e:
logger.warning("%s unable to generate resource usage report",
diff --git a/sdk/cwl/arvados_cwl/executor.py b/sdk/cwl/arvados_cwl/executor.py
index 28ee60ac39..432b380aab 100644
--- a/sdk/cwl/arvados_cwl/executor.py
+++ b/sdk/cwl/arvados_cwl/executor.py
@@ -930,7 +930,7 @@ The 'jobs' API is no longer supported.
raise WorkflowException("Workflow did not return a result.")
if runtimeContext.usage_report_notes:
- logger.info("Resource report notifications:")
+ logger.info("Steps with low resource utilization (possible optimization opportunities):")
for x in runtimeContext.usage_report_notes:
logger.info(" %s", x)
diff --git a/tools/crunchstat-summary/crunchstat_summary/summarizer.py b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
index 8a2cda130b..bc41fdae33 100644
--- a/tools/crunchstat-summary/crunchstat_summary/summarizer.py
+++ b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
@@ -483,7 +483,7 @@ class Summarizer(object):
'{} peak RAM usage was only {}% ({} MiB used / {} MiB requested)'
).format(
self.label,
- int(100*(used_mib / asked_mib)),
+ int(math.ceil(100*(used_mib / asked_mib))),
int(used_mib),
int(asked_mib))
@@ -497,18 +497,23 @@ class Summarizer(object):
if self.job_tot['net:keep0']['rx'] == 0:
return
+
+ miss_rate = (float(self.job_tot['keepcache']['miss']) /
+ float(self.job_tot['keepcalls']['get']))
+
utilization = (float(self.job_tot['blkio:0:0']['read']) /
float(self.job_tot['net:keep0']['rx']))
# FIXME: the default on this get won't work correctly
asked_cache = self.existing_constraints.get('keep_cache_ram') or self.existing_constraints.get('keep_cache_disk')
- if utilization < 0.5:
+ if utilization < 0.5 and miss_rate > .05:
yield recommendformat(
- '{} Keep cache utilization was {:.2f}% -- '
+ '{} Keep cache utilization was only {:.2f}% and miss rate was {:.2f}% -- '
'recommend increasing keep_cache'
).format(
self.label,
- utilization * 100.0)
+ utilization * 100.0,
+ miss_rate * 100.0)
def _recommend_temp_disk(self, recommendformat):
commit cd6cc155469fb54cd7d868e5bc331f13805b79c9
Author: Peter Amstutz <peter.amstutz at curii.com>
Date: Wed Feb 28 14:27:52 2024 -0500
19744: Remove specific recommendations
Still want to note inefficient CPU/RAM usage.
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>
diff --git a/sdk/cwl/arvados_cwl/arvcontainer.py b/sdk/cwl/arvados_cwl/arvcontainer.py
index d4b1e0050d..f048e505e8 100644
--- a/sdk/cwl/arvados_cwl/arvcontainer.py
+++ b/sdk/cwl/arvados_cwl/arvcontainer.py
@@ -548,7 +548,7 @@ class ArvadosContainer(JobBase):
# Post warnings about nodes that are under-utilized.
for rc in summarizer._recommend_gen(lambda x: x):
- logger.warning(x)
+ self.usage_report_notes.append(rc)
except Exception as e:
logger.warning("%s unable to generate resource usage report",
diff --git a/sdk/cwl/arvados_cwl/context.py b/sdk/cwl/arvados_cwl/context.py
index a90a6d48c3..60ea9bdff5 100644
--- a/sdk/cwl/arvados_cwl/context.py
+++ b/sdk/cwl/arvados_cwl/context.py
@@ -47,6 +47,7 @@ class ArvRuntimeContext(RuntimeContext):
self.print_keep_deps = False
self.git_info = {}
self.enable_usage_report = None
+ self.usage_report_notes = []
super(ArvRuntimeContext, self).__init__(kwargs)
diff --git a/sdk/cwl/arvados_cwl/executor.py b/sdk/cwl/arvados_cwl/executor.py
index 1b484a1720..28ee60ac39 100644
--- a/sdk/cwl/arvados_cwl/executor.py
+++ b/sdk/cwl/arvados_cwl/executor.py
@@ -929,6 +929,11 @@ The 'jobs' API is no longer supported.
if self.final_output is None:
raise WorkflowException("Workflow did not return a result.")
+ if runtimeContext.usage_report_notes:
+ logger.info("Resource report notifications:")
+ for x in runtimeContext.usage_report_notes:
+ logger.info(" %s", x)
+
if runtimeContext.submit and isinstance(tool, Runner):
logger.info("Final output collection %s", tool.final_output)
if workbench2 or workbench1:
diff --git a/tools/crunchstat-summary/crunchstat_summary/summarizer.py b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
index a7c2b0a383..8a2cda130b 100644
--- a/tools/crunchstat-summary/crunchstat_summary/summarizer.py
+++ b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
@@ -421,16 +421,14 @@ class Summarizer(object):
asked_cores = self.existing_constraints.get(constraint_key)
if asked_cores is None:
asked_cores = 1
- # TODO: This should be more nuanced in cases where max >> avg
+
if used_cores < (asked_cores*.5):
yield recommendformat(
- '{} max CPU usage was {}% -- '
- 'try reducing runtime_constraints to "{}":{}'
+ '{} peak CPU usage was only {}% out of possible {}% ({} cores requested)'
).format(
self.label,
math.ceil(cpu_max_rate*100),
- constraint_key,
- int(used_cores))
+ asked_cores*100, asked_cores)
# FIXME: This needs to be updated to account for current a-d-c algorithms
def _recommend_ram(self, recommendformat):
@@ -482,14 +480,12 @@ class Summarizer(object):
recommend_mib = int(math.ceil(nearlygibs(used_mib/ratio))*AVAILABLE_RAM_RATIO*1024)
if used_mib > 0 and (used_mib / asked_mib) < ratio and asked_mib > recommend_mib:
yield recommendformat(
- '{} requested {} MiB of RAM but actual RAM usage was below {}% at {} MiB -- '
- 'suggest reducing RAM request to {} MiB'
+ '{} peak RAM usage was only {}% ({} MiB used / {} MiB requested)'
).format(
self.label,
- int(asked_mib),
- int(100*ratio),
+ int(100*(used_mib / asked_mib)),
int(used_mib),
- recommend_mib)
+ int(asked_mib))
def _recommend_keep_cache(self, recommendformat):
"""Recommend increasing keep cache if utilization < 50%.
@@ -499,7 +495,6 @@ class Summarizer(object):
arv-mount.
"""
- constraint_key = self._map_runtime_constraint('keep_cache_ram')
if self.job_tot['net:keep0']['rx'] == 0:
return
utilization = (float(self.job_tot['blkio:0:0']['read']) /
@@ -510,11 +505,10 @@ class Summarizer(object):
if utilization < 0.5:
yield recommendformat(
'{} Keep cache utilization was {:.2f}% -- '
- 'try increasing keep_cache to {} MB'
+ 'recommend increasing keep_cache'
).format(
self.label,
- utilization * 100.0,
- math.ceil((asked_cache * 2) / (1024*1024)))
+ utilization * 100.0)
def _recommend_temp_disk(self, recommendformat):
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list