[arvados] updated: 2.7.0-6020-gd49af56735

Wed Feb 28 20:28:23 UTC 2024

Summary of changes:
 sdk/cwl/arvados_cwl/arvcontainer.py                |  2 +-
 sdk/cwl/arvados_cwl/context.py                     |  1 +
 sdk/cwl/arvados_cwl/executor.py                    |  5 ++++
 .../crunchstat_summary/summarizer.py               | 29 +++++++++++-----------
 4 files changed, 21 insertions(+), 16 deletions(-)

       via  d49af567353a4597e6a478ff871bdc6d3bd50f08 (commit)
       via  cd6cc155469fb54cd7d868e5bc331f13805b79c9 (commit)
      from  2512a633dac10249c351b474b80807725246144a (commit)

The revisions listed above that are new to this repository have
not appeared in any other notification email, so they are listed
in full below.


commit d49af567353a4597e6a478ff871bdc6d3bd50f08
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Wed Feb 28 14:53:29 2024 -0500

    19744: Report steps with low utilization at end of workflow
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/sdk/cwl/arvados_cwl/arvcontainer.py b/sdk/cwl/arvados_cwl/arvcontainer.py
index f048e505e8..c3b914ba99 100644
--- a/sdk/cwl/arvados_cwl/arvcontainer.py
+++ b/sdk/cwl/arvados_cwl/arvcontainer.py
@@ -548,7 +548,7 @@ class ArvadosContainer(JobBase):
 
                     # Post warnings about nodes that are under-utilized.
                     for rc in summarizer._recommend_gen(lambda x: x):
-                        self.usage_report_notes.append(rc)
+                        self.job_runtime.usage_report_notes.append(rc)
 
                 except Exception as e:
                     logger.warning("%s unable to generate resource usage report",
diff --git a/sdk/cwl/arvados_cwl/executor.py b/sdk/cwl/arvados_cwl/executor.py
index 28ee60ac39..432b380aab 100644
--- a/sdk/cwl/arvados_cwl/executor.py
+++ b/sdk/cwl/arvados_cwl/executor.py
@@ -930,7 +930,7 @@ The 'jobs' API is no longer supported.
             raise WorkflowException("Workflow did not return a result.")
 
         if runtimeContext.usage_report_notes:
-            logger.info("Resource report notifications:")
+            logger.info("Steps with low resource utilization (possible optimization opportunities):")
             for x in runtimeContext.usage_report_notes:
                 logger.info("  %s", x)
 
diff --git a/tools/crunchstat-summary/crunchstat_summary/summarizer.py b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
index 8a2cda130b..bc41fdae33 100644
--- a/tools/crunchstat-summary/crunchstat_summary/summarizer.py
+++ b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
@@ -483,7 +483,7 @@ class Summarizer(object):
                 '{} peak RAM usage was only {}% ({} MiB used / {} MiB requested)'
             ).format(
                 self.label,
-                int(100*(used_mib / asked_mib)),
+                int(math.ceil(100*(used_mib / asked_mib))),
                 int(used_mib),
                 int(asked_mib))
 
@@ -497,18 +497,23 @@ class Summarizer(object):
 
         if self.job_tot['net:keep0']['rx'] == 0:
             return
+
+        miss_rate = (float(self.job_tot['keepcache']['miss']) /
+                     float(self.job_tot['keepcalls']['get']))
+
         utilization = (float(self.job_tot['blkio:0:0']['read']) /
                        float(self.job_tot['net:keep0']['rx']))
         # FIXME: the default on this get won't work correctly
         asked_cache = self.existing_constraints.get('keep_cache_ram') or self.existing_constraints.get('keep_cache_disk')
 
-        if utilization < 0.5:
+        if utilization < 0.5 and miss_rate > .05:
             yield recommendformat(
-                '{} Keep cache utilization was {:.2f}% -- '
+                '{} Keep cache utilization was only {:.2f}% and miss rate was {:.2f}% -- '
                 'recommend increasing keep_cache'
             ).format(
                 self.label,
-                utilization * 100.0)
+                utilization * 100.0,
+                miss_rate * 100.0)
 
 
     def _recommend_temp_disk(self, recommendformat):

commit cd6cc155469fb54cd7d868e5bc331f13805b79c9
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Wed Feb 28 14:27:52 2024 -0500

    19744: Remove specific recommendations
    
    Still want to note inefficient CPU/RAM usage.
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/sdk/cwl/arvados_cwl/arvcontainer.py b/sdk/cwl/arvados_cwl/arvcontainer.py
index d4b1e0050d..f048e505e8 100644
--- a/sdk/cwl/arvados_cwl/arvcontainer.py
+++ b/sdk/cwl/arvados_cwl/arvcontainer.py
@@ -548,7 +548,7 @@ class ArvadosContainer(JobBase):
 
                     # Post warnings about nodes that are under-utilized.
                     for rc in summarizer._recommend_gen(lambda x: x):
-                        logger.warning(x)
+                        self.usage_report_notes.append(rc)
 
                 except Exception as e:
                     logger.warning("%s unable to generate resource usage report",
diff --git a/sdk/cwl/arvados_cwl/context.py b/sdk/cwl/arvados_cwl/context.py
index a90a6d48c3..60ea9bdff5 100644
--- a/sdk/cwl/arvados_cwl/context.py
+++ b/sdk/cwl/arvados_cwl/context.py
@@ -47,6 +47,7 @@ class ArvRuntimeContext(RuntimeContext):
         self.print_keep_deps = False
         self.git_info = {}
         self.enable_usage_report = None
+        self.usage_report_notes = []
 
         super(ArvRuntimeContext, self).__init__(kwargs)
 
diff --git a/sdk/cwl/arvados_cwl/executor.py b/sdk/cwl/arvados_cwl/executor.py
index 1b484a1720..28ee60ac39 100644
--- a/sdk/cwl/arvados_cwl/executor.py
+++ b/sdk/cwl/arvados_cwl/executor.py
@@ -929,6 +929,11 @@ The 'jobs' API is no longer supported.
         if self.final_output is None:
             raise WorkflowException("Workflow did not return a result.")
 
+        if runtimeContext.usage_report_notes:
+            logger.info("Resource report notifications:")
+            for x in runtimeContext.usage_report_notes:
+                logger.info("  %s", x)
+
         if runtimeContext.submit and isinstance(tool, Runner):
             logger.info("Final output collection %s", tool.final_output)
             if workbench2 or workbench1:
diff --git a/tools/crunchstat-summary/crunchstat_summary/summarizer.py b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
index a7c2b0a383..8a2cda130b 100644
--- a/tools/crunchstat-summary/crunchstat_summary/summarizer.py
+++ b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
@@ -421,16 +421,14 @@ class Summarizer(object):
         asked_cores = self.existing_constraints.get(constraint_key)
         if asked_cores is None:
             asked_cores = 1
-        # TODO: This should be more nuanced in cases where max >> avg
+
         if used_cores < (asked_cores*.5):
             yield recommendformat(
-                '{} max CPU usage was {}% -- '
-                'try reducing runtime_constraints to "{}":{}'
+                '{} peak CPU usage was only {}% out of possible {}% ({} cores requested)'
             ).format(
                 self.label,
                 math.ceil(cpu_max_rate*100),
-                constraint_key,
-                int(used_cores))
+                asked_cores*100, asked_cores)
 
     # FIXME: This needs to be updated to account for current a-d-c algorithms
     def _recommend_ram(self, recommendformat):
@@ -482,14 +480,12 @@ class Summarizer(object):
         recommend_mib = int(math.ceil(nearlygibs(used_mib/ratio))*AVAILABLE_RAM_RATIO*1024)
         if used_mib > 0 and (used_mib / asked_mib) < ratio and asked_mib > recommend_mib:
             yield recommendformat(
-                '{} requested {} MiB of RAM but actual RAM usage was below {}% at {} MiB -- '
-                'suggest reducing RAM request to {} MiB'
+                '{} peak RAM usage was only {}% ({} MiB used / {} MiB requested)'
             ).format(
                 self.label,
-                int(asked_mib),
-                int(100*ratio),
+                int(100*(used_mib / asked_mib)),
                 int(used_mib),
-                recommend_mib)
+                int(asked_mib))
 
     def _recommend_keep_cache(self, recommendformat):
         """Recommend increasing keep cache if utilization < 50%.
@@ -499,7 +495,6 @@ class Summarizer(object):
         arv-mount.
         """
 
-        constraint_key = self._map_runtime_constraint('keep_cache_ram')
         if self.job_tot['net:keep0']['rx'] == 0:
             return
         utilization = (float(self.job_tot['blkio:0:0']['read']) /
@@ -510,11 +505,10 @@ class Summarizer(object):
         if utilization < 0.5:
             yield recommendformat(
                 '{} Keep cache utilization was {:.2f}% -- '
-                'try increasing keep_cache to {} MB'
+                'recommend increasing keep_cache'
             ).format(
                 self.label,
-                utilization * 100.0,
-                math.ceil((asked_cache * 2) / (1024*1024)))
+                utilization * 100.0)
 
 
     def _recommend_temp_disk(self, recommendformat):

-----------------------------------------------------------------------


hooks/post-receive
--