[arvados] updated: 2.7.0-6010-g0f9d5cc0b9

git repository hosting git at public.arvados.org
Thu Feb 22 18:20:28 UTC 2024


Summary of changes:
 sdk/cwl/arvados_cwl/arvcontainer.py                |  15 +-
 sdk/cwl/arvados_cwl/executor.py                    |   2 +-
 .../crunchstat_summary/command.py                  |   2 +
 .../crunchstat_summary/reader.py                   |  19 +-
 .../crunchstat_summary/summarizer.py               | 269 +++++++++++++--------
 5 files changed, 190 insertions(+), 117 deletions(-)

       via  0f9d5cc0b9a83c99ebc1c63fd24b539bc37bbb63 (commit)
       via  c5489cb1052e68d1d0db71224ed09f1499ee776f (commit)
       via  3ecbf55b4c3389115f6aa8db4839c522d3b3b288 (commit)
       via  99dd11ec75589f79c8c0abc185deba6951936373 (commit)
       via  528982e8b2d20552aefda334f49cc38469547294 (commit)
       via  e2b5933fa0c48d4747680722e1ce155bc316e6a0 (commit)
      from  daf28acf73a01fcff9e136d0036ace877af85e25 (commit)

Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.


commit 0f9d5cc0b9a83c99ebc1c63fd24b539bc37bbb63
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Thu Feb 22 13:19:47 2024 -0500

    19744: Don't warn about missing data when the runtime is short
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/tools/crunchstat-summary/crunchstat_summary/summarizer.py b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
index 371dc20cd3..9b6e5f1690 100644
--- a/tools/crunchstat-summary/crunchstat_summary/summarizer.py
+++ b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
@@ -432,6 +432,11 @@ class Summarizer(object):
 
     def _recommend_gen(self, recommendformat):
         # TODO recommend fixing job granularity if elapsed time is too short
+
+        if self.stats_max['time'].get('elapsed', 0) <= 20:
+            # Not enough data
+            return []
+
         return itertools.chain(
             self._recommend_cpu(recommendformat),
             self._recommend_ram(recommendformat),

commit c5489cb1052e68d1d0db71224ed09f1499ee776f
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Thu Feb 22 11:30:53 2024 -0500

    19744: Don't warn about missing metrics when the elapsed time is short
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/tools/crunchstat-summary/crunchstat_summary/summarizer.py b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
index a67b72f89c..371dc20cd3 100644
--- a/tools/crunchstat-summary/crunchstat_summary/summarizer.py
+++ b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
@@ -251,26 +251,29 @@ class Summarizer(object):
                     self.job_tot[category][stat] += val
         logger.debug('%s: done totals', self.label)
 
-        missing_category = {
-            'cpu': 'CPU',
-            'mem': 'memory',
-            'net:': 'network I/O',
-            'statfs': 'storage space',
-        }
-        for task_stat in self.task_stats.values():
-            for category in task_stat.keys():
-                for checkcat in missing_category:
-                    if checkcat.endswith(':'):
-                        if category.startswith(checkcat):
-                            missing_category.pop(checkcat)
-                            break
-                    else:
-                        if category == checkcat:
-                            missing_category.pop(checkcat)
-                            break
-        for catlabel in missing_category.values():
-            logger.warning('%s: %s stats are missing -- possible cluster configuration issue',
-                        self.label, catlabel)
+        if self.stats_max['time'].get('elapsed', 0) > 20:
+            # needs to have executed for at least 20 seconds or we may
+            # not have collected any metrics and these warnings are duds.
+            missing_category = {
+                'cpu': 'CPU',
+                'mem': 'memory',
+                'net:': 'network I/O',
+                'statfs': 'storage space',
+            }
+            for task_stat in self.task_stats.values():
+                for category in task_stat.keys():
+                    for checkcat in missing_category:
+                        if checkcat.endswith(':'):
+                            if category.startswith(checkcat):
+                                missing_category.pop(checkcat)
+                                break
+                        else:
+                            if category == checkcat:
+                                missing_category.pop(checkcat)
+                                break
+            for catlabel in missing_category.values():
+                logger.warning('%s: %s stats are missing -- possible cluster configuration issue',
+                            self.label, catlabel)
 
     def long_label(self):
         label = self.label

commit 3ecbf55b4c3389115f6aa8db4839c522d3b3b288
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Thu Feb 22 11:23:01 2024 -0500

    19744: Don't post crunchstat summary warnings to runtime status
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/sdk/cwl/arvados_cwl/executor.py b/sdk/cwl/arvados_cwl/executor.py
index 2db6a9bfe2..2c0120793f 100644
--- a/sdk/cwl/arvados_cwl/executor.py
+++ b/sdk/cwl/arvados_cwl/executor.py
@@ -70,7 +70,7 @@ class RuntimeStatusLoggingHandler(logging.Handler):
             kind = 'error'
         elif record.levelno >= logging.WARNING:
             kind = 'warning'
-        if kind == 'warning' and record.name == "salad":
+        if kind == 'warning' and (record.name == "salad" or record.name == "crunchstat_summary"):
             # Don't send validation warnings to runtime status,
             # they're noisy and unhelpful.
             return

commit 99dd11ec75589f79c8c0abc185deba6951936373
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Thu Feb 22 11:02:38 2024 -0500

    19744: Fix up threading
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/tools/crunchstat-summary/crunchstat_summary/command.py b/tools/crunchstat-summary/crunchstat_summary/command.py
index 71eae51996..4ece5c3b2e 100644
--- a/tools/crunchstat-summary/crunchstat_summary/command.py
+++ b/tools/crunchstat-summary/crunchstat_summary/command.py
@@ -7,6 +7,7 @@ import gzip
 from io import open
 import logging
 import sys
+import arvados
 
 from crunchstat_summary import logger, summarizer
 from crunchstat_summary._version import __version__
@@ -86,6 +87,7 @@ class Command(object):
         kwargs = {
             'skip_child_jobs': self.args.skip_child_jobs,
             'threads': self.args.threads,
+            'arv': arvados.api('v1')
         }
         if self.args.pipeline_instance:
             self.summer = summarizer.NewSummarizer(self.args.pipeline_instance, **kwargs)
diff --git a/tools/crunchstat-summary/crunchstat_summary/summarizer.py b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
index a721ff36b6..a67b72f89c 100644
--- a/tools/crunchstat-summary/crunchstat_summary/summarizer.py
+++ b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
@@ -12,8 +12,10 @@ import itertools
 import math
 import re
 import sys
-import threading
 import _strptime
+import arvados.util
+
+from concurrent.futures import ThreadPoolExecutor
 
 from crunchstat_summary import logger
 
@@ -63,8 +65,9 @@ class Summarizer(object):
         # are already suitable.  If applicable, the subclass
         # constructor will overwrite this with something useful.
         self.existing_constraints = {}
+        self.node_info = {}
 
-        logger.debug("%s: logdata %s", self.label, logdata)
+        logger.info("%s: logdata %s", self.label, logdata)
 
     def run(self):
         logger.debug("%s: parsing logdata %s", self.label, self._logdata)
@@ -73,6 +76,10 @@ class Summarizer(object):
 
     def _run(self, logdata):
         self.detected_crunch1 = False
+
+        if not self.node_info:
+            self.node_info = logdata.node_info()
+
         for line in logdata:
             if not self.detected_crunch1 and '-8i9sb-' in line:
                 self.detected_crunch1 = True
@@ -639,8 +646,6 @@ class ProcessSummarizer(Summarizer):
             uuid = self.process.get('container_uuid', self.process.get('uuid'))
             rdr = crunchstat_summary.reader.LiveLogReader(uuid)
             label = label + ' (partial)'
-        else:
-            self.node_info = rdr.node_info()
 
         super(ProcessSummarizer, self).__init__(rdr, label=label, **kwargs)
         self.existing_constraints = self.process.get('runtime_constraints', {})
@@ -664,26 +669,26 @@ class ContainerRequestSummarizer(ProcessSummarizer):
 
 class MultiSummarizer(object):
     def __init__(self, children={}, label=None, threads=1, **kwargs):
-        self.throttle = threading.Semaphore(threads)
         self.children = children
         self.label = label
-
-    def run_and_release(self, target, *args, **kwargs):
-        try:
-            return target(*args, **kwargs)
-        finally:
-            self.throttle.release()
+        self.threadcount = threads
 
     def run(self):
-        threads = []
-        for child in self.children.values():
-            self.throttle.acquire()
-            t = threading.Thread(target=self.run_and_release, args=(child.run, ))
-            t.daemon = True
-            t.start()
-            threads.append(t)
-        for t in threads:
-            t.join()
+        if self.threadcount > 1 and len(self.children) > 1:
+            completed = 0
+            def run_and_progress(child):
+                try:
+                    child.run()
+                except Exception as e:
+                    logger.exception("parse error")
+                completed += 1
+                logger.info("%s/%s summarized %s", completed, len(self.children), child.label)
+            with ThreadPoolExecutor(max_workers=self.threadcount) as tpe:
+                for child in self.children.values():
+                    tpe.submit(run_and_progress, child)
+        else:
+            for child in self.children.values():
+                child.run()
 
     def text_report(self):
         txt = ''
@@ -791,22 +796,15 @@ class ContainerRequestTreeSummarizer(MultiSummarizer):
             summer.sort_key = sort_key
             children[current['uuid']] = summer
 
-            page_filters = []
-            while True:
-                child_crs = arv.container_requests().index(
-                    order=['uuid asc'],
-                    filters=page_filters+[
-                        ['requesting_container_uuid', '=', current['container_uuid']]],
-                ).execute()
-                if not child_crs['items']:
-                    break
-                elif skip_child_jobs:
-                    logger.warning('%s: omitting stats from %d child containers'
-                                   ' because --skip-child-jobs flag is on',
-                                   label, child_crs['items_available'])
-                    break
-                page_filters = [['uuid', '>', child_crs['items'][-1]['uuid']]]
-                for cr in child_crs['items']:
+            if skip_child_jobs:
+                child_crs = arv.container_requests().list(filters=[['requesting_container_uuid', '=', current['container_uuid']]],
+                                                          limit=0).execute()
+                logger.warning('%s: omitting stats from child containers'
+                               ' because --skip-child-jobs flag is on',
+                               label, child_crs['items_available'])
+            else:
+                for cr in arvados.util.keyset_list_all(arv.container_requests().list,
+                                                       filters=[['requesting_container_uuid', '=', current['container_uuid']]]):
                     if cr['container_uuid']:
                         logger.debug('%s: container req %s', current['uuid'], cr['uuid'])
                         cr['name'] = cr.get('name') or cr['uuid']

commit 528982e8b2d20552aefda334f49cc38469547294
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Wed Feb 21 19:05:36 2024 -0500

    19744: Incorporate runtime constraints and node info into report
    
    This makes it much easier to judge CPU and RAM utilization.
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/sdk/cwl/arvados_cwl/arvcontainer.py b/sdk/cwl/arvados_cwl/arvcontainer.py
index 97b4e45225..bf1199a054 100644
--- a/sdk/cwl/arvados_cwl/arvcontainer.py
+++ b/sdk/cwl/arvados_cwl/arvcontainer.py
@@ -541,7 +541,7 @@ class ArvadosContainer(JobBase):
                         mr.write(summerizer.html_report())
                     logc.save()
                 except Exception as e:
-                    logger.error("%s unable to generate resource usage report",
+                    logger.warning("%s unable to generate resource usage report",
                                  self.arvrunner.label(self),
                                  exc_info=(e if self.arvrunner.debug else False))
 
diff --git a/tools/crunchstat-summary/crunchstat_summary/reader.py b/tools/crunchstat-summary/crunchstat_summary/reader.py
index 8ccdbc2fcf..2af770f51b 100644
--- a/tools/crunchstat-summary/crunchstat_summary/reader.py
+++ b/tools/crunchstat-summary/crunchstat_summary/reader.py
@@ -4,6 +4,7 @@
 
 import arvados
 import itertools
+import json
 import queue
 import threading
 
@@ -11,24 +12,26 @@ from crunchstat_summary import logger
 
 
 class CollectionReader(object):
-    def __init__(self, collection_id):
+    def __init__(self, collection_id, api_client=None):
         self._collection_id = collection_id
         self._label = collection_id
         self._readers = []
+        self._api_client = api_client
+        self._collection = arvados.collection.CollectionReader(self._collection_id, api_client=self._api_client)
 
     def __str__(self):
         return self._label
 
     def __iter__(self):
         logger.debug('load collection %s', self._collection_id)
-        collection = arvados.collection.CollectionReader(self._collection_id)
-        filenames = [filename for filename in collection]
+
+        filenames = [filename for filename in self._collection]
         # Crunch2 has multiple stats files
         if len(filenames) > 1:
             filenames = ['crunchstat.txt', 'arv-mount.txt']
         for filename in filenames:
             try:
-                self._readers.append(collection.open(filename))
+                self._readers.append(self._collection.open(filename))
             except IOError:
                 logger.warn('Unable to open %s', filename)
         self._label = "{}/{}".format(self._collection_id, filenames[0])
@@ -43,6 +46,14 @@ class CollectionReader(object):
                 reader.close()
             self._readers = []
 
+    def node_info(self):
+        try:
+            with self._collection.open("node.json") as f:
+                return json.load(f)
+        except IOError:
+            logger.warn('Unable to open node.json')
+        return {}
+
 
 class LiveLogReader(object):
     EOF = None
diff --git a/tools/crunchstat-summary/crunchstat_summary/summarizer.py b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
index 062be3a65a..a721ff36b6 100644
--- a/tools/crunchstat-summary/crunchstat_summary/summarizer.py
+++ b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
@@ -277,11 +277,11 @@ class Summarizer(object):
         label = ""
         s = (self.finishtime - self.starttime).total_seconds()
         if s > 86400:
-            label += '{}d'.format(int(s/86400))
+            label += '{}d '.format(int(s/86400))
         if s > 3600:
-            label += '{}h'.format(int(s/3600) % 24)
+            label += '{}h '.format(int(s/3600) % 24)
         if s > 60:
-            label += '{}m'.format(int(s/60) % 60)
+            label += '{}m '.format(int(s/60) % 60)
         label += '{}s'.format(int(s) % 60)
         return label
 
@@ -312,57 +312,97 @@ class Summarizer(object):
         by_single_task = ""
         if len(self.tasks) > 1:
             by_single_task = " by a single task"
+
         metrics = [
             ('Elapsed time',
              self.elapsed_time(),
              None,
              ''),
-                ('CPU time spent{}'.format(by_single_task),
-                 self.stats_max['cpu']['user+sys'],
-                 None,
-                 's'),
-                ('Max CPU usage in a single interval',
-                 self.stats_max['cpu']['user+sys__rate'],
-                 lambda x: x * 100,
-                 '%'),
-                ('Overall CPU usage',
-                 float(self.job_tot['cpu']['user+sys']) /
-                 self.job_tot['time']['elapsed']
-                 if self.job_tot['time']['elapsed'] > 0 else 0,
-                 lambda x: x * 100,
-                 '%'),
-                ('Max memory used{}'.format(by_single_task),
-                 self.stats_max['mem']['rss'],
-                 lambda x: x / 1e9,
-                 'GB'),
-                ('Max network traffic{}'.format(by_single_task),
-                 self.stats_max['net:eth0']['tx+rx'] +
-                 self.stats_max['net:keep0']['tx+rx'],
-                 lambda x: x / 1e9,
-                 'GB'),
-                ('Max network speed in a single interval',
-                 self.stats_max['net:eth0']['tx+rx__rate'] +
-                 self.stats_max['net:keep0']['tx+rx__rate'],
-                 lambda x: x / 1e6,
-                 'MB/s'),
-                ('Keep cache miss rate',
-                 (float(self.job_tot['keepcache']['miss']) /
-                 float(self.job_tot['keepcalls']['get']))
-                 if self.job_tot['keepcalls']['get'] > 0 else 0,
-                 lambda x: x * 100.0,
-                 '%'),
-                ('Keep cache utilization',
-                 (float(self.job_tot['blkio:0:0']['read']) /
-                 float(self.job_tot['net:keep0']['rx']))
-                 if self.job_tot['net:keep0']['rx'] > 0 else 0,
-                 lambda x: x * 100.0,
-                 '%'),
-               ('Temp disk utilization',
-                 (float(self.job_tot['statfs']['used']) /
-                 float(self.job_tot['statfs']['total']))
-                 if self.job_tot['statfs']['total'] > 0 else 0,
-                 lambda x: x * 100.0,
-                '%'),
+
+            ('Estimated cost',
+             '${:.3f}'.format(self.cost),
+             None,
+             '') if self.cost > 0 else None,
+
+            ('Assigned instance type',
+             self.node_info.get('ProviderType'),
+             None,
+             '') if self.node_info.get('ProviderType') else None,
+
+            ('Instance hourly price',
+             '${:.3f}'.format(self.node_info.get('Price')),
+             None,
+             '') if self.node_info.get('Price') else None,
+
+            ('Max CPU usage in a single interval',
+             self.stats_max['cpu']['user+sys__rate'],
+             lambda x: x * 100,
+             '%'),
+
+            ('Overall CPU usage',
+             float(self.job_tot['cpu']['user+sys']) /
+             self.job_tot['time']['elapsed']
+             if self.job_tot['time']['elapsed'] > 0 else 0,
+             lambda x: x * 100,
+             '%'),
+
+            ('Requested CPU cores',
+             self.existing_constraints.get(self._map_runtime_constraint('vcpus')),
+             None,
+             ''),
+
+            ('Instance VCPUs',
+             self.node_info.get('VCPUs'),
+             None,
+             '') if self.node_info.get('VCPUs') else None,
+
+            ('Max memory used{}'.format(by_single_task),
+             self.stats_max['mem']['rss'],
+             lambda x: x / 2**20,
+             'MB'),
+
+            ('Requested RAM',
+             self.existing_constraints.get(self._map_runtime_constraint('ram')),
+             lambda x: x / 2**20,
+             'MB'),
+
+            ('Maximum RAM request for this instance type',
+             (self.node_info.get('RAM') - self.arv_config.get('Containers', {}).get('ReserveExtraRAM', {}))*.95,
+             lambda x: x / 2**20,
+             'MB'),
+
+            ('Max network traffic{}'.format(by_single_task),
+             self.stats_max['net:eth0']['tx+rx'] +
+             self.stats_max['net:keep0']['tx+rx'],
+             lambda x: x / 1e9,
+             'GB'),
+
+            ('Max network speed in a single interval',
+             self.stats_max['net:eth0']['tx+rx__rate'] +
+             self.stats_max['net:keep0']['tx+rx__rate'],
+             lambda x: x / 1e6,
+             'MB/s'),
+
+            ('Keep cache miss rate',
+             (float(self.job_tot['keepcache']['miss']) /
+              float(self.job_tot['keepcalls']['get']))
+             if self.job_tot['keepcalls']['get'] > 0 else 0,
+             lambda x: x * 100.0,
+             '%'),
+
+            ('Keep cache utilization',
+             (float(self.job_tot['blkio:0:0']['read']) /
+              float(self.job_tot['net:keep0']['rx']))
+             if self.job_tot['net:keep0']['rx'] > 0 else 0,
+             lambda x: x * 100.0,
+             '%'),
+
+            ('Temp disk utilization',
+             (float(self.job_tot['statfs']['used']) /
+              float(self.job_tot['statfs']['total']))
+             if self.job_tot['statfs']['total'] > 0 else 0,
+             lambda x: x * 100.0,
+             '%'),
         ]
 
         if len(self.tasks) > 1:
@@ -371,6 +411,8 @@ class Summarizer(object):
                  None,
                  ''))
         for args in metrics:
+            if args is None:
+                continue
             format_string, val, transform, suffix = args
             if val == float('-Inf'):
                 continue
@@ -581,6 +623,7 @@ class ProcessSummarizer(Summarizer):
     def __init__(self, process, label=None, **kwargs):
         rdr = None
         self.process = process
+        arv = kwargs.get("arv") or arvados.api('v1')
         if label is None:
             label = self.process.get('name', self.process['uuid'])
         # Pre-Arvados v1.4 everything is in 'log'
@@ -588,7 +631,7 @@ class ProcessSummarizer(Summarizer):
         log_collection = self.process.get('log', self.process.get('log_uuid'))
         if log_collection and self.process.get('state') != 'Uncommitted': # arvados.util.CR_UNCOMMITTED:
             try:
-                rdr = crunchstat_summary.reader.CollectionReader(log_collection)
+                rdr = crunchstat_summary.reader.CollectionReader(log_collection, api_client=arv)
             except arvados.errors.NotFoundError as e:
                 logger.warning("Trying event logs after failing to read "
                                "log collection %s: %s", self.process['log'], e)
@@ -596,8 +639,14 @@ class ProcessSummarizer(Summarizer):
             uuid = self.process.get('container_uuid', self.process.get('uuid'))
             rdr = crunchstat_summary.reader.LiveLogReader(uuid)
             label = label + ' (partial)'
+        else:
+            self.node_info = rdr.node_info()
+
         super(ProcessSummarizer, self).__init__(rdr, label=label, **kwargs)
         self.existing_constraints = self.process.get('runtime_constraints', {})
+        self.arv_config = arv.config()
+        self.cost = self.process.get('cost', 0)
+
 
 
 class JobSummarizer(ProcessSummarizer):

commit e2b5933fa0c48d4747680722e1ce155bc316e6a0
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Wed Feb 21 16:40:41 2024 -0500

    19744: Add exception handler and pass api client to summarizer
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/sdk/cwl/arvados_cwl/arvcontainer.py b/sdk/cwl/arvados_cwl/arvcontainer.py
index b7ef13070a..97b4e45225 100644
--- a/sdk/cwl/arvados_cwl/arvcontainer.py
+++ b/sdk/cwl/arvados_cwl/arvcontainer.py
@@ -534,11 +534,16 @@ class ArvadosContainer(JobBase):
             ).execute(num_retries=self.arvrunner.num_retries)
 
             if logc is not None:
-                summerizer = crunchstat_summary.summarizer.NewSummarizer(self.uuid)
-                summerizer.run()
-                with logc.open("usage_report.html", "wt") as mr:
-                    mr.write(summerizer.html_report())
-                logc.save()
+                try:
+                    summerizer = crunchstat_summary.summarizer.NewSummarizer(self.uuid, arv=self.arvrunner.api)
+                    summerizer.run()
+                    with logc.open("usage_report.html", "wt") as mr:
+                        mr.write(summerizer.html_report())
+                    logc.save()
+                except Exception as e:
+                    logger.error("%s unable to generate resource usage report",
+                                 self.arvrunner.label(self),
+                                 exc_info=(e if self.arvrunner.debug else False))
 
         except WorkflowException as e:
             # Only include a stack trace if in debug mode.
diff --git a/tools/crunchstat-summary/crunchstat_summary/summarizer.py b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
index 75d49095d9..062be3a65a 100644
--- a/tools/crunchstat-summary/crunchstat_summary/summarizer.py
+++ b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
@@ -547,7 +547,7 @@ def NewSummarizer(process_or_uuid, **kwargs):
     else:
         uuid = process_or_uuid
         process = None
-        arv = arvados.api('v1')
+        arv = kwargs.get("arv") or arvados.api('v1')
 
     if '-dz642-' in uuid:
         if process is None:
@@ -682,7 +682,7 @@ class MultiSummarizer(object):
 class JobTreeSummarizer(MultiSummarizer):
     """Summarizes a job and all children listed in its components field."""
     def __init__(self, job, label=None, **kwargs):
-        arv = arvados.api('v1')
+        arv = kwargs.get("arv") or arvados.api('v1')
         label = label or job.get('name', job['uuid'])
         children = collections.OrderedDict()
         children[job['uuid']] = JobSummarizer(job, label=label, **kwargs)
@@ -726,7 +726,7 @@ class PipelineSummarizer(MultiSummarizer):
 
 class ContainerRequestTreeSummarizer(MultiSummarizer):
     def __init__(self, root, skip_child_jobs=False, **kwargs):
-        arv = arvados.api('v1')
+        arv = kwargs.get("arv") or arvados.api('v1')
 
         label = kwargs.pop('label', None) or root.get('name') or root['uuid']
         root['name'] = label

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list