[ARVADOS] created: 56c5c336d1bbfd0af2b2d9ad20710d6bfe2d3186

Git user git at public.curoverse.com
Wed Sep 6 16:27:30 EDT 2017


        at  56c5c336d1bbfd0af2b2d9ad20710d6bfe2d3186 (commit)


commit 56c5c336d1bbfd0af2b2d9ad20710d6bfe2d3186
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date:   Wed Sep 6 16:12:20 2017 -0400

    10472: Follow job trees made by arvados-cwl-runner jobs.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>

diff --git a/tools/crunchstat-summary/crunchstat_summary/summarizer.py b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
index 33b8db9..6e08c5e 100644
--- a/tools/crunchstat-summary/crunchstat_summary/summarizer.py
+++ b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
@@ -457,7 +457,7 @@ def NewSummarizer(process_or_uuid, **kwargs):
     elif '-8i9sb-' in uuid:
         if process is None:
             process = arv.jobs().get(uuid=uuid).execute()
-        klass = JobSummarizer
+        klass = JobTreeSummarizer
     elif '-d1hrv-' in uuid:
         if process is None:
             process = arv.pipeline_instances().get(uuid=uuid).execute()
@@ -528,7 +528,7 @@ class MultiSummarizer(object):
 
     def text_report(self):
         txt = ''
-        for cname, child in self.children.iteritems():
+        for cname, child in self._descendants():
             if len(self.children) > 1:
                 txt += '### Summary for {} ({})\n'.format(
                     cname, child.process['uuid'])
@@ -536,8 +536,50 @@ class MultiSummarizer(object):
             txt += '\n'
         return txt
 
+    def _descendants(self):
+        """Dict of self and all descendants.
+
+        Wrappers with nothing of their own to report are omitted.
+        """
+        d = collections.OrderedDict()
+        for cname, child in self.children.iteritems():
+            if isinstance(child, Summarizer):
+                d[cname] = child
+            if isinstance(child, MultiSummarizer):
+                d.update(child._descendants())
+        return d
+
     def html_report(self):
-        return WEBCHART_CLASS(self.label, self.children.itervalues()).html()
+        return WEBCHART_CLASS(self.label, self._descendants().itervalues()).html()
+
+
+class JobTreeSummarizer(MultiSummarizer):
+    """Summarizes a job and all children listed in its components field."""
+    def __init__(self, job, label=None, **kwargs):
+        arv = arvados.api('v1', model=OrderedJsonModel())
+        label = label or job.get('name', None)
+        if label is None:
+            label = job['uuid']
+        else:
+            label = ' '.join([label, job['uuid']])
+        children = collections.OrderedDict()
+        children[job['uuid']] = JobSummarizer(job, label=label, **kwargs)
+        if job['components']:
+            preloaded = {}
+            for j in arv.jobs().index(
+                    limit=len(job['components']),
+                    filters=[['uuid','in',job['components'].values()]]).execute()['items']:
+                preloaded[j['uuid']] = j
+            for cname in sorted(job['components'].keys()):
+                child_uuid = job['components'][cname]
+                j = (preloaded.get(child_uuid) or
+                     arv.jobs().get(uuid=child_uuid).execute())
+                children[child_uuid] = JobTreeSummarizer(job=j, label=cname, **kwargs)
+
+        super(JobTreeSummarizer, self).__init__(
+            children=children,
+            label=label,
+            **kwargs)
 
 
 class PipelineSummarizer(MultiSummarizer):
@@ -550,7 +592,7 @@ class PipelineSummarizer(MultiSummarizer):
             else:
                 logger.info(
                     "%s: job %s", cname, component['job']['uuid'])
-                summarizer = JobSummarizer(component['job'], **kwargs)
+                summarizer = JobTreeSummarizer(component['job'], **kwargs)
                 summarizer.label = '{} {}'.format(
                     cname, component['job']['uuid'])
                 children[cname] = summarizer

commit b0ec12e8f75cc2bd97d110f8cda055f34d04c4de
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date:   Wed Sep 6 16:06:39 2017 -0400

    10472: Conserve FDs by not opening collections until ready to read.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>

diff --git a/tools/crunchstat-summary/crunchstat_summary/reader.py b/tools/crunchstat-summary/crunchstat_summary/reader.py
index a0a838d..c215228 100644
--- a/tools/crunchstat-summary/crunchstat_summary/reader.py
+++ b/tools/crunchstat-summary/crunchstat_summary/reader.py
@@ -14,21 +14,32 @@ from crunchstat_summary import logger
 
 class CollectionReader(object):
     def __init__(self, collection_id):
-        logger.debug('load collection %s', collection_id)
-        collection = arvados.collection.CollectionReader(collection_id)
+        self._collection_id = collection_id
+        self._label = collection_id
+        self._reader = None
+
+    def __str__(self):
+        return self._label
+
+    def __iter__(self):
+        logger.debug('load collection %s', self._collection_id)
+        collection = arvados.collection.CollectionReader(self._collection_id)
         filenames = [filename for filename in collection]
         if len(filenames) == 1:
             filename = filenames[0]
         else:
             filename = 'crunchstat.txt'
+        self._label = "{}/{}".format(self._collection_id, filename)
         self._reader = collection.open(filename)
-        self._label = "{}/{}".format(collection_id, filename)
+        return iter(self._reader)
 
-    def __str__(self):
-        return self._label
+    def __enter__(self):
+        return self
 
-    def __iter__(self):
-        return iter(self._reader)
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        if self._reader:
+            self._reader.close()
+            self._reader = None
 
 
 class LiveLogReader(object):
diff --git a/tools/crunchstat-summary/crunchstat_summary/summarizer.py b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
index 4924b40..33b8db9 100644
--- a/tools/crunchstat-summary/crunchstat_summary/summarizer.py
+++ b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
@@ -70,8 +70,12 @@ class Summarizer(object):
 
     def run(self):
         logger.debug("%s: parsing logdata %s", self.label, self._logdata)
+        with self._logdata as logdata:
+            self._run(logdata)
+
+    def _run(self, logdata):
         self.detected_crunch1 = False
-        for line in self._logdata:
+        for line in logdata:
             if not self.detected_crunch1 and '-8i9sb-' in line:
                 self.detected_crunch1 = True
 

commit 87ba27e4319010af56c0d063e0e7a246661725c6
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date:   Thu Aug 31 10:18:34 2017 -0400

    10472: Ignore "Running [command]" log message from crunchstat.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>

diff --git a/tools/crunchstat-summary/crunchstat_summary/summarizer.py b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
index 33b9230..4924b40 100644
--- a/tools/crunchstat-summary/crunchstat_summary/summarizer.py
+++ b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
@@ -128,7 +128,7 @@ class Summarizer(object):
             if m.group('category').endswith(':'):
                 # "stderr crunchstat: notice: ..."
                 continue
-            elif m.group('category') in ('error', 'caught'):
+            elif m.group('category') in ('error', 'caught', 'Running'):
                 continue
             elif m.group('category') in ['read', 'open', 'cgroup', 'CID']:
                 # "stderr crunchstat: read /proc/1234/net/dev: ..."

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list