[ARVADOS] created: 1.3.0-1513-g8dae25355

Git user git at public.curoverse.com
Tue Aug 13 21:03:14 UTC 2019


        at  8dae253559240fb16564fb934fece2cbcf001866 (commit)


commit 8dae253559240fb16564fb934fece2cbcf001866
Author: Tom Morris <tfmorris at veritasgenetics.com>
Date:   Thu Jul 25 17:01:40 2019 -0400

    15486: Fix crunchstat-summary to work with Arvados v1.4+
    
    Arvados v1.4 changed things so that container logs are no longer
    accessible via PDH so we need to go to the container request and
    access the log via UUID from there.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Morris <tfmorris at veritasgenetics.com>

diff --git a/tools/crunchstat-summary/crunchstat_summary/command.py b/tools/crunchstat-summary/crunchstat_summary/command.py
index aadc77582..ec7acb808 100644
--- a/tools/crunchstat-summary/crunchstat_summary/command.py
+++ b/tools/crunchstat-summary/crunchstat_summary/command.py
@@ -17,14 +17,20 @@ class ArgumentParser(argparse.ArgumentParser):
             description='Summarize resource usage of an Arvados Crunch job')
         src = self.add_mutually_exclusive_group()
         src.add_argument(
-            '--job', '--container', '--container-request',
+            '--job', '--container-request',
             type=str, metavar='UUID',
-            help='Look up the specified job, container, or container request '
+            help='Look up the specified job or container request '
+            'and read its log data from Keep (or from the Arvados event log, '
+            'if the job is still running)')
+        src.add_argument(
+            '--container',
+            type=str, metavar='UUID',
+            help='[Deprecated] Look up the specified container find its container request '
             'and read its log data from Keep (or from the Arvados event log, '
             'if the job is still running)')
         src.add_argument(
             '--pipeline-instance', type=str, metavar='UUID',
-            help='Summarize each component of the given pipeline instance')
+            help='[Deprecated] Summarize each component of the given pipeline instance (historical pre-1.4)')
         src.add_argument(
             '--log-file', type=str,
             help='Read log data from a regular file')
@@ -81,6 +87,8 @@ class Command(object):
             self.summer = summarizer.NewSummarizer(self.args.pipeline_instance, **kwargs)
         elif self.args.job:
             self.summer = summarizer.NewSummarizer(self.args.job, **kwargs)
+        elif self.args.container:
+            self.summer = summarizer.NewSummarizer(self.args.container, **kwargs)
         elif self.args.log_file:
             if self.args.log_file.endswith('.gz'):
                 fh = UTF8Decode(gzip.open(self.args.log_file))
diff --git a/tools/crunchstat-summary/crunchstat_summary/summarizer.py b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
index e962ced31..d99d3c1cf 100644
--- a/tools/crunchstat-summary/crunchstat_summary/summarizer.py
+++ b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
@@ -470,7 +470,7 @@ class Summarizer(object):
         elif self.detected_crunch1:
             return JobSummarizer.runtime_constraint_mem_unit
         else:
-            return ContainerSummarizer.runtime_constraint_mem_unit
+            return ContainerRequestSummarizer.runtime_constraint_mem_unit
 
     def _map_runtime_constraint(self, key):
         if hasattr(self, 'map_runtime_constraint'):
@@ -501,12 +501,15 @@ def NewSummarizer(process_or_uuid, **kwargs):
 
     if '-dz642-' in uuid:
         if process is None:
-            process = arv.containers().get(uuid=uuid).execute()
-        klass = ContainerTreeSummarizer
+            # Get the associated CR. Doesn't matter which since they all have the same logs
+            crs = arv.container_requests().list(filters=[['container_uuid','=',uuid]],limit=1).execute()['items']
+            if len(crs) > 0:
+                process = crs[0]
+        klass = ContainerRequestTreeSummarizer
     elif '-xvhdp-' in uuid:
         if process is None:
             process = arv.container_requests().get(uuid=uuid).execute()
-        klass = ContainerTreeSummarizer
+        klass = ContainerRequestTreeSummarizer
     elif '-8i9sb-' in uuid:
         if process is None:
             process = arv.jobs().get(uuid=uuid).execute()
@@ -530,9 +533,14 @@ class ProcessSummarizer(Summarizer):
         self.process = process
         if label is None:
             label = self.process.get('name', self.process['uuid'])
-        if self.process.get('log'):
+        # Pre-Arvados v1.4 everything is in 'log'
+        # For 1.4+ containers have no logs and container_requests have them in 'log_uuid', not 'log'
+        log_collection = self.process.get('log')
+        if not log_collection:
+            log_collection = self.process.get('log_uuid')
+        if log_collection:
             try:
-                rdr = crunchstat_summary.reader.CollectionReader(self.process['log'])
+                rdr = crunchstat_summary.reader.CollectionReader(log_collection)
             except arvados.errors.NotFoundError as e:
                 logger.warning("Trying event logs after failing to read "
                                "log collection %s: %s", self.process['log'], e)
@@ -552,7 +560,7 @@ class JobSummarizer(ProcessSummarizer):
     }
 
 
-class ContainerSummarizer(ProcessSummarizer):
+class ContainerRequestSummarizer(ProcessSummarizer):
     runtime_constraint_mem_unit = 1
 
 
@@ -653,7 +661,7 @@ class PipelineSummarizer(MultiSummarizer):
             **kwargs)
 
 
-class ContainerTreeSummarizer(MultiSummarizer):
+class ContainerRequestTreeSummarizer(MultiSummarizer):
     def __init__(self, root, skip_child_jobs=False, **kwargs):
         arv = arvados.api('v1', model=OrderedJsonModel())
 
@@ -666,10 +674,8 @@ class ContainerTreeSummarizer(MultiSummarizer):
             current = todo.popleft()
             label = current['name']
             sort_key = current['created_at']
-            if current['uuid'].find('-xvhdp-') > 0:
-                current = arv.containers().get(uuid=current['container_uuid']).execute()
 
-            summer = ContainerSummarizer(current, label=label, **kwargs)
+            summer = ContainerRequestSummarizer(current, label=label, **kwargs)
             summer.sort_key = sort_key
             children[current['uuid']] = summer
 
@@ -678,7 +684,7 @@ class ContainerTreeSummarizer(MultiSummarizer):
                 child_crs = arv.container_requests().index(
                     order=['uuid asc'],
                     filters=page_filters+[
-                        ['requesting_container_uuid', '=', current['uuid']]],
+                        ['requesting_container_uuid', '=', current['container_uuid']]],
                 ).execute()
                 if not child_crs['items']:
                     break
@@ -696,7 +702,7 @@ class ContainerTreeSummarizer(MultiSummarizer):
         sorted_children = collections.OrderedDict()
         for uuid in sorted(list(children.keys()), key=lambda uuid: children[uuid].sort_key):
             sorted_children[uuid] = children[uuid]
-        super(ContainerTreeSummarizer, self).__init__(
+        super(ContainerRequestTreeSummarizer, self).__init__(
             children=sorted_children,
             label=root['name'],
             **kwargs)
diff --git a/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-arv-mount.txt.gz b/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-arv-mount.txt.gz
deleted file mode 100644
index ff7dd30b2..000000000
Binary files a/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-arv-mount.txt.gz and /dev/null differ
diff --git a/tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-arv-mount.txt.gz b/tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-arv-mount.txt.gz
new file mode 100644
index 000000000..3fe0220ca
Binary files /dev/null and b/tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-arv-mount.txt.gz differ
diff --git a/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-arv-mount.txt.gz.report b/tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-arv-mount.txt.gz.report
similarity index 100%
rename from tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-arv-mount.txt.gz.report
rename to tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-arv-mount.txt.gz.report
diff --git a/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-crunchstat.txt.gz b/tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-crunchstat.txt.gz
similarity index 66%
rename from tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-crunchstat.txt.gz
rename to tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-crunchstat.txt.gz
index 249ad22e4..fc01ce9a8 100644
Binary files a/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-crunchstat.txt.gz and b/tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-crunchstat.txt.gz differ
diff --git a/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-crunchstat.txt.gz.report b/tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-crunchstat.txt.gz.report
similarity index 100%
copy from tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-crunchstat.txt.gz.report
copy to tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-crunchstat.txt.gz.report
diff --git a/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-crunchstat.txt.gz.report b/tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y.txt.gz.report
similarity index 66%
rename from tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-crunchstat.txt.gz.report
rename to tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y.txt.gz.report
index b17c70059..5152e577f 100644
--- a/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-crunchstat.txt.gz.report
+++ b/tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y.txt.gz.report
@@ -1,8 +1,16 @@
 category	metric	task_max	task_max_rate	job_total
+blkio:0:0	read	0	0	0
+blkio:0:0	write	0	0	0
 cpu	cpus	20	-	-
 cpu	sys	0.39	0.04	0.39
 cpu	user	2.06	0.20	2.06
 cpu	user+sys	2.45	0.24	2.45
+fuseops	read	0	0	0
+fuseops	write	0	0	0
+keepcache	hit	0	0	0
+keepcache	miss	0	0	0
+keepcalls	get	0	0	0
+keepcalls	put	0	0	0
 mem	cache	172032	-	-
 mem	pgmajfault	0	-	0
 mem	rss	69525504	-	-
@@ -10,6 +18,9 @@ mem	swap	0	-	-
 net:eth0	rx	859480	1478.97	859480
 net:eth0	tx	55888	395.71	55888
 net:eth0	tx+rx	915368	1874.69	915368
+net:keep0	rx	0	0	0
+net:keep0	tx	0	0	0
+net:keep0	tx+rx	0	0	0
 statfs	available	397744787456	-	397744787456
 statfs	total	402611240960	-	402611240960
 statfs	used	4870303744	52426.18	4866453504
@@ -24,5 +35,5 @@ time	elapsed	20	-	20
 # Keep cache miss rate 0.00%
 # Keep cache utilization 0.00%
 # Temp disk utilization 1.21%
-#!! label #1 max RSS was 67 MiB -- try reducing runtime_constraints to "ram":1020054732
-#!! label #1 max temp disk utilization was 1% of 383960 MiB -- consider reducing "tmpdirMin" and/or "outdirMin"
+#!! container max RSS was 67 MiB -- try reducing runtime_constraints to "ram":1020054732
+#!! container max temp disk utilization was 1% of 383960 MiB -- consider reducing "tmpdirMin" and/or "outdirMin"
diff --git a/tools/crunchstat-summary/tests/test_examples.py b/tools/crunchstat-summary/tests/test_examples.py
index 0270eaaec..fb23eab39 100644
--- a/tools/crunchstat-summary/tests/test_examples.py
+++ b/tools/crunchstat-summary/tests/test_examples.py
@@ -67,29 +67,37 @@ class SummarizeEdgeCases(unittest.TestCase):
         s.run()
 
 
-class SummarizeContainer(ReportDiff):
+class SummarizeContainerCommon(ReportDiff):
     fake_container = {
         'uuid': '9tee4-dz642-lymtndkpy39eibk',
         'created_at': '2017-08-18T14:27:25.371388141',
         'log': '9tee4-4zz18-ihyzym9tcwjwg4r',
     }
     fake_request = {
-        'uuid': '9tee4-xvhdp-uper95jktm10d3w',
+        'uuid': '9tee4-xvhdp-kk0ja1cl8b2kr1y',
         'name': 'container',
         'created_at': '2017-08-18T14:27:25.242339223Z',
         'container_uuid': fake_container['uuid'],
-    }
-    reportfile = os.path.join(
-        TESTS_DIR, 'container_9tee4-dz642-lymtndkpy39eibk.txt.gz')
+        'runtime_constraints': {
+            'vcpus': 1,
+            'ram': 2621440000
+            },
+        'log_uuid' : '9tee4-4zz18-m2swj50nk0r8b6y'
+        }
+
     logfile = os.path.join(
-        TESTS_DIR, 'container_9tee4-dz642-lymtndkpy39eibk-crunchstat.txt.gz')
+        TESTS_DIR, 'container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-crunchstat.txt.gz')
     arvmountlog = os.path.join(
-        TESTS_DIR, 'container_9tee4-dz642-lymtndkpy39eibk-arv-mount.txt.gz')
+        TESTS_DIR, 'container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-arv-mount.txt.gz')
 
     @mock.patch('arvados.collection.CollectionReader')
     @mock.patch('arvados.api')
-    def test_container(self, mock_api, mock_cr):
-        mock_api().container_requests().index().execute.return_value = {'items':[]}
+    def check_common(self, mock_api, mock_cr):
+        items = [ {'items':[self.fake_request]}] + [{'items':[]}] * 100
+        # Index and list mean the same thing, but are used in different places in the
+        # code. It's fragile, but exploit that fact to distinguish the two uses.
+        mock_api().container_requests().index().execute.return_value = {'items': [] }  # child_crs
+        mock_api().container_requests().list().execute.side_effect = items # parent request
         mock_api().container_requests().get().execute.return_value = self.fake_request
         mock_api().containers().get().execute.return_value = self.fake_container
         mock_cr().__iter__.return_value = [
@@ -102,12 +110,31 @@ class SummarizeContainer(ReportDiff):
                 return UTF8Decode(gzip.open(self.arvmountlog))
         mock_cr().open.side_effect = _open
         args = crunchstat_summary.command.ArgumentParser().parse_args(
-            ['--container', self.fake_request['uuid']])
+            self.arg_strings)
         cmd = crunchstat_summary.command.Command(args)
         cmd.run()
         self.diff_known_report(self.reportfile, cmd)
 
 
+
+class SummarizeContainer(SummarizeContainerCommon):
+    uuid = '9tee4-dz642-lymtndkpy39eibk'
+    reportfile = os.path.join(TESTS_DIR, 'container_%s.txt.gz' % uuid)
+    arg_strings = ['--container', uuid, '-v', '-v']
+
+    def test_container(self):
+        self.check_common()
+
+
+class SummarizeContainerRequest(SummarizeContainerCommon):
+    uuid = '9tee4-xvhdp-kk0ja1cl8b2kr1y'
+    reportfile = os.path.join(TESTS_DIR, 'container_request_%s.txt.gz' % uuid)
+    arg_strings = ['--container-request', uuid, '-v', '-v']
+
+    def test_container_request(self):
+        self.check_common()
+
+
 class SummarizeJob(ReportDiff):
     fake_job_uuid = '4xphq-8i9sb-jq0ekny1xou3zoh'
     fake_log_id = 'fake-log-collection-id'

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list