[ARVADOS] created: d4c6c7cb5943522b235adb33859695103919601e

git at public.curoverse.com git at public.curoverse.com
Tue Jul 7 11:29:33 EDT 2015


        at  d4c6c7cb5943522b235adb33859695103919601e (commit)


commit d4c6c7cb5943522b235adb33859695103919601e
Author: Tom Clegg <tom at curoverse.com>
Date:   Tue Jul 7 11:28:05 2015 -0400

    6218: Generate profiling data for a few arvados.collection.Collection scenarios.

diff --git a/sdk/python/tests/manifest_examples.py b/sdk/python/tests/manifest_examples.py
new file mode 100644
index 0000000..3bf230a
--- /dev/null
+++ b/sdk/python/tests/manifest_examples.py
@@ -0,0 +1,21 @@
+import arvados
+import arvados_testutil as tutil
+import hashlib
+
+class ManifestExamples(object):
+    """Mixin providing synthetic manifest text for collection tests."""
+    def make_manifest(self, bytes_per_block=1, blocks_per_file=1,
+                      files_per_stream=1, streams=1):
+        """Return the manifest text of a synthetic test collection."""
+        datablip = 'x' * bytes_per_block
+        # str.format() needs {} fields; %-style specifiers pass through as-is.
+        data_loc = '{}+{}'.format(hashlib.md5(datablip).hexdigest(),
+                                  bytes_per_block)
+        with tutil.mock_keep_responses(data_loc, 200):
+            coll = arvados.CollectionWriter()
+            for si in range(0, streams):
+                for fi in range(0, files_per_stream):
+                    with coll.open("stream{}/file{}.txt".format(si, fi)) as f:
+                        for bi in range(0, blocks_per_file):
+                            f.write(datablip)
+            return coll.manifest_text()
diff --git a/sdk/python/tests/performance/performance_profiler.py b/sdk/python/tests/performance/performance_profiler.py
index a36ce25..afa53ae 100644
--- a/sdk/python/tests/performance/performance_profiler.py
+++ b/sdk/python/tests/performance/performance_profiler.py
@@ -42,8 +42,8 @@ def profiled(function):
             caught = e
         pr.disable()
         ps = pstats.Stats(pr, stream=outfile)
-        ps.print_stats()
+        ps.sort_stats('time').print_stats()
         if caught:
-            raise caught
+            raise
         return ret
     return profiled_function
diff --git a/sdk/python/tests/test_benchmark_collections.py b/sdk/python/tests/test_benchmark_collections.py
new file mode 100644
index 0000000..04b1fae
--- /dev/null
+++ b/sdk/python/tests/test_benchmark_collections.py
@@ -0,0 +1,91 @@
+import arvados
+import sys
+
+import run_test_server
+import arvados_testutil as tutil
+import manifest_examples
+from performance.performance_profiler import profiled
+
+class CollectionBenchmark(run_test_server.TestCaseWithServers,
+                          tutil.ArvadosBaseTestCase,
+                          manifest_examples.ManifestExamples):
+    TEST_BLOCK_SIZE = 0  # bytes per block in the generated test manifest
+
+    @classmethod
+    def list_recursive(cls, coll, parent_name=''):
+        """Return a list of filenames in a [sub]collection.
+
+        Each name is prefixed with parent_name, e.g.:
+        ["stream1/file1", "stream2/file1", ...]
+        """
+
+        items = []
+        for name, item in coll.items():
+            if callable(getattr(item, 'items', None)):
+                # (ugh) duck-typing: a callable items() marks a subcollection
+                items.extend(cls.list_recursive(item, parent_name+name+'/'))
+            else:
+                items.append(parent_name+name)
+        return items
+
+    @classmethod
+    def setUpClass(cls):
+        super(CollectionBenchmark, cls).setUpClass()
+        run_test_server.authorize_with('active')
+        cls.api_client = arvados.api('v1')
+        cls.keep_client = arvados.KeepClient(api_client=cls.api_client,
+                                             local_store=cls.local_store)
+
+    @profiled
+    def profile_new_collection_from_manifest(self, manifest_text):
+        return arvados.collection.Collection(manifest_text)
+
+    @profiled
+    def profile_new_collection_from_server(self, uuid):
+        return arvados.collection.Collection(uuid)
+
+    @profiled
+    def profile_new_collection_from_collection_files(self, src):
+        dst = arvados.collection.Collection()
+        with tutil.mock_keep_responses('x'*self.TEST_BLOCK_SIZE, 200):
+            for name in self.list_recursive(src):
+                with src.open(name) as srcfile:
+                    with dst.open(name, 'w') as dstfile:
+                        dstfile.write(srcfile.read())
+            dst.save_new()
+
+    @profiled
+    def profile_collection_list_files(self, coll):
+        return self.list_recursive(coll)
+
+    def test_medium_sized_manifest(self):
+        """Exercise manifest-handling code.
+
+        Currently, this test puts undue emphasis on some code paths
+        that don't reflect typical use because the contrived example
+        manifest has some unusual characteristics:
+
+        * Block size is zero.
+
+        * Every block is identical, so block caching patterns are
+          unrealistic.
+
+        * Every file begins and ends at a block boundary.
+        """
+        specs = {
+            'streams': 100,
+            'files_per_stream': 100,
+            'blocks_per_file': 20,
+            'bytes_per_block': self.TEST_BLOCK_SIZE,
+        }
+        my_manifest = self.make_manifest(**specs)
+        # Profile constructing a Collection from the manifest text.
+        coll = self.profile_new_collection_from_manifest(my_manifest)
+        # Save it, then profile constructing a Collection from its locator.
+        coll.save_new()
+        self.profile_new_collection_from_server(coll.manifest_locator())
+        # Profile listing files; expect streams * files_per_stream names.
+        items = self.profile_collection_list_files(coll)
+        self.assertEqual(len(items), specs['streams'] * specs['files_per_stream'])
+        # Profile copying every file into a new collection and saving it.
+        self.profile_new_collection_from_collection_files(coll)

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list