[ARVADOS] updated: 97f5239f053b1691d7f2cb56230386921f8ea4d4

git at public.curoverse.com git at public.curoverse.com
Thu Jul 9 23:03:26 EDT 2015


Summary of changes:
 sdk/python/tests/manifest_examples.py              | 21 +++++
 .../tests/performance/performance_profiler.py      |  4 +-
 sdk/python/tests/test_benchmark_collections.py     | 97 ++++++++++++++++++++++
 3 files changed, 120 insertions(+), 2 deletions(-)
 create mode 100644 sdk/python/tests/manifest_examples.py
 create mode 100644 sdk/python/tests/test_benchmark_collections.py

       via  97f5239f053b1691d7f2cb56230386921f8ea4d4 (commit)
       via  1088f78459a6ac24b91673625ef72976dfb99fd3 (commit)
      from  64736a9fa518af448b98e6277185acc269bd5ade (commit)

Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.


commit 97f5239f053b1691d7f2cb56230386921f8ea4d4
Merge: 64736a9 1088f78
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Jul 9 23:02:59 2015 -0400

    Merge branch '6218-python-sdk-perf' closes #6218


commit 1088f78459a6ac24b91673625ef72976dfb99fd3
Author: Tom Clegg <tom at curoverse.com>
Date:   Tue Jul 7 11:28:05 2015 -0400

    6218: Generate profiling data for a few arvados.collection.Collection scenarios.

diff --git a/sdk/python/tests/manifest_examples.py b/sdk/python/tests/manifest_examples.py
new file mode 100644
index 0000000..2d8e475
--- /dev/null
+++ b/sdk/python/tests/manifest_examples.py
@@ -0,0 +1,21 @@
+import arvados
+import arvados_testutil as tutil
+import hashlib
+
+class ManifestExamples(object):
+    def make_manifest(self,
+                      bytes_per_block=1,
+                      blocks_per_file=1,
+                      files_per_stream=1,
+                      streams=1):
+        datablip = 'x' * bytes_per_block
+        data_loc = '{}+{}'.format(hashlib.md5(datablip).hexdigest(),
+                                  bytes_per_block)
+        with tutil.mock_keep_responses(data_loc, 200):
+            coll = arvados.CollectionWriter()
+            for si in range(0, streams):
+                for fi in range(0, files_per_stream):
+                    with coll.open("stream{}/file{}.txt".format(si, fi)) as f:
+                        for bi in range(0, blocks_per_file):
+                            f.write(datablip)
+            return coll.manifest_text()
diff --git a/sdk/python/tests/performance/performance_profiler.py b/sdk/python/tests/performance/performance_profiler.py
index a36ce25..afa53ae 100644
--- a/sdk/python/tests/performance/performance_profiler.py
+++ b/sdk/python/tests/performance/performance_profiler.py
@@ -42,8 +42,8 @@ def profiled(function):
             caught = e
         pr.disable()
         ps = pstats.Stats(pr, stream=outfile)
-        ps.print_stats()
+        ps.sort_stats('time').print_stats()
         if caught:
-            raise caught
+            raise
         return ret
     return profiled_function
diff --git a/sdk/python/tests/test_benchmark_collections.py b/sdk/python/tests/test_benchmark_collections.py
new file mode 100644
index 0000000..d75ad47
--- /dev/null
+++ b/sdk/python/tests/test_benchmark_collections.py
@@ -0,0 +1,97 @@
+import arvados
+import sys
+
+import run_test_server
+import arvados_testutil as tutil
+import manifest_examples
+from performance.performance_profiler import profiled
+
+class CollectionBenchmark(run_test_server.TestCaseWithServers,
+                          tutil.ArvadosBaseTestCase,
+                          manifest_examples.ManifestExamples):
+    MAIN_SERVER = {}
+    TEST_BLOCK_SIZE = 0
+
+    @classmethod
+    def list_recursive(cls, coll, parent_name=None):
+        if parent_name is None:
+            current_name = coll.stream_name()
+        else:
+            current_name = '{}/{}'.format(parent_name, coll.name)
+        try:
+            for name in coll:
+                for item in cls.list_recursive(coll[name], current_name):
+                    yield item
+        except TypeError:
+            yield current_name
+
+    @classmethod
+    def setUpClass(cls):
+        super(CollectionBenchmark, cls).setUpClass()
+        run_test_server.authorize_with('active')
+        cls.api_client = arvados.api('v1')
+        cls.keep_client = arvados.KeepClient(api_client=cls.api_client,
+                                             local_store=cls.local_store)
+
+    @profiled
+    def profile_new_collection_from_manifest(self, manifest_text):
+        return arvados.collection.Collection(manifest_text)
+
+    @profiled
+    def profile_new_collection_from_server(self, uuid):
+        return arvados.collection.Collection(uuid)
+
+    @profiled
+    def profile_new_collection_copying_bytes_from_collection(self, src):
+        dst = arvados.collection.Collection()
+        with tutil.mock_keep_responses('x'*self.TEST_BLOCK_SIZE, 200):
+            for name in self.list_recursive(src):
+                with src.open(name) as srcfile, dst.open(name, 'w') as dstfile:
+                    dstfile.write(srcfile.read())
+            dst.save_new()
+
+    @profiled
+    def profile_new_collection_copying_files_from_collection(self, src):
+        dst = arvados.collection.Collection()
+        with tutil.mock_keep_responses('x'*self.TEST_BLOCK_SIZE, 200):
+            for name in self.list_recursive(src):
+                dst.copy(name, name, src)
+            dst.save_new()
+
+    @profiled
+    def profile_collection_list_files(self, coll):
+        return sum(1 for name in self.list_recursive(coll))
+
+    def test_medium_sized_manifest(self):
+        """Exercise manifest-handling code.
+
+        Currently, this test puts undue emphasis on some code paths
+        that don't reflect typical use because the contrived example
+        manifest has some unusual characteristics:
+
+        * Block size is zero.
+
+        * Every block is identical, so block caching patterns are
+          unrealistic.
+
+        * Every file begins and ends at a block boundary.
+        """
+        specs = {
+            'streams': 100,
+            'files_per_stream': 100,
+            'blocks_per_file': 20,
+            'bytes_per_block': self.TEST_BLOCK_SIZE,
+        }
+        my_manifest = self.make_manifest(**specs)
+
+        coll = self.profile_new_collection_from_manifest(my_manifest)
+
+        coll.save_new()
+        self.profile_new_collection_from_server(coll.manifest_locator())
+
+        num_items = self.profile_collection_list_files(coll)
+        self.assertEqual(num_items, specs['streams'] * specs['files_per_stream'])
+
+        self.profile_new_collection_copying_bytes_from_collection(coll)
+
+        self.profile_new_collection_copying_files_from_collection(coll)

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list