[ARVADOS] updated: 97f5239f053b1691d7f2cb56230386921f8ea4d4
git at public.curoverse.com
git at public.curoverse.com
Thu Jul 9 23:03:26 EDT 2015
Summary of changes:
sdk/python/tests/manifest_examples.py | 21 +++++
.../tests/performance/performance_profiler.py | 4 +-
sdk/python/tests/test_benchmark_collections.py | 97 ++++++++++++++++++++++
3 files changed, 120 insertions(+), 2 deletions(-)
create mode 100644 sdk/python/tests/manifest_examples.py
create mode 100644 sdk/python/tests/test_benchmark_collections.py
via 97f5239f053b1691d7f2cb56230386921f8ea4d4 (commit)
via 1088f78459a6ac24b91673625ef72976dfb99fd3 (commit)
from 64736a9fa518af448b98e6277185acc269bd5ade (commit)
Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.
commit 97f5239f053b1691d7f2cb56230386921f8ea4d4
Merge: 64736a9 1088f78
Author: Tom Clegg <tom at curoverse.com>
Date: Thu Jul 9 23:02:59 2015 -0400
Merge branch '6218-python-sdk-perf' closes #6218
commit 1088f78459a6ac24b91673625ef72976dfb99fd3
Author: Tom Clegg <tom at curoverse.com>
Date: Tue Jul 7 11:28:05 2015 -0400
6218: Generate profiling data for a few arvados.collection.Collection scenarios.
diff --git a/sdk/python/tests/manifest_examples.py b/sdk/python/tests/manifest_examples.py
new file mode 100644
index 0000000..2d8e475
--- /dev/null
+++ b/sdk/python/tests/manifest_examples.py
@@ -0,0 +1,21 @@
+import arvados
+import arvados_testutil as tutil
+import hashlib
+
+class ManifestExamples(object):
+ def make_manifest(self,
+ bytes_per_block=1,
+ blocks_per_file=1,
+ files_per_stream=1,
+ streams=1):
+ datablip = 'x' * bytes_per_block
+ data_loc = '{}+{}'.format(hashlib.md5(datablip).hexdigest(),
+ bytes_per_block)
+ with tutil.mock_keep_responses(data_loc, 200):
+ coll = arvados.CollectionWriter()
+ for si in range(0, streams):
+ for fi in range(0, files_per_stream):
+ with coll.open("stream{}/file{}.txt".format(si, fi)) as f:
+ for bi in range(0, blocks_per_file):
+ f.write(datablip)
+ return coll.manifest_text()
diff --git a/sdk/python/tests/performance/performance_profiler.py b/sdk/python/tests/performance/performance_profiler.py
index a36ce25..afa53ae 100644
--- a/sdk/python/tests/performance/performance_profiler.py
+++ b/sdk/python/tests/performance/performance_profiler.py
@@ -42,8 +42,8 @@ def profiled(function):
caught = e
pr.disable()
ps = pstats.Stats(pr, stream=outfile)
- ps.print_stats()
+ ps.sort_stats('time').print_stats()
if caught:
- raise caught
+ raise
return ret
return profiled_function
diff --git a/sdk/python/tests/test_benchmark_collections.py b/sdk/python/tests/test_benchmark_collections.py
new file mode 100644
index 0000000..d75ad47
--- /dev/null
+++ b/sdk/python/tests/test_benchmark_collections.py
@@ -0,0 +1,97 @@
+import arvados
+import sys
+
+import run_test_server
+import arvados_testutil as tutil
+import manifest_examples
+from performance.performance_profiler import profiled
+
+class CollectionBenchmark(run_test_server.TestCaseWithServers,
+ tutil.ArvadosBaseTestCase,
+ manifest_examples.ManifestExamples):
+ MAIN_SERVER = {}
+ TEST_BLOCK_SIZE = 0
+
+ @classmethod
+ def list_recursive(cls, coll, parent_name=None):
+ if parent_name is None:
+ current_name = coll.stream_name()
+ else:
+ current_name = '{}/{}'.format(parent_name, coll.name)
+ try:
+ for name in coll:
+ for item in cls.list_recursive(coll[name], current_name):
+ yield item
+ except TypeError:
+ yield current_name
+
+ @classmethod
+ def setUpClass(cls):
+ super(CollectionBenchmark, cls).setUpClass()
+ run_test_server.authorize_with('active')
+ cls.api_client = arvados.api('v1')
+ cls.keep_client = arvados.KeepClient(api_client=cls.api_client,
+ local_store=cls.local_store)
+
+ @profiled
+ def profile_new_collection_from_manifest(self, manifest_text):
+ return arvados.collection.Collection(manifest_text)
+
+ @profiled
+ def profile_new_collection_from_server(self, uuid):
+ return arvados.collection.Collection(uuid)
+
+ @profiled
+ def profile_new_collection_copying_bytes_from_collection(self, src):
+ dst = arvados.collection.Collection()
+ with tutil.mock_keep_responses('x'*self.TEST_BLOCK_SIZE, 200):
+ for name in self.list_recursive(src):
+ with src.open(name) as srcfile, dst.open(name, 'w') as dstfile:
+ dstfile.write(srcfile.read())
+ dst.save_new()
+
+ @profiled
+ def profile_new_collection_copying_files_from_collection(self, src):
+ dst = arvados.collection.Collection()
+ with tutil.mock_keep_responses('x'*self.TEST_BLOCK_SIZE, 200):
+ for name in self.list_recursive(src):
+ dst.copy(name, name, src)
+ dst.save_new()
+
+ @profiled
+ def profile_collection_list_files(self, coll):
+ return sum(1 for name in self.list_recursive(coll))
+
+ def test_medium_sized_manifest(self):
+ """Exercise manifest-handling code.
+
+ Currently, this test puts undue emphasis on some code paths
+ that don't reflect typical use because the contrived example
+ manifest has some unusual characteristics:
+
+ * Block size is zero.
+
+ * Every block is identical, so block caching patterns are
+ unrealistic.
+
+ * Every file begins and ends at a block boundary.
+ """
+ specs = {
+ 'streams': 100,
+ 'files_per_stream': 100,
+ 'blocks_per_file': 20,
+ 'bytes_per_block': self.TEST_BLOCK_SIZE,
+ }
+ my_manifest = self.make_manifest(**specs)
+
+ coll = self.profile_new_collection_from_manifest(my_manifest)
+
+ coll.save_new()
+ self.profile_new_collection_from_server(coll.manifest_locator())
+
+ num_items = self.profile_collection_list_files(coll)
+ self.assertEqual(num_items, specs['streams'] * specs['files_per_stream'])
+
+ self.profile_new_collection_copying_bytes_from_collection(coll)
+
+ self.profile_new_collection_copying_files_from_collection(coll)
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list