[ARVADOS] created: 2b4a1e94e76f32be86540ed5048e8b544f9cb428
git at public.curoverse.com
git at public.curoverse.com
Tue Dec 9 15:35:18 EST 2014
at 2b4a1e94e76f32be86540ed5048e8b544f9cb428 (commit)
commit 2b4a1e94e76f32be86540ed5048e8b544f9cb428
Author: Tim Pierce <twp at curoverse.com>
Date: Mon Dec 8 13:53:02 2014 -0500
4499: Normalize manifest in one_task_per_input_file
* arvados.job_setup.one_task_per_input_file now calls cr.normalize()
before creating tasks.
* Added unit test in test_sdk.py to confirm that the expected number of
tasks is created when called on a non-normalized manifest.
diff --git a/sdk/python/arvados/__init__.py b/sdk/python/arvados/__init__.py
index 060ed95..4cae20d 100644
--- a/sdk/python/arvados/__init__.py
+++ b/sdk/python/arvados/__init__.py
@@ -87,6 +87,7 @@ class job_setup:
return
job_input = current_job()['script_parameters']['input']
cr = CollectionReader(job_input)
+ cr.normalize()
for s in cr.all_streams():
for f in s.all_files():
if input_as_path:
diff --git a/sdk/python/tests/test_sdk.py b/sdk/python/tests/test_sdk.py
new file mode 100644
index 0000000..3436a07
--- /dev/null
+++ b/sdk/python/tests/test_sdk.py
@@ -0,0 +1,43 @@
+import mock
+import os
+import unittest
+
+import arvados
+import arvados.collection
+
+class TestSDK(unittest.TestCase):
+
+ @mock.patch('arvados.api')
+ @mock.patch('arvados.current_task')
+ @mock.patch('arvados.current_job')
+ def test_one_task_per_input_file_normalize(self, mock_job, mock_task, mock_api):
+ # This manifest will be reduced from three lines to one when it is
+ # normalized.
+ nonnormalized_manifest = """. 5348b82a029fd9e971a811ce1f71360b+43 0:43:md5sum.txt
+. 085c37f02916da1cad16f93c54d899b7+41 0:41:md5sum.txt
+. 8b22da26f9f433dea0a10e5ec66d73ba+43 0:43:md5sum.txt
+"""
+ dummy_hash = 'ffffffffffffffffffffffffffffffff+0'
+
+ mock_job.return_value = {
+ 'uuid': 'none',
+ 'script_parameters': {
+ 'input': dummy_hash
+ }
+ }
+ mock_task.return_value = {
+ 'uuid': 'none',
+ 'sequence': 0,
+ }
+ # mock the API client to return a collection with a nonnormalized manifest.
+ mock_api('v1').collections().get().execute.return_value = {
+ 'uuid': 'zzzzz-4zz18-mockcollection0',
+ 'portable_data_hash': dummy_hash,
+ 'manifest_text': nonnormalized_manifest,
+ }
+
+ # Because one_task_per_input_file normalizes this collection,
+ # it should now create only one job task and not three.
+ arvados.job_setup.one_task_per_input_file(and_end_task=False)
+ mock_api('v1').job_tasks().create().execute.assert_called_once_with()
+
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list