[ARVADOS] created: 2b4a1e94e76f32be86540ed5048e8b544f9cb428

git at public.curoverse.com git at public.curoverse.com
Tue Dec 9 15:35:18 EST 2014


        at  2b4a1e94e76f32be86540ed5048e8b544f9cb428 (commit)


commit 2b4a1e94e76f32be86540ed5048e8b544f9cb428
Author: Tim Pierce <twp at curoverse.com>
Date:   Mon Dec 8 13:53:02 2014 -0500

    4499: Normalize manifest in one_task_per_input_file
    
    * arvados.job_setup.one_task_per_input_file now calls cr.normalize()
      before creating tasks.
    
    * Added unit test in test_sdk.py to confirm that the expected number of
      tasks is created when called on a nonnormalized manifest.

diff --git a/sdk/python/arvados/__init__.py b/sdk/python/arvados/__init__.py
index 060ed95..4cae20d 100644
--- a/sdk/python/arvados/__init__.py
+++ b/sdk/python/arvados/__init__.py
@@ -87,6 +87,7 @@ class job_setup:
             return
         job_input = current_job()['script_parameters']['input']
         cr = CollectionReader(job_input)
+        cr.normalize()
         for s in cr.all_streams():
             for f in s.all_files():
                 if input_as_path:
diff --git a/sdk/python/tests/test_sdk.py b/sdk/python/tests/test_sdk.py
new file mode 100644
index 0000000..3436a07
--- /dev/null
+++ b/sdk/python/tests/test_sdk.py
@@ -0,0 +1,43 @@
+import mock
+import os
+import unittest
+
+import arvados
+import arvados.collection
+
+class TestSDK(unittest.TestCase):
+
+    @mock.patch('arvados.api')
+    @mock.patch('arvados.current_task')
+    @mock.patch('arvados.current_job')
+    def test_one_task_per_input_file_normalize(self, mock_job, mock_task, mock_api):
+        # This manifest will be reduced from three lines to one when it is
+        # normalized.
+        nonnormalized_manifest = """. 5348b82a029fd9e971a811ce1f71360b+43 0:43:md5sum.txt
+. 085c37f02916da1cad16f93c54d899b7+41 0:41:md5sum.txt
+. 8b22da26f9f433dea0a10e5ec66d73ba+43 0:43:md5sum.txt
+"""
+        dummy_hash = 'ffffffffffffffffffffffffffffffff+0'
+
+        mock_job.return_value = {
+            'uuid': 'none',
+            'script_parameters': {
+                'input': dummy_hash
+            }
+        }
+        mock_task.return_value = {
+            'uuid': 'none',
+            'sequence': 0,
+        }
+        # mock the API client to return a collection with a nonnormalized manifest.
+        mock_api('v1').collections().get().execute.return_value = {
+            'uuid': 'zzzzz-4zz18-mockcollection0',
+            'portable_data_hash': dummy_hash,
+            'manifest_text': nonnormalized_manifest,
+        }
+
+        # Because one_task_per_input_file normalizes this collection,
+        # it should now create only one job task and not three.
+        arvados.job_setup.one_task_per_input_file(and_end_task=False)
+        mock_api('v1').job_tasks().create().execute.assert_called_once_with()
+

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list