[ARVADOS] updated: 1.2.0-191-g71fa8bdbd

Git user git at public.curoverse.com
Tue Nov 6 09:40:10 EST 2018


Summary of changes:
 sdk/python/arvados/arvfile.py        |  8 ++++++++
 sdk/python/arvados/collection.py     | 31 ++++++++++++++++++++++++++-----
 sdk/python/tests/test_collections.py |  7 +++++++
 3 files changed, 41 insertions(+), 5 deletions(-)

       via  71fa8bdbd8bfb44ce3551d1e69ada6f2780e815c (commit)
      from  b8addea7d6c8c464a2743191561470332eeaba13 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit 71fa8bdbd8bfb44ce3551d1e69ada6f2780e815c
Author: Lucas Di Pentima <ldipentima at veritasgenetics.com>
Date:   Tue Nov 6 11:39:14 2018 -0300

    14259: Don't scan the entire collection for remote blocks when not needed.
    
    Arvados-DCO-1.1-Signed-off-by: Lucas Di Pentima <ldipentima at veritasgenetics.com>

diff --git a/sdk/python/arvados/arvfile.py b/sdk/python/arvados/arvfile.py
index f4580f346..f58c882e2 100644
--- a/sdk/python/arvados/arvfile.py
+++ b/sdk/python/arvados/arvfile.py
@@ -901,6 +901,14 @@ class ArvadosFile(object):
         return False
 
     @synchronized
+    def has_remote_blocks(self):
+        """Returns True if any of the segment's locators has a +R signature"""
+        for s in self._segments:
+            if '+R' in s.locator:
+                return True
+        return False
+
+    @synchronized
     def segments(self):
         return copy.copy(self._segments)
 
diff --git a/sdk/python/arvados/collection.py b/sdk/python/arvados/collection.py
index 13301b297..d63e9424e 100644
--- a/sdk/python/arvados/collection.py
+++ b/sdk/python/arvados/collection.py
@@ -520,6 +520,7 @@ class RichCollectionBase(CollectionBase):
     def __init__(self, parent=None):
         self.parent = parent
         self._committed = False
+        self._has_remote_blocks = False
         self._callback = None
         self._items = {}
 
@@ -544,6 +545,15 @@ class RichCollectionBase(CollectionBase):
     def stream_name(self):
         raise NotImplementedError()
 
+    @synchronized
+    def has_remote_blocks(self):
+        """Recursively check for a +R segment locator signature."""
+
+        for item in self:
+            if self[item].has_remote_blocks():
+                return True
+        return False
+
     @must_be_writable
     @synchronized
     def find_or_create(self, path, create_type):
@@ -901,6 +911,8 @@ class RichCollectionBase(CollectionBase):
 
         source_obj, target_dir, target_name = self._get_src_target(source, target_path, source_collection, True)
         target_dir.add(source_obj, target_name, overwrite, False)
+        if not self._has_remote_blocks and source_obj.has_remote_blocks():
+            self._has_remote_blocks = True
 
     @must_be_writable
     @synchronized
@@ -927,6 +939,8 @@ class RichCollectionBase(CollectionBase):
         if not source_obj.writable():
             raise IOError(errno.EROFS, "Source collection is read only", source)
         target_dir.add(source_obj, target_name, overwrite, True)
+        if not self._has_remote_blocks and source_obj.has_remote_blocks():
+            self._has_remote_blocks = True
 
     def portable_manifest_text(self, stream_name="."):
         """Get the manifest text for this collection, sub collections and files.
@@ -1286,8 +1300,12 @@ class Collection(RichCollectionBase):
                 self._manifest_locator = manifest_locator_or_text
             elif re.match(arvados.util.collection_uuid_pattern, manifest_locator_or_text):
                 self._manifest_locator = manifest_locator_or_text
+                if not self._has_local_collection_uuid():
+                    self._has_remote_blocks = True
             elif re.match(arvados.util.manifest_pattern, manifest_locator_or_text):
                 self._manifest_text = manifest_locator_or_text
+                if '+R' in self._manifest_text:
+                    self._has_remote_blocks = True
             else:
                 raise errors.ArgumentError(
                     "Argument to CollectionReader is not a manifest or a collection UUID")
@@ -1537,10 +1555,11 @@ class Collection(RichCollectionBase):
             t = trash_at.strftime("%Y-%m-%dT%H:%M:%S.%fZ")
             body["trash_at"] = t
 
-        # Copy any remote blocks to the local cluster.
-        self._copy_remote_blocks(remote_blocks={})
-
         if not self.committed():
+            if self._has_remote_blocks:
+                # Copy any remote blocks to the local cluster.
+                self._copy_remote_blocks(remote_blocks={})
+                self._has_remote_blocks = False
             if not self._has_collection_uuid():
                 raise AssertionError("Collection manifest_locator is not a collection uuid.  Use save_new() for new collections.")
             elif not self._has_local_collection_uuid():
@@ -1629,8 +1648,10 @@ class Collection(RichCollectionBase):
         if trash_at and type(trash_at) is not datetime.datetime:
             raise errors.ArgumentError("trash_at must be datetime type.")
 
-        # Copy any remote blocks to the local cluster.
-        self._copy_remote_blocks(remote_blocks={})
+        if self._has_remote_blocks:
+            # Copy any remote blocks to the local cluster.
+            self._copy_remote_blocks(remote_blocks={})
+            self._has_remote_blocks = False
 
         self._my_block_manager().commit_all()
         text = self.manifest_text(strip=False)
diff --git a/sdk/python/tests/test_collections.py b/sdk/python/tests/test_collections.py
index 491f95d9d..ac18c44c6 100644
--- a/sdk/python/tests/test_collections.py
+++ b/sdk/python/tests/test_collections.py
@@ -1198,9 +1198,13 @@ class NewCollectionTestCaseWithServersAndTokens(run_test_server.TestCaseWithServ
         c = Collection(". " + remote_block_loc + " 0:3:foofile.txt\n")
         self.assertEqual(
             len(re.findall(self.remote_locator_re, c.manifest_text())), 1)
+        self.assertEqual(
+            len(re.findall(self.local_locator_re, c.manifest_text())), 0)
         c.save_new()
         rs_mock.assert_called()
         self.assertEqual(
+            len(re.findall(self.remote_locator_re, c.manifest_text())), 0)
+        self.assertEqual(
             len(re.findall(self.local_locator_re, c.manifest_text())), 1)
 
     @mock.patch('arvados.keep.KeepClient.refresh_signature')
@@ -1224,9 +1228,12 @@ class NewCollectionTestCaseWithServersAndTokens(run_test_server.TestCaseWithServ
         # Copy remote file to local collection
         local_c.copy('./foofile.txt', './copied/foofile.txt', remote_c)
         self.assertEqual(
+            len(re.findall(self.local_locator_re, local_c.manifest_text())), 1)
+        self.assertEqual(
             len(re.findall(self.remote_locator_re, local_c.manifest_text())), 1)
         # Save local collection: remote block should be copied
         local_c.save()
+        rs_mock.assert_called()
         self.assertEqual(
             len(re.findall(self.local_locator_re, local_c.manifest_text())), 2)
         self.assertEqual(

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list