[ARVADOS] updated: 1.2.0-191-g71fa8bdbd
Git user
git at public.curoverse.com
Tue Nov 6 09:40:10 EST 2018
Summary of changes:
sdk/python/arvados/arvfile.py | 8 ++++++++
sdk/python/arvados/collection.py | 31 ++++++++++++++++++++++++++-----
sdk/python/tests/test_collections.py | 7 +++++++
3 files changed, 41 insertions(+), 5 deletions(-)
via 71fa8bdbd8bfb44ce3551d1e69ada6f2780e815c (commit)
from b8addea7d6c8c464a2743191561470332eeaba13 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
commit 71fa8bdbd8bfb44ce3551d1e69ada6f2780e815c
Author: Lucas Di Pentima <ldipentima at veritasgenetics.com>
Date: Tue Nov 6 11:39:14 2018 -0300
14259: Don't scan the entire collection for remote blocks when not needed.
Arvados-DCO-1.1-Signed-off-by: Lucas Di Pentima <ldipentima at veritasgenetics.com>
diff --git a/sdk/python/arvados/arvfile.py b/sdk/python/arvados/arvfile.py
index f4580f346..f58c882e2 100644
--- a/sdk/python/arvados/arvfile.py
+++ b/sdk/python/arvados/arvfile.py
@@ -901,6 +901,14 @@ class ArvadosFile(object):
return False
@synchronized
+ def has_remote_blocks(self):
+ """Returns True if any of the segment's locators has a +R signature"""
+ for s in self._segments:
+ if '+R' in s.locator:
+ return True
+ return False
+
+ @synchronized
def segments(self):
return copy.copy(self._segments)
diff --git a/sdk/python/arvados/collection.py b/sdk/python/arvados/collection.py
index 13301b297..d63e9424e 100644
--- a/sdk/python/arvados/collection.py
+++ b/sdk/python/arvados/collection.py
@@ -520,6 +520,7 @@ class RichCollectionBase(CollectionBase):
def __init__(self, parent=None):
self.parent = parent
self._committed = False
+ self._has_remote_blocks = False
self._callback = None
self._items = {}
@@ -544,6 +545,15 @@ class RichCollectionBase(CollectionBase):
def stream_name(self):
raise NotImplementedError()
+ @synchronized
+ def has_remote_blocks(self):
+ """Recursively check for a +R segment locator signature."""
+
+ for item in self:
+ if self[item].has_remote_blocks():
+ return True
+ return False
+
@must_be_writable
@synchronized
def find_or_create(self, path, create_type):
@@ -901,6 +911,8 @@ class RichCollectionBase(CollectionBase):
source_obj, target_dir, target_name = self._get_src_target(source, target_path, source_collection, True)
target_dir.add(source_obj, target_name, overwrite, False)
+ if not self._has_remote_blocks and source_obj.has_remote_blocks():
+ self._has_remote_blocks = True
@must_be_writable
@synchronized
@@ -927,6 +939,8 @@ class RichCollectionBase(CollectionBase):
if not source_obj.writable():
raise IOError(errno.EROFS, "Source collection is read only", source)
target_dir.add(source_obj, target_name, overwrite, True)
+ if not self._has_remote_blocks and source_obj.has_remote_blocks():
+ self._has_remote_blocks = True
def portable_manifest_text(self, stream_name="."):
"""Get the manifest text for this collection, sub collections and files.
@@ -1286,8 +1300,12 @@ class Collection(RichCollectionBase):
self._manifest_locator = manifest_locator_or_text
elif re.match(arvados.util.collection_uuid_pattern, manifest_locator_or_text):
self._manifest_locator = manifest_locator_or_text
+ if not self._has_local_collection_uuid():
+ self._has_remote_blocks = True
elif re.match(arvados.util.manifest_pattern, manifest_locator_or_text):
self._manifest_text = manifest_locator_or_text
+ if '+R' in self._manifest_text:
+ self._has_remote_blocks = True
else:
raise errors.ArgumentError(
"Argument to CollectionReader is not a manifest or a collection UUID")
@@ -1537,10 +1555,11 @@ class Collection(RichCollectionBase):
t = trash_at.strftime("%Y-%m-%dT%H:%M:%S.%fZ")
body["trash_at"] = t
- # Copy any remote blocks to the local cluster.
- self._copy_remote_blocks(remote_blocks={})
-
if not self.committed():
+ if self._has_remote_blocks:
+ # Copy any remote blocks to the local cluster.
+ self._copy_remote_blocks(remote_blocks={})
+ self._has_remote_blocks = False
if not self._has_collection_uuid():
raise AssertionError("Collection manifest_locator is not a collection uuid. Use save_new() for new collections.")
elif not self._has_local_collection_uuid():
@@ -1629,8 +1648,10 @@ class Collection(RichCollectionBase):
if trash_at and type(trash_at) is not datetime.datetime:
raise errors.ArgumentError("trash_at must be datetime type.")
- # Copy any remote blocks to the local cluster.
- self._copy_remote_blocks(remote_blocks={})
+ if self._has_remote_blocks:
+ # Copy any remote blocks to the local cluster.
+ self._copy_remote_blocks(remote_blocks={})
+ self._has_remote_blocks = False
self._my_block_manager().commit_all()
text = self.manifest_text(strip=False)
diff --git a/sdk/python/tests/test_collections.py b/sdk/python/tests/test_collections.py
index 491f95d9d..ac18c44c6 100644
--- a/sdk/python/tests/test_collections.py
+++ b/sdk/python/tests/test_collections.py
@@ -1198,9 +1198,13 @@ class NewCollectionTestCaseWithServersAndTokens(run_test_server.TestCaseWithServ
c = Collection(". " + remote_block_loc + " 0:3:foofile.txt\n")
self.assertEqual(
len(re.findall(self.remote_locator_re, c.manifest_text())), 1)
+ self.assertEqual(
+ len(re.findall(self.local_locator_re, c.manifest_text())), 0)
c.save_new()
rs_mock.assert_called()
self.assertEqual(
+ len(re.findall(self.remote_locator_re, c.manifest_text())), 0)
+ self.assertEqual(
len(re.findall(self.local_locator_re, c.manifest_text())), 1)
@mock.patch('arvados.keep.KeepClient.refresh_signature')
@@ -1224,9 +1228,12 @@ class NewCollectionTestCaseWithServersAndTokens(run_test_server.TestCaseWithServ
# Copy remote file to local collection
local_c.copy('./foofile.txt', './copied/foofile.txt', remote_c)
self.assertEqual(
+ len(re.findall(self.local_locator_re, local_c.manifest_text())), 1)
+ self.assertEqual(
len(re.findall(self.remote_locator_re, local_c.manifest_text())), 1)
# Save local collection: remote block should be copied
local_c.save()
+ rs_mock.assert_called()
self.assertEqual(
len(re.findall(self.local_locator_re, local_c.manifest_text())), 2)
self.assertEqual(
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list