[ARVADOS] updated: 1.2.0-308-gda53a8d80
Git user
git at public.curoverse.com
Thu Nov 8 14:58:20 EST 2018
Summary of changes:
sdk/python/arvados/arvfile.py | 24 ++++++++++++++++++++++++
sdk/python/arvados/collection.py | 27 +++++++++++----------------
2 files changed, 35 insertions(+), 16 deletions(-)
via da53a8d809db6f47cbcf03739fbfbef25e52b6fa (commit)
from c3b26754a231ec909506f2ff28af1af9f2e27f2b (commit)
Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.
commit da53a8d809db6f47cbcf03739fbfbef25e52b6fa
Author: Lucas Di Pentima <ldipentima at veritasgenetics.com>
Date: Thu Nov 8 16:57:13 2018 -0300
14259: Improvements on remote blocks copying logic.
Arvados-DCO-1.1-Signed-off-by: Lucas Di Pentima <ldipentima at veritasgenetics.com>
diff --git a/sdk/python/arvados/arvfile.py b/sdk/python/arvados/arvfile.py
index f58c882e2..3281d78e2 100644
--- a/sdk/python/arvados/arvfile.py
+++ b/sdk/python/arvados/arvfile.py
@@ -903,12 +903,36 @@ class ArvadosFile(object):
@synchronized
def has_remote_blocks(self):
"""Returns True if any of the segment's locators has a +R signature"""
+
for s in self._segments:
if '+R' in s.locator:
return True
return False
@synchronized
+ def _copy_remote_blocks(self, remote_blocks={}):
+ """Ask Keep to copy remote blocks and point to their local copies.
+
+ This is called from the parent Collection.
+
+ :remote_blocks:
+ Shared cache of remote to local block mappings. This is used to avoid
+ doing extra work when blocks are shared by more than one file in
+ different subdirectories.
+ """
+
+ for s in self._segments:
+ if '+R' in s.locator:
+ try:
+ loc = remote_blocks[s.locator]
+ except KeyError:
+ loc = self.parent._my_keep().refresh_signature(s.locator)
+ remote_blocks[s.locator] = loc
+ s.locator = loc
+ self.parent.set_committed(False)
+ return remote_blocks
+
+ @synchronized
def segments(self):
return copy.copy(self._segments)
diff --git a/sdk/python/arvados/collection.py b/sdk/python/arvados/collection.py
index d63e9424e..65e48927c 100644
--- a/sdk/python/arvados/collection.py
+++ b/sdk/python/arvados/collection.py
@@ -549,11 +549,19 @@ class RichCollectionBase(CollectionBase):
def has_remote_blocks(self):
"""Recursively check for a +R segment locator signature."""
+ if self._has_remote_blocks:
+ return True
for item in self:
if self[item].has_remote_blocks():
return True
return False
+ @synchronized
+ def set_has_remote_blocks(self, val):
+ self._has_remote_blocks = val
+ if self.parent:
+ self.parent.set_has_remote_blocks(val)
+
@must_be_writable
@synchronized
def find_or_create(self, path, create_type):
@@ -842,6 +850,8 @@ class RichCollectionBase(CollectionBase):
self._items[target_name] = item
self.set_committed(False)
+ if not self._has_remote_blocks and source_obj.has_remote_blocks():
+ self.set_has_remote_blocks(True)
if modified_from:
self.notify(MOD, self, target_name, (modified_from, item))
@@ -911,8 +921,6 @@ class RichCollectionBase(CollectionBase):
source_obj, target_dir, target_name = self._get_src_target(source, target_path, source_collection, True)
target_dir.add(source_obj, target_name, overwrite, False)
- if not self._has_remote_blocks and source_obj.has_remote_blocks():
- self._has_remote_blocks = True
@must_be_writable
@synchronized
@@ -939,8 +947,6 @@ class RichCollectionBase(CollectionBase):
if not source_obj.writable():
raise IOError(errno.EROFS, "Source collection is read only", source)
target_dir.add(source_obj, target_name, overwrite, True)
- if not self._has_remote_blocks and source_obj.has_remote_blocks():
- self._has_remote_blocks = True
def portable_manifest_text(self, stream_name="."):
"""Get the manifest text for this collection, sub collections and files.
@@ -1052,18 +1058,7 @@ class RichCollectionBase(CollectionBase):
"""
for item in self:
- if isinstance(self[item], ArvadosFile):
- for s in self[item].segments():
- if '+R' in s.locator:
- try:
- loc = remote_blocks[s.locator]
- except KeyError:
- loc = self._my_keep().refresh_signature(s.locator)
- remote_blocks[s.locator] = loc
- s.locator = loc
- self.set_committed(False)
- elif isinstance(self[item], RichCollectionBase):
- remote_blocks = self[item]._copy_remote_blocks(remote_blocks)
+ remote_blocks = self[item]._copy_remote_blocks(remote_blocks)
return remote_blocks
@synchronized
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list