[ARVADOS] created: bf0945d1136d2578e40909d54e8614085d6f9c34
Git user
git at public.curoverse.com
Wed Feb 8 18:06:20 EST 2017
at bf0945d1136d2578e40909d54e8614085d6f9c34 (commit)
commit bf0945d1136d2578e40909d54e8614085d6f9c34
Author: Lucas Di Pentima <lucas at curoverse.com>
Date: Wed Feb 8 20:03:32 2017 -0300
10956: When asked for the recently uploaded collection's PDH, arv-put now computes a PDH from the local collection's manifest and compares it with the one provided by the API server. If they differ, it logs a warning; in either case it returns the API server's version.
diff --git a/sdk/python/arvados/commands/put.py b/sdk/python/arvados/commands/put.py
index d421d2c..c26bb04 100644
--- a/sdk/python/arvados/commands/put.py
+++ b/sdk/python/arvados/commands/put.py
@@ -737,7 +737,14 @@ class ArvPutUploadJob(object):
return self._my_collection().manifest_locator()
def portable_data_hash(self):
- return self._my_collection().portable_data_hash()
+ pdh = self._my_collection().portable_data_hash()
+ m = self._my_collection().stripped_manifest()
+ local_pdh = hashlib.md5(m).hexdigest() + '+' + str(len(m))
+ if pdh != local_pdh:
+ logger.warning("\n".join([
+ "arv-put: API server provided PDH differs from local manifest.",
+ " This should not happen; showing API server version."]))
+ return pdh
def manifest_text(self, stream_name=".", strip=False, normalize=False):
return self._my_collection().manifest_text(stream_name, strip, normalize)
commit ca56623679bcf5733a3266711f513f8c23f8b0df
Author: Lucas Di Pentima <lucas at curoverse.com>
Date: Wed Feb 8 19:37:45 2017 -0300
10956: Get the PDH from the API server's response when saving a collection, so that it doesn't have to be calculated when it is requested later on.
Updated tests to reflect this change.
diff --git a/sdk/python/arvados/collection.py b/sdk/python/arvados/collection.py
index debe7de..f735b9e 100644
--- a/sdk/python/arvados/collection.py
+++ b/sdk/python/arvados/collection.py
@@ -1063,8 +1063,13 @@ class RichCollectionBase(CollectionBase):
def portable_data_hash(self):
"""Get the portable data hash for this collection's manifest."""
- stripped = self.portable_manifest_text()
- return hashlib.md5(stripped).hexdigest() + '+' + str(len(stripped))
+ if self._manifest_locator and self.committed():
+ # If the collection is already saved on the API server, and it's committed
+ # then return API server's PDH response.
+ return self._portable_data_hash
+ else:
+ stripped = self.portable_manifest_text()
+ return hashlib.md5(stripped).hexdigest() + '+' + str(len(stripped))
@synchronized
def subscribe(self, callback):
@@ -1203,6 +1208,7 @@ class Collection(RichCollectionBase):
self.num_retries = num_retries if num_retries is not None else 0
self._manifest_locator = None
self._manifest_text = None
+ self._portable_data_hash = None
self._api_response = None
self._past_versions = set()
@@ -1469,6 +1475,7 @@ class Collection(RichCollectionBase):
).execute(
num_retries=num_retries))
self._manifest_text = self._api_response["manifest_text"]
+ self._portable_data_hash = self._api_response["portable_data_hash"]
self.set_committed()
return self._manifest_text
@@ -1527,6 +1534,7 @@ class Collection(RichCollectionBase):
text = self._api_response["manifest_text"]
self._manifest_locator = self._api_response["uuid"]
+ self._portable_data_hash = self._api_response["portable_data_hash"]
self._manifest_text = text
self.set_committed()
diff --git a/sdk/python/tests/test_arvfile.py b/sdk/python/tests/test_arvfile.py
index 8f02d51..1b66935 100644
--- a/sdk/python/tests/test_arvfile.py
+++ b/sdk/python/tests/test_arvfile.py
@@ -68,7 +68,8 @@ class ArvadosFileWriterTestCase(unittest.TestCase):
"manifest_text":". 781e5e245d69b566979b86e28d23f2c7+10 0:8:count.txt\n",
"replication_desired":None},
{"uuid":"zzzzz-4zz18-mockcollection0",
- "manifest_text":". 781e5e245d69b566979b86e28d23f2c7+10 0:8:count.txt\n"})
+ "manifest_text":". 781e5e245d69b566979b86e28d23f2c7+10 0:8:count.txt\n",
+ "portable_data_hash":"7fcd0eaac3aad4c31a6a0e756475da92+52"})
with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
api_client=api, keep_client=keep) as c:
writer = c.open("count.txt", "r+")
@@ -96,7 +97,8 @@ class ArvadosFileWriterTestCase(unittest.TestCase):
"manifest_text": ". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:13:count.txt\n",
"replication_desired":None},
{"uuid":"zzzzz-4zz18-mockcollection0",
- "manifest_text": ". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:13:count.txt\n"})
+ "manifest_text": ". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:13:count.txt\n",
+ "portable_data_hash":"c5c3af76565c8efb6a806546bcf073f3+88"})
with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
api_client=api, keep_client=keep) as c:
writer = c.open("count.txt", "r+")
@@ -233,7 +235,8 @@ class ArvadosFileWriterTestCase(unittest.TestCase):
"manifest_text": ". a5de24f4417cfba9d5825eadc2f4ca49+67108000 598cc1a4ccaef8ab6e4724d87e675d78+32892000 0:100000000:count.txt\n",
"replication_desired":None},
{"uuid":"zzzzz-4zz18-mockcollection0",
- "manifest_text": ". a5de24f4417cfba9d5825eadc2f4ca49+67108000 598cc1a4ccaef8ab6e4724d87e675d78+32892000 0:100000000:count.txt\n"})
+ "manifest_text": ". a5de24f4417cfba9d5825eadc2f4ca49+67108000 598cc1a4ccaef8ab6e4724d87e675d78+32892000 0:100000000:count.txt\n",
+ "portable_data_hash":"9132ca8e3f671c76103a38f5bc24328c+108"})
with Collection('. ' + arvados.config.EMPTY_BLOCK_LOCATOR + ' 0:0:count.txt',
api_client=api, keep_client=keep) as c:
writer = c.open("count.txt", "r+")
@@ -325,7 +328,8 @@ class ArvadosFileWriterTestCase(unittest.TestCase):
"manifest_text": ". 37400a68af9abdd76ca5bf13e819e42a+32892003 a5de24f4417cfba9d5825eadc2f4ca49+67108000 32892000:3:count.txt 32892006:67107997:count.txt 0:32892000:count.txt\n",
"replication_desired":None},
{"uuid":"zzzzz-4zz18-mockcollection0",
- "manifest_text": ". 37400a68af9abdd76ca5bf13e819e42a+32892003 a5de24f4417cfba9d5825eadc2f4ca49+67108000 32892000:3:count.txt 32892006:67107997:count.txt 0:32892000:count.txt\n"})
+ "manifest_text": ". 37400a68af9abdd76ca5bf13e819e42a+32892003 a5de24f4417cfba9d5825eadc2f4ca49+67108000 32892000:3:count.txt 32892006:67107997:count.txt 0:32892000:count.txt\n",
+ "portable_data_hash":"217665c6b713e1b78dfba7ebd42344db+156"})
with Collection('. ' + arvados.config.EMPTY_BLOCK_LOCATOR + ' 0:0:count.txt',
api_client=api, keep_client=keep) as c:
writer = c.open("count.txt", "r+")
@@ -348,7 +352,8 @@ class ArvadosFileWriterTestCase(unittest.TestCase):
"manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n",
"replication_desired":None},
{"uuid":"zzzzz-4zz18-mockcollection0",
- "manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n"})
+ "manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n",
+ "portable_data_hash":"7a461a8c58601798f690f8b368ac4423+51"})
with Collection(api_client=api, keep_client=keep) as c:
writer = c.open("count.txt", "w+")
self.assertEqual(writer.size(), 0)
@@ -370,7 +375,8 @@ class ArvadosFileWriterTestCase(unittest.TestCase):
"manifest_text":"./foo/bar 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n",
"replication_desired":None},
{"uuid":"zzzzz-4zz18-mockcollection0",
- "manifest_text":"./foo/bar 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n"})
+ "manifest_text":"./foo/bar 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n",
+ "portable_data_hash":"1b02aaa62528d28a5be41651cbb9d7c7+59"})
with Collection(api_client=api, keep_client=keep) as c:
self.assertIsNone(c.api_response())
writer = c.open("foo/bar/count.txt", "w+")
@@ -386,7 +392,8 @@ class ArvadosFileWriterTestCase(unittest.TestCase):
"manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n",
"replication_desired":None},
{"uuid":"zzzzz-4zz18-mockcollection0",
- "manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n"})
+ "manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n",
+ "portable_data_hash":"7a461a8c58601798f690f8b368ac4423+51"})
with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
api_client=api, keep_client=keep) as c:
writer = c.open("count.txt", "w+")
@@ -416,7 +423,8 @@ class ArvadosFileWriterTestCase(unittest.TestCase):
"manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 e8dc4081b13434b45189a720b77b6818+8 0:8:count1.txt 8:8:count2.txt\n",
"replication_desired":None},
{"uuid":"zzzzz-4zz18-mockcollection0",
- "manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 e8dc4081b13434b45189a720b77b6818+8 0:8:count1.txt 8:8:count2.txt\n"})
+ "manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 e8dc4081b13434b45189a720b77b6818+8 0:8:count1.txt 8:8:count2.txt\n",
+ "portable_data_hash":"71e7bb6c00d31fc2b4364199fd97be08+102"})
with Collection(api_client=api, keep_client=keep) as c:
w1 = c.open("count1.txt", "w")
w2 = c.open("count2.txt", "w")
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits mailing list