[ARVADOS] created: bf0945d1136d2578e40909d54e8614085d6f9c34

Git user git at public.curoverse.com
Wed Feb 8 18:06:20 EST 2017


        at  bf0945d1136d2578e40909d54e8614085d6f9c34 (commit)


commit bf0945d1136d2578e40909d54e8614085d6f9c34
Author: Lucas Di Pentima <lucas at curoverse.com>
Date:   Wed Feb 8 20:03:32 2017 -0300

    10956: When asked for the recently uploaded collection's pdh, arv-put will compute a pdh from the local collection's manifest and compare it with the API server provided version. If they differ, it will log a warning, always returning the API server's version.

diff --git a/sdk/python/arvados/commands/put.py b/sdk/python/arvados/commands/put.py
index d421d2c..c26bb04 100644
--- a/sdk/python/arvados/commands/put.py
+++ b/sdk/python/arvados/commands/put.py
@@ -737,7 +737,14 @@ class ArvPutUploadJob(object):
         return self._my_collection().manifest_locator()
 
     def portable_data_hash(self):
-        return self._my_collection().portable_data_hash()
+        pdh = self._my_collection().portable_data_hash()
+        m = self._my_collection().stripped_manifest()
+        local_pdh = hashlib.md5(m).hexdigest() + '+' + str(len(m))
+        if pdh != local_pdh:
+            logger.warning("\n".join([
+                "arv-put: API server provided PDH differs from local manifest.",
+                "         This should not happen; showing API server version."]))
+        return pdh
 
     def manifest_text(self, stream_name=".", strip=False, normalize=False):
         return self._my_collection().manifest_text(stream_name, strip, normalize)

commit ca56623679bcf5733a3266711f513f8c23f8b0df
Author: Lucas Di Pentima <lucas at curoverse.com>
Date:   Wed Feb 8 19:37:45 2017 -0300

    10956: Get PDH from API server's response when saving a collection so that it doesn't have to be calculated when being asked for later on.
    Updated tests to reflect this change.

diff --git a/sdk/python/arvados/collection.py b/sdk/python/arvados/collection.py
index debe7de..f735b9e 100644
--- a/sdk/python/arvados/collection.py
+++ b/sdk/python/arvados/collection.py
@@ -1063,8 +1063,13 @@ class RichCollectionBase(CollectionBase):
 
     def portable_data_hash(self):
         """Get the portable data hash for this collection's manifest."""
-        stripped = self.portable_manifest_text()
-        return hashlib.md5(stripped).hexdigest() + '+' + str(len(stripped))
+        if self._manifest_locator and self.committed():
+            # If the collection is already saved on the API server, and it's committed
+            # then return API server's PDH response.
+            return self._portable_data_hash
+        else:
+            stripped = self.portable_manifest_text()
+            return hashlib.md5(stripped).hexdigest() + '+' + str(len(stripped))
 
     @synchronized
     def subscribe(self, callback):
@@ -1203,6 +1208,7 @@ class Collection(RichCollectionBase):
         self.num_retries = num_retries if num_retries is not None else 0
         self._manifest_locator = None
         self._manifest_text = None
+        self._portable_data_hash = None
         self._api_response = None
         self._past_versions = set()
 
@@ -1469,6 +1475,7 @@ class Collection(RichCollectionBase):
                 ).execute(
                     num_retries=num_retries))
             self._manifest_text = self._api_response["manifest_text"]
+            self._portable_data_hash = self._api_response["portable_data_hash"]
             self.set_committed()
 
         return self._manifest_text
@@ -1527,6 +1534,7 @@ class Collection(RichCollectionBase):
             text = self._api_response["manifest_text"]
 
             self._manifest_locator = self._api_response["uuid"]
+            self._portable_data_hash = self._api_response["portable_data_hash"]
 
             self._manifest_text = text
             self.set_committed()
diff --git a/sdk/python/tests/test_arvfile.py b/sdk/python/tests/test_arvfile.py
index 8f02d51..1b66935 100644
--- a/sdk/python/tests/test_arvfile.py
+++ b/sdk/python/tests/test_arvfile.py
@@ -68,7 +68,8 @@ class ArvadosFileWriterTestCase(unittest.TestCase):
                                                  "manifest_text":". 781e5e245d69b566979b86e28d23f2c7+10 0:8:count.txt\n",
                                                  "replication_desired":None},
                                                 {"uuid":"zzzzz-4zz18-mockcollection0",
-                                                 "manifest_text":". 781e5e245d69b566979b86e28d23f2c7+10 0:8:count.txt\n"})
+                                                 "manifest_text":". 781e5e245d69b566979b86e28d23f2c7+10 0:8:count.txt\n",
+                                                 "portable_data_hash":"7fcd0eaac3aad4c31a6a0e756475da92+52"})
         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
                              api_client=api, keep_client=keep) as c:
             writer = c.open("count.txt", "r+")
@@ -96,7 +97,8 @@ class ArvadosFileWriterTestCase(unittest.TestCase):
                                                  "manifest_text": ". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:13:count.txt\n",
                                                  "replication_desired":None},
                                                 {"uuid":"zzzzz-4zz18-mockcollection0",
-                                                 "manifest_text": ". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:13:count.txt\n"})
+                                                 "manifest_text": ". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:13:count.txt\n",
+                                                 "portable_data_hash":"c5c3af76565c8efb6a806546bcf073f3+88"})
         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
                              api_client=api, keep_client=keep) as c:
             writer = c.open("count.txt", "r+")
@@ -233,7 +235,8 @@ class ArvadosFileWriterTestCase(unittest.TestCase):
                                                  "manifest_text": ". a5de24f4417cfba9d5825eadc2f4ca49+67108000 598cc1a4ccaef8ab6e4724d87e675d78+32892000 0:100000000:count.txt\n",
                                                  "replication_desired":None},
                                                 {"uuid":"zzzzz-4zz18-mockcollection0",
-                                                 "manifest_text": ". a5de24f4417cfba9d5825eadc2f4ca49+67108000 598cc1a4ccaef8ab6e4724d87e675d78+32892000 0:100000000:count.txt\n"})
+                                                 "manifest_text": ". a5de24f4417cfba9d5825eadc2f4ca49+67108000 598cc1a4ccaef8ab6e4724d87e675d78+32892000 0:100000000:count.txt\n",
+                                                 "portable_data_hash":"9132ca8e3f671c76103a38f5bc24328c+108"})
         with Collection('. ' + arvados.config.EMPTY_BLOCK_LOCATOR + ' 0:0:count.txt',
                              api_client=api, keep_client=keep) as c:
             writer = c.open("count.txt", "r+")
@@ -325,7 +328,8 @@ class ArvadosFileWriterTestCase(unittest.TestCase):
                                                  "manifest_text": ". 37400a68af9abdd76ca5bf13e819e42a+32892003 a5de24f4417cfba9d5825eadc2f4ca49+67108000 32892000:3:count.txt 32892006:67107997:count.txt 0:32892000:count.txt\n",
                                                  "replication_desired":None},
                                                 {"uuid":"zzzzz-4zz18-mockcollection0",
-                                                 "manifest_text": ". 37400a68af9abdd76ca5bf13e819e42a+32892003 a5de24f4417cfba9d5825eadc2f4ca49+67108000 32892000:3:count.txt 32892006:67107997:count.txt 0:32892000:count.txt\n"})
+                                                 "manifest_text": ". 37400a68af9abdd76ca5bf13e819e42a+32892003 a5de24f4417cfba9d5825eadc2f4ca49+67108000 32892000:3:count.txt 32892006:67107997:count.txt 0:32892000:count.txt\n",
+                                                 "portable_data_hash":"217665c6b713e1b78dfba7ebd42344db+156"})
         with Collection('. ' + arvados.config.EMPTY_BLOCK_LOCATOR + ' 0:0:count.txt',
                              api_client=api, keep_client=keep) as c:
             writer = c.open("count.txt", "r+")
@@ -348,7 +352,8 @@ class ArvadosFileWriterTestCase(unittest.TestCase):
                                                  "manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n",
                                                  "replication_desired":None},
                                                 {"uuid":"zzzzz-4zz18-mockcollection0",
-                                                 "manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n"})
+                                                 "manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n",
+                                                 "portable_data_hash":"7a461a8c58601798f690f8b368ac4423+51"})
         with Collection(api_client=api, keep_client=keep) as c:
             writer = c.open("count.txt", "w+")
             self.assertEqual(writer.size(), 0)
@@ -370,7 +375,8 @@ class ArvadosFileWriterTestCase(unittest.TestCase):
                                                  "manifest_text":"./foo/bar 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n",
                                                  "replication_desired":None},
                                                 {"uuid":"zzzzz-4zz18-mockcollection0",
-                                                 "manifest_text":"./foo/bar 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n"})
+                                                 "manifest_text":"./foo/bar 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n",
+                                                 "portable_data_hash":"1b02aaa62528d28a5be41651cbb9d7c7+59"})
         with Collection(api_client=api, keep_client=keep) as c:
             self.assertIsNone(c.api_response())
             writer = c.open("foo/bar/count.txt", "w+")
@@ -386,7 +392,8 @@ class ArvadosFileWriterTestCase(unittest.TestCase):
                                                  "manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n",
                                                  "replication_desired":None},
                                                 {"uuid":"zzzzz-4zz18-mockcollection0",
-                                                 "manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n"})
+                                                 "manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n",
+                                                 "portable_data_hash":"7a461a8c58601798f690f8b368ac4423+51"})
         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
                              api_client=api, keep_client=keep) as c:
             writer = c.open("count.txt", "w+")
@@ -416,7 +423,8 @@ class ArvadosFileWriterTestCase(unittest.TestCase):
                                                  "manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 e8dc4081b13434b45189a720b77b6818+8 0:8:count1.txt 8:8:count2.txt\n",
                                                  "replication_desired":None},
                                                 {"uuid":"zzzzz-4zz18-mockcollection0",
-                                                 "manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 e8dc4081b13434b45189a720b77b6818+8 0:8:count1.txt 8:8:count2.txt\n"})
+                                                 "manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 e8dc4081b13434b45189a720b77b6818+8 0:8:count1.txt 8:8:count2.txt\n",
+                                                 "portable_data_hash":"71e7bb6c00d31fc2b4364199fd97be08+102"})
         with Collection(api_client=api, keep_client=keep) as c:
             w1 = c.open("count1.txt", "w")
             w2 = c.open("count2.txt", "w")

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list