[ARVADOS] updated: f9328bf507490f48e57b196be1de55d946644dea

git at public.curoverse.com git at public.curoverse.com
Fri Oct 31 18:41:06 EDT 2014


Summary of changes:
 sdk/python/arvados/collection.py     | 80 ++++++++++++++++++------------------
 sdk/python/arvados/commands/put.py   | 52 ++++++++++++-----------
 sdk/python/arvados/stream.py         |  2 +-
 sdk/python/tests/test_collections.py | 29 +++++++------
 4 files changed, 84 insertions(+), 79 deletions(-)

       via  f9328bf507490f48e57b196be1de55d946644dea (commit)
       via  7505be2bb39b0a96304c46e89cd5392c8c037215 (commit)
       via  fad871168f4500959d48eac695336e49a92535b7 (commit)
       via  3f9d8b5ca7288921a62f5cbc6c59d64c4fd16707 (commit)
      from  0e8d9b4c01c08f0efbccd91497177fc102aa5c25 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit f9328bf507490f48e57b196be1de55d946644dea
Author: Tom Clegg <tom at curoverse.com>
Date:   Fri Oct 31 17:21:31 2014 -0400

    3706: Really make the API server return the wrong data, instead of just saying so.

diff --git a/sdk/python/tests/test_collections.py b/sdk/python/tests/test_collections.py
index 00b2ac0..df8ba38 100644
--- a/sdk/python/tests/test_collections.py
+++ b/sdk/python/tests/test_collections.py
@@ -668,6 +668,9 @@ class CollectionTestMixin(object):
     DEFAULT_DATA_HASH = DEFAULT_COLLECTION['portable_data_hash']
     DEFAULT_MANIFEST = DEFAULT_COLLECTION['manifest_text']
     DEFAULT_UUID = DEFAULT_COLLECTION['uuid']
+    ALT_COLLECTION = API_COLLECTIONS['bar_file']
+    ALT_DATA_HASH = ALT_COLLECTION['portable_data_hash']
+    ALT_MANIFEST = ALT_COLLECTION['manifest_text']
 
     def _mock_api_call(self, mock_method, code, body):
         mock_method = mock_method().execute
@@ -747,11 +750,11 @@ class CollectionReaderTestCase(unittest.TestCase, CollectionTestMixin):
         # To verify that CollectionReader tries Keep first here, we
         # mock API server to return the wrong data.
         client = self.api_client_mock(200)
-        with tutil.mock_responses(self.DEFAULT_MANIFEST, 200):
+        with tutil.mock_responses(self.ALT_MANIFEST, 200):
             self.assertEqual(
-                self.DEFAULT_MANIFEST,
+                self.ALT_MANIFEST,
                 arvados.CollectionReader(
-                    self.DEFAULT_DATA_HASH + '+Affffffffffffffffffffffffffffffffffffffff at fedcba98',
+                    self.ALT_DATA_HASH + '+Affffffffffffffffffffffffffffffffffffffff at fedcba98',
                     api_client=client).manifest_text())
 
     def test_init_num_retries_propagated(self):

commit 7505be2bb39b0a96304c46e89cd5392c8c037215
Author: Tom Clegg <tom at curoverse.com>
Date:   Fri Oct 31 16:48:34 2014 -0400

    3706: Put --normalize and --max-manifest-depth arguments in a mutually_exclusive_group.

diff --git a/sdk/python/arvados/commands/put.py b/sdk/python/arvados/commands/put.py
index cc4fd53..d070a8b 100644
--- a/sdk/python/arvados/commands/put.py
+++ b/sdk/python/arvados/commands/put.py
@@ -27,11 +27,13 @@ api_client = None
 upload_opts = argparse.ArgumentParser(add_help=False)
 
 upload_opts.add_argument('paths', metavar='path', type=str, nargs='*',
-                    help="""
+                         help="""
 Local file or directory. Default: read from standard input.
 """)
 
-upload_opts.add_argument('--max-manifest-depth', type=int, metavar='N',
+_group = upload_opts.add_mutually_exclusive_group()
+
+_group.add_argument('--max-manifest-depth', type=int, metavar='N',
                     default=-1, help="""
 Maximum depth of directory tree to represent in the manifest
 structure. A directory structure deeper than this will be represented
@@ -40,56 +42,62 @@ a single stream. Default: -1 (unlimited), i.e., exactly one manifest
 stream per filesystem directory that contains files.
 """)
 
+_group.add_argument('--normalize', action='store_true',
+                    help="""
+Normalize the manifest by re-ordering files and streams after writing
+data.
+""")
+
 _group = upload_opts.add_mutually_exclusive_group()
 
 _group.add_argument('--as-stream', action='store_true', dest='stream',
-                   help="""
+                    help="""
 Synonym for --stream.
 """)
 
 _group.add_argument('--stream', action='store_true',
-                   help="""
+                    help="""
 Store the file content and display the resulting manifest on
 stdout. Do not write the manifest to Keep or save a Collection object
 in Arvados.
 """)
 
 _group.add_argument('--as-manifest', action='store_true', dest='manifest',
-                   help="""
+                    help="""
 Synonym for --manifest.
 """)
 
 _group.add_argument('--in-manifest', action='store_true', dest='manifest',
-                   help="""
+                    help="""
 Synonym for --manifest.
 """)
 
 _group.add_argument('--manifest', action='store_true',
-                   help="""
+                    help="""
 Store the file data and resulting manifest in Keep, save a Collection
 object in Arvados, and display the manifest locator (Collection uuid)
 on stdout. This is the default behavior.
 """)
 
 _group.add_argument('--as-raw', action='store_true', dest='raw',
-                   help="""
+                    help="""
 Synonym for --raw.
 """)
 
 _group.add_argument('--raw', action='store_true',
-                   help="""
+                    help="""
 Store the file content and display the data block locators on stdout,
 separated by commas, with a trailing newline. Do not store a
 manifest.
 """)
 
 upload_opts.add_argument('--use-filename', type=str, default=None,
-                    dest='filename', help="""
+                         dest='filename', help="""
 Synonym for --filename.
 """)
 
 upload_opts.add_argument('--filename', type=str, default=None,
-                    help="""
+                         help="""
 Use the given filename in the manifest, instead of the name of the
 local file. This is useful when "-" or "/dev/stdin" is given as an
 input file. It can be used only if there is exactly one path given and
@@ -97,17 +105,11 @@ it is not a directory. Implies --manifest.
 """)
 
 upload_opts.add_argument('--portable-data-hash', action='store_true',
-                    help="""
+                         help="""
 Print the portable data hash instead of the Arvados UUID for the collection
 created by the upload.
 """)
 
-upload_opts.add_argument('--normalize', action='store_true',
-                    help="""
-Normalize the manifest by re-ordering files and streams after writing
-data. This makes the --max-manifest-depth option ineffective.
-""")
-
 run_opts = argparse.ArgumentParser(add_help=False)
 
 run_opts.add_argument('--project-uuid', metavar='UUID', help="""
@@ -121,31 +123,31 @@ Save the collection with the specified name.
 
 _group = run_opts.add_mutually_exclusive_group()
 _group.add_argument('--progress', action='store_true',
-                   help="""
+                    help="""
 Display human-readable progress on stderr (bytes and, if possible,
 percentage of total data size). This is the default behavior when
 stderr is a tty.
 """)
 
 _group.add_argument('--no-progress', action='store_true',
-                   help="""
+                    help="""
 Do not display human-readable progress on stderr, even if stderr is a
 tty.
 """)
 
 _group.add_argument('--batch-progress', action='store_true',
-                   help="""
+                    help="""
 Display machine-readable progress on stderr (bytes and, if known,
 total data size).
 """)
 
 _group = run_opts.add_mutually_exclusive_group()
 _group.add_argument('--resume', action='store_true', default=True,
-                   help="""
+                    help="""
 Continue interrupted uploads from cached state (default).
 """)
 _group.add_argument('--no-resume', action='store_false', dest='resume',
-                   help="""
+                    help="""
 Do not continue interrupted uploads from cached state.
 """)
 

commit fad871168f4500959d48eac695336e49a92535b7
Author: Tom Clegg <tom at curoverse.com>
Date:   Fri Oct 31 16:44:19 2014 -0400

    3706: normalize() returns None instead of self. Add equally convenient, and more Pythonic, manifest_text(normalize=True) feature.

diff --git a/sdk/python/arvados/collection.py b/sdk/python/arvados/collection.py
index 64b2aa7..a7cb382 100644
--- a/sdk/python/arvados/collection.py
+++ b/sdk/python/arvados/collection.py
@@ -219,16 +219,13 @@ class CollectionReader(CollectionBase):
                 for r in f.segments:
                     streams[streamname][filename].extend(s.locators_and_ranges(r[0], r[1]))
 
-        self._streams = []
-        sortedstreams = list(streams.keys())
-        sortedstreams.sort()
-        for s in sortedstreams:
-            self._streams.append(normalize_stream(s, streams[s]))
+        self._streams = [normalize_stream(s, streams[s])
+                         for s in sorted(streams)]
 
         # Regenerate the manifest text based on the normalized streams
-        self._manifest_text = ''.join([StreamReader(stream, keep=self._my_keep()).manifest_text() for stream in self._streams])
-
-        return self
+        self._manifest_text = ''.join(
+            [StreamReader(stream, keep=self._my_keep()).manifest_text()
+             for stream in self._streams])
 
     def all_streams(self):
         self._populate()
@@ -240,8 +237,12 @@ class CollectionReader(CollectionBase):
             for f in s.all_files():
                 yield f
 
-    def manifest_text(self, strip=False):
-        if strip:
+    def manifest_text(self, strip=False, normalize=False):
+        if normalize:
+            cr = CollectionReader(self.manifest_text())
+            cr.normalize()
+            return cr.manifest_text(strip=strip, normalize=False)
+        elif strip:
             return self.stripped_manifest()
         else:
             self._populate()
diff --git a/sdk/python/arvados/commands/put.py b/sdk/python/arvados/commands/put.py
index ac81ab8..cc4fd53 100644
--- a/sdk/python/arvados/commands/put.py
+++ b/sdk/python/arvados/commands/put.py
@@ -454,14 +454,14 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr):
     if args.stream:
         output = writer.manifest_text()
         if args.normalize:
-            output = CollectionReader(output).normalize().manifest_text()
+            output = CollectionReader(output).manifest_text(normalize=True)
     elif args.raw:
         output = ','.join(writer.data_locators())
     else:
         try:
             manifest_text = writer.manifest_text()
             if args.normalize:
-                manifest_text = CollectionReader(manifest_text).normalize().manifest_text()
+                manifest_text = CollectionReader(manifest_text).manifest_text(normalize=True)
             # Register the resulting collection in Arvados.
             collection = api_client.collections().create(
                 body={
diff --git a/sdk/python/arvados/stream.py b/sdk/python/arvados/stream.py
index 0e153f5..c9a0613 100644
--- a/sdk/python/arvados/stream.py
+++ b/sdk/python/arvados/stream.py
@@ -208,7 +208,7 @@ class StreamFileReader(object):
         manifest_text = ['.']
         manifest_text.extend([d[LOCATOR] for d in self._stream._data_locators])
         manifest_text.extend(["{}:{}:{}".format(seg[LOCATOR], seg[BLOCKSIZE], self.name().replace(' ', '\\040')) for seg in self.segments])
-        return arvados.CollectionReader(' '.join(manifest_text) + '\n').normalize().manifest_text()
+        return arvados.CollectionReader(' '.join(manifest_text) + '\n').manifest_text(normalize=True)
 
 
 class StreamReader(object):
diff --git a/sdk/python/tests/test_collections.py b/sdk/python/tests/test_collections.py
index 45067ad..00b2ac0 100644
--- a/sdk/python/tests/test_collections.py
+++ b/sdk/python/tests/test_collections.py
@@ -167,7 +167,7 @@ class ArvadosCollectionsTest(run_test_server.TestCaseWithServers,
         self.check_manifest_file_sizes(cw.manifest_text(), [1,0])
         self.check_manifest_file_sizes(
             arvados.CollectionReader(
-                cw.manifest_text()).normalize().manifest_text(),
+                cw.manifest_text()).manifest_text(normalize=True),
             [0,1])
 
     def check_manifest_file_sizes(self, manifest_text, expect_sizes):
@@ -229,19 +229,19 @@ class ArvadosCollectionsTest(run_test_server.TestCaseWithServers,
 . 085c37f02916da1cad16f93c54d899b7+41 0:41:md5sum.txt
 . 8b22da26f9f433dea0a10e5ec66d73ba+43 0:43:md5sum.txt
 """
-        self.assertEqual(arvados.CollectionReader(m1, self.api_client).normalize().manifest_text(),
+        self.assertEqual(arvados.CollectionReader(m1, self.api_client).manifest_text(normalize=True),
                          """. 5348b82a029fd9e971a811ce1f71360b+43 085c37f02916da1cad16f93c54d899b7+41 8b22da26f9f433dea0a10e5ec66d73ba+43 0:127:md5sum.txt
 """)
 
         m2 = """. 204e43b8a1185621ca55a94839582e6f+67108864 b9677abbac956bd3e86b1deb28dfac03+67108864 fc15aff2a762b13f521baf042140acec+67108864 323d2a3ce20370c4ca1d3462a344f8fd+25885655 0:227212247:var-GS000016015-ASM.tsv.bz2
 """
-        self.assertEqual(arvados.CollectionReader(m2, self.api_client).normalize().manifest_text(), m2)
+        self.assertEqual(arvados.CollectionReader(m2, self.api_client).manifest_text(normalize=True), m2)
 
         m3 = """. 5348b82a029fd9e971a811ce1f71360b+43 3:40:md5sum.txt
 . 085c37f02916da1cad16f93c54d899b7+41 0:41:md5sum.txt
 . 8b22da26f9f433dea0a10e5ec66d73ba+43 0:43:md5sum.txt
 """
-        self.assertEqual(arvados.CollectionReader(m3, self.api_client).normalize().manifest_text(),
+        self.assertEqual(arvados.CollectionReader(m3, self.api_client).manifest_text(normalize=True),
                          """. 5348b82a029fd9e971a811ce1f71360b+43 085c37f02916da1cad16f93c54d899b7+41 8b22da26f9f433dea0a10e5ec66d73ba+43 3:124:md5sum.txt
 """)
 
@@ -249,7 +249,7 @@ class ArvadosCollectionsTest(run_test_server.TestCaseWithServers,
 ./zzz 204e43b8a1185621ca55a94839582e6f+67108864 0:999:zzz
 ./foo 323d2a3ce20370c4ca1d3462a344f8fd+25885655 0:3:bar
 """
-        self.assertEqual(arvados.CollectionReader(m4, self.api_client).normalize().manifest_text(),
+        self.assertEqual(arvados.CollectionReader(m4, self.api_client).manifest_text(normalize=True),
                          """./foo 204e43b8a1185621ca55a94839582e6f+67108864 323d2a3ce20370c4ca1d3462a344f8fd+25885655 0:3:bar 67108864:3:bar
 ./zzz 204e43b8a1185621ca55a94839582e6f+67108864 0:999:zzz
 """)
@@ -258,22 +258,22 @@ class ArvadosCollectionsTest(run_test_server.TestCaseWithServers,
 ./zzz 204e43b8a1185621ca55a94839582e6f+67108864 0:999:zzz
 ./foo 204e43b8a1185621ca55a94839582e6f+67108864 3:3:bar
 """
-        self.assertEqual(arvados.CollectionReader(m5, self.api_client).normalize().manifest_text(),
+        self.assertEqual(arvados.CollectionReader(m5, self.api_client).manifest_text(normalize=True),
                          """./foo 204e43b8a1185621ca55a94839582e6f+67108864 0:6:bar
 ./zzz 204e43b8a1185621ca55a94839582e6f+67108864 0:999:zzz
 """)
 
         with self.data_file('1000G_ref_manifest') as f6:
             m6 = f6.read()
-            self.assertEqual(arvados.CollectionReader(m6, self.api_client).normalize().manifest_text(), m6)
+            self.assertEqual(arvados.CollectionReader(m6, self.api_client).manifest_text(normalize=True), m6)
 
         with self.data_file('jlake_manifest') as f7:
             m7 = f7.read()
-            self.assertEqual(arvados.CollectionReader(m7, self.api_client).normalize().manifest_text(), m7)
+            self.assertEqual(arvados.CollectionReader(m7, self.api_client).manifest_text(normalize=True), m7)
 
         m8 = """./a\\040b\\040c 59ca0efa9f5633cb0371bbc0355478d8+13 0:13:hello\\040world.txt
 """
-        self.assertEqual(arvados.CollectionReader(m8, self.api_client).normalize().manifest_text(), m8)
+        self.assertEqual(arvados.CollectionReader(m8, self.api_client).manifest_text(normalize=True), m8)
 
     def test_locators_and_ranges(self):
         blocks2 = [['a', 10, 0],
@@ -504,7 +504,7 @@ class ArvadosCollectionsTest(run_test_server.TestCaseWithServers,
 . 085c37f02916da1cad16f93c54d899b7+41 5348b82a029fd9e971a811ce1f71360b+43 8b22da26f9f433dea0a10e5ec66d73ba+43 40:80:md9sum.txt
 """
 
-        m2 = arvados.CollectionReader(m1, self.api_client).normalize().manifest_text()
+        m2 = arvados.CollectionReader(m1, self.api_client).manifest_text(normalize=True)
 
         self.assertEqual(m2,
                          ". 5348b82a029fd9e971a811ce1f71360b+43 085c37f02916da1cad16f93c54d899b7+41 8b22da26f9f433dea0a10e5ec66d73ba+43 0:43:md5sum.txt 43:41:md6sum.txt 84:43:md7sum.txt 6:37:md8sum.txt 84:43:md8sum.txt 83:1:md9sum.txt 0:43:md9sum.txt 84:36:md9sum.txt\n")

commit 3f9d8b5ca7288921a62f5cbc6c59d64c4fd16707
Author: Tom Clegg <tom at curoverse.com>
Date:   Fri Oct 31 16:30:07 2014 -0400

    3706: Catch and return exceptions in _populate_* methods to dry up _populate()

diff --git a/sdk/python/arvados/collection.py b/sdk/python/arvados/collection.py
index 5b00ea0..64b2aa7 100644
--- a/sdk/python/arvados/collection.py
+++ b/sdk/python/arvados/collection.py
@@ -138,27 +138,35 @@ class CollectionReader(CollectionBase):
         self._streams = None
 
     def _populate_from_api_server(self):
-        # As in KeepClient itself, we must wait until the last possible
-        # moment to instantiate an API client, in order to avoid
-        # tripping up clients that don't have access to an API server.
-        # If we do build one, make sure our Keep client uses it.
-        # If instantiation fails, we'll fall back to the except clause,
-        # just like any other Collection lookup failure.
-        if self._api_client is None:
-            self._api_client = arvados.api('v1')
-            self._keep_client = None  # Make a new one with the new api.
-        c = self._api_client.collections().get(
-            uuid=self._manifest_locator).execute(
-            num_retries=self.num_retries)
-        self._manifest_text = c['manifest_text']
+        # As in KeepClient itself, we must wait until the last
+        # possible moment to instantiate an API client, in order to
+        # avoid tripping up clients that don't have access to an API
+        # server.  If we do build one, make sure our Keep client uses
+        # it.  If instantiation fails, we'll fall back to the except
+        # clause, just like any other Collection lookup
+        # failure. Return an exception, or None if successful.
+        try:
+            if self._api_client is None:
+                self._api_client = arvados.api('v1')
+                self._keep_client = None  # Make a new one with the new api.
+            c = self._api_client.collections().get(
+                uuid=self._manifest_locator).execute(
+                num_retries=self.num_retries)
+            self._manifest_text = c['manifest_text']
+            return None
+        except Exception as e:
+            return e
 
     def _populate_from_keep(self):
         # Retrieve a manifest directly from Keep. This has a chance of
         # working if [a] the locator includes a permission signature
         # or [b] the Keep services are operating in world-readable
-        # mode.
-        self._manifest_text = self._my_keep().get(
-            self._manifest_locator, num_retries=self.num_retries)
+        # mode. Return an exception, or None if successful.
+        try:
+            self._manifest_text = self._my_keep().get(
+                self._manifest_locator, num_retries=self.num_retries)
+        except Exception as e:
+            return e
 
     def _populate(self):
         if self._streams is not None:
@@ -170,25 +178,16 @@ class CollectionReader(CollectionBase):
                 self._manifest_locator))
         if (not self._manifest_text and
             util.signed_locator_pattern.match(self._manifest_locator)):
-            try:
-                self._populate_from_keep()
-            except e:
-                error_via_keep = e
+            error_via_keep = self._populate_from_keep()
         if not self._manifest_text:
-            try:
-                self._populate_from_api_server()
-            except Exception as e:
-                if not should_try_keep:
-                    raise
-                error_via_api = e
+            error_via_api = self._populate_from_api_server()
+            if error_via_api != None and not should_try_keep:
+                raise error_via_api
         if (not self._manifest_text and
             not error_via_keep and
             should_try_keep):
             # Looks like a keep locator, and we didn't already try keep above
-            try:
-                self._populate_from_keep()
-            except Exception as e:
-                error_via_keep = e
+            error_via_keep = self._populate_from_keep()
         if not self._manifest_text:
             # Nothing worked!
             raise arvados.errors.NotFoundError(

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list