[ARVADOS] updated: 1.1.3-87-g52f37db

Thu Feb 22 15:59:37 EST 2018

Summary of changes:
 sdk/python/arvados/collection.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

       via  52f37db5889f22e4dc02e0e468d561783e76c188 (commit)
      from  92002b9af320e3229f59ca0778d1906e663f3066 (commit)

Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.


commit 52f37db5889f22e4dc02e0e468d561783e76c188
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date:   Thu Feb 22 15:58:56 2018 -0500

    13064: Explicitly precompile regexes used in manifest parsing
    
    Seems to improve parsing time roughly 5-10%
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>

diff --git a/sdk/python/arvados/collection.py b/sdk/python/arvados/collection.py
index 4be098d..33333ee 100644
--- a/sdk/python/arvados/collection.py
+++ b/sdk/python/arvados/collection.py
@@ -1531,6 +1531,10 @@ class Collection(RichCollectionBase):
 
         return text
 
+    _token_re = re.compile(r'(\S+)(\s+|$)')
+    _block_re = re.compile(r'[0-9a-f]{32}\+(\d+)(\+\S+)*')
+    _segment_re = re.compile(r'(\d+):(\d+):(\S+)')
+
     @synchronized
     def _import_manifest(self, manifest_text):
         """Import a manifest into a `Collection`.
@@ -1549,7 +1553,7 @@ class Collection(RichCollectionBase):
         stream_name = None
         state = STREAM_NAME
 
-        for token_and_separator in re.finditer(r'(\S+)(\s+|$)', manifest_text):
+        for token_and_separator in self._token_re.finditer(manifest_text):
             tok = token_and_separator.group(1)
             sep = token_and_separator.group(2)
 
@@ -1564,7 +1568,7 @@ class Collection(RichCollectionBase):
                 continue
 
             if state == BLOCKS:
-                block_locator = re.match(r'[0-9a-f]{32}\+(\d+)(\+\S+)*', tok)
+                block_locator = self._block_re.match(tok)
                 if block_locator:
                     blocksize = int(block_locator.group(1))
                     blocks.append(Range(tok, streamoffset, blocksize, 0))
@@ -1573,7 +1577,7 @@ class Collection(RichCollectionBase):
                     state = SEGMENTS
 
             if state == SEGMENTS:
-                file_segment = re.search(r'^(\d+):(\d+):(\S+)', tok)
+                file_segment = self._segment_re.match(tok)
                 if file_segment:
                     pos = int(file_segment.group(1))
                     size = int(file_segment.group(2))

-----------------------------------------------------------------------


hooks/post-receive
--