[ARVADOS] updated: 1.1.3-87-g52f37db
Git user
git at public.curoverse.com
Thu Feb 22 15:59:37 EST 2018
Summary of changes:
sdk/python/arvados/collection.py | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
via 52f37db5889f22e4dc02e0e468d561783e76c188 (commit)
from 92002b9af320e3229f59ca0778d1906e663f3066 (commit)
Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.
commit 52f37db5889f22e4dc02e0e468d561783e76c188
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date: Thu Feb 22 15:58:56 2018 -0500
13064: Explicitly precompile regexes used in manifest parsing
Seems to improve parsing time by roughly 5-10%.
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>
diff --git a/sdk/python/arvados/collection.py b/sdk/python/arvados/collection.py
index 4be098d..33333ee 100644
--- a/sdk/python/arvados/collection.py
+++ b/sdk/python/arvados/collection.py
@@ -1531,6 +1531,10 @@ class Collection(RichCollectionBase):
return text
+ _token_re = re.compile(r'(\S+)(\s+|$)')
+ _block_re = re.compile(r'[0-9a-f]{32}\+(\d+)(\+\S+)*')
+ _segment_re = re.compile(r'(\d+):(\d+):(\S+)')
+
@synchronized
def _import_manifest(self, manifest_text):
"""Import a manifest into a `Collection`.
@@ -1549,7 +1553,7 @@ class Collection(RichCollectionBase):
stream_name = None
state = STREAM_NAME
- for token_and_separator in re.finditer(r'(\S+)(\s+|$)', manifest_text):
+ for token_and_separator in self._token_re.finditer(manifest_text):
tok = token_and_separator.group(1)
sep = token_and_separator.group(2)
@@ -1564,7 +1568,7 @@ class Collection(RichCollectionBase):
continue
if state == BLOCKS:
- block_locator = re.match(r'[0-9a-f]{32}\+(\d+)(\+\S+)*', tok)
+ block_locator = self._block_re.match(tok)
if block_locator:
blocksize = int(block_locator.group(1))
blocks.append(Range(tok, streamoffset, blocksize, 0))
@@ -1573,7 +1577,7 @@ class Collection(RichCollectionBase):
state = SEGMENTS
if state == SEGMENTS:
- file_segment = re.search(r'^(\d+):(\d+):(\S+)', tok)
+ file_segment = self._segment_re.match(tok)
if file_segment:
pos = int(file_segment.group(1))
size = int(file_segment.group(2))
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list