[ARVADOS] created: 2cdf96e15bbd5d45742a659f6ddcd7e3ddada855
git at public.curoverse.com
git at public.curoverse.com
Mon Feb 10 12:15:34 EST 2014
at 2cdf96e15bbd5d45742a659f6ddcd7e3ddada855 (commit)
commit 2cdf96e15bbd5d45742a659f6ddcd7e3ddada855
Author: Tom Clegg <tom at curoverse.com>
Date: Sun Feb 9 15:29:00 2014 -0800
When retrieving a blob, if local Keep servers don't have it and the
locator has +K@xyzzy, try GET http://keep.xyzzy.arvadosapi.com/hash.
diff --git a/sdk/python/arvados/keep.py b/sdk/python/arvados/keep.py
index b2bf3b4..e1902d1 100644
--- a/sdk/python/arvados/keep.py
+++ b/sdk/python/arvados/keep.py
@@ -174,26 +174,38 @@ class KeepClient(object):
return KeepClient.local_store_get(locator)
expect_hash = re.sub(r'\+.*', '', locator)
for service_root in self.shuffled_service_roots(expect_hash):
- h = httplib2.Http()
url = service_root + expect_hash
api_token = config.get('ARVADOS_API_TOKEN')
headers = {'Authorization': "OAuth2 %s" % api_token,
'Accept': 'application/octet-stream'}
- try:
- resp, content = h.request(url.encode('utf-8'), 'GET',
- headers=headers)
- if re.match(r'^2\d\d$', resp['status']):
- m = hashlib.new('md5')
- m.update(content)
- md5 = m.hexdigest()
- if md5 == expect_hash:
- return content
- logging.warning("Checksum fail: md5(%s) = %s" % (url, md5))
- except (httplib2.HttpLib2Error, httplib.ResponseNotReady) as e:
- logging.info("Request fail: GET %s => %s: %s" %
- (url, type(e), str(e)))
+ blob = self.get_url(url, headers, expect_hash)
+ if blob:
+ return blob
+ for location_hint in re.finditer(r'\+K@([a-z0-9]+)', locator):
+ instance = location_hint.group(1)
+ url = 'http://keep.' + instance + '.arvadosapi.com/' + expect_hash
+ blob = self.get_url(url, {}, expect_hash)
+ if blob:
+ return blob
raise arvados.errors.NotFoundError("Block not found: %s" % expect_hash)
+ def get_url(self, url, headers, expect_hash):
+ h = httplib2.Http()
+ try:
+ resp, content = h.request(url.encode('utf-8'), 'GET',
+ headers=headers)
+ if re.match(r'^2\d\d$', resp['status']):
+ m = hashlib.new('md5')
+ m.update(content)
+ md5 = m.hexdigest()
+ if md5 == expect_hash:
+ return content
+ logging.warning("Checksum fail: md5(%s) = %s" % (url, md5))
+ except Exception as e:
+ logging.info("Request fail: GET %s => %s: %s" %
+ (url, type(e), str(e)))
+ return None
+
def put(self, data, **kwargs):
if 'KEEP_LOCAL_STORE' in os.environ:
return KeepClient.local_store_put(data)
commit cfa8a6978a03b4b2c3092581b0b4eb8e8f8e9d6b
Author: Tom Clegg <tom at curoverse.com>
Date: Sun Feb 9 14:54:58 2014 -0800
Retrieve manifest_text from API server. If that fails, emit a warning
and fall back to reading directly from Keep.
This gives the API server an opportunity to provide additional
metadata, like hints about where the data blobs are stored.
diff --git a/sdk/python/arvados/collection.py b/sdk/python/arvados/collection.py
index 8e39318..ea98d00 100644
--- a/sdk/python/arvados/collection.py
+++ b/sdk/python/arvados/collection.py
@@ -43,7 +43,14 @@ class CollectionReader(object):
if self._streams != None:
return
if not self._manifest_text:
- self._manifest_text = Keep.get(self._manifest_locator)
+ try:
+ c = arvados.api('v1').collections().get(
+ uuid=self._manifest_locator).execute()
+ self._manifest_text = c['manifest_text']
+ except Exception as e:
+ logging.warning("API lookup failed for collection %s (%s: %s)" %
+ (self._manifest_locator, type(e), str(e)))
+ self._manifest_text = Keep.get(self._manifest_locator)
self._streams = []
for stream_line in self._manifest_text.split("\n"):
if stream_line != '':
diff --git a/sdk/python/bin/arv-get b/sdk/python/bin/arv-get
index 4154a3d..30beedc 100755
--- a/sdk/python/bin/arv-get
+++ b/sdk/python/bin/arv-get
@@ -124,7 +124,16 @@ if not get_prefix:
logger.error('Local file %s already exists' % args.destination)
sys.exit(1)
with open(args.destination, 'wb') as f:
- f.write(arvados.Keep.get(collection))
+ try:
+ c = arvados.api('v1').collections().get(
+ uuid=collection).execute()
+ manifest = c['manifest_text']
+ except Exception as e:
+ logging.warning(
+ "API lookup failed for collection %s (%s: %s)" %
+ (collection, type(e), str(e)))
+ manifest = arvados.Keep.get(collection)
+ f.write(manifest)
sys.exit(0)
except arvados.errors.NotFoundError as e:
logger.error(e)
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list