[ARVADOS] created: ff963e6ce16fc276cb41b5fb66f8febdbae30d95
git at public.curoverse.com
git at public.curoverse.com
Sun Feb 9 19:00:06 EST 2014
at ff963e6ce16fc276cb41b5fb66f8febdbae30d95 (commit)
commit ff963e6ce16fc276cb41b5fb66f8febdbae30d95
Author: Tom Clegg <tom at curoverse.com>
Date: Sun Feb 9 15:29:00 2014 -0800
When retrieving a blob, if local Keep servers don't have it and the
locator has +K at xyzzy, try GET http://keep.xyzzy.arvadosapi.com/hash.
diff --git a/sdk/python/arvados/keep.py b/sdk/python/arvados/keep.py
index b2bf3b4..e1902d1 100644
--- a/sdk/python/arvados/keep.py
+++ b/sdk/python/arvados/keep.py
@@ -174,26 +174,38 @@ class KeepClient(object):
return KeepClient.local_store_get(locator)
expect_hash = re.sub(r'\+.*', '', locator)
for service_root in self.shuffled_service_roots(expect_hash):
- h = httplib2.Http()
url = service_root + expect_hash
api_token = config.get('ARVADOS_API_TOKEN')
headers = {'Authorization': "OAuth2 %s" % api_token,
'Accept': 'application/octet-stream'}
- try:
- resp, content = h.request(url.encode('utf-8'), 'GET',
- headers=headers)
- if re.match(r'^2\d\d$', resp['status']):
- m = hashlib.new('md5')
- m.update(content)
- md5 = m.hexdigest()
- if md5 == expect_hash:
- return content
- logging.warning("Checksum fail: md5(%s) = %s" % (url, md5))
- except (httplib2.HttpLib2Error, httplib.ResponseNotReady) as e:
- logging.info("Request fail: GET %s => %s: %s" %
- (url, type(e), str(e)))
+ blob = self.get_url(url, headers, expect_hash)
+ if blob:
+ return blob
+ for location_hint in re.finditer(r'\+K@([a-z0-9]+)', locator):
+ instance = location_hint.group(1)
+ url = 'http://keep.' + instance + '.arvadosapi.com/' + expect_hash
+ blob = self.get_url(url, {}, expect_hash)
+ if blob:
+ return blob
raise arvados.errors.NotFoundError("Block not found: %s" % expect_hash)
+ def get_url(self, url, headers, expect_hash):
+ h = httplib2.Http()
+ try:
+ resp, content = h.request(url.encode('utf-8'), 'GET',
+ headers=headers)
+ if re.match(r'^2\d\d$', resp['status']):
+ m = hashlib.new('md5')
+ m.update(content)
+ md5 = m.hexdigest()
+ if md5 == expect_hash:
+ return content
+ logging.warning("Checksum fail: md5(%s) = %s" % (url, md5))
+ except Exception as e:
+ logging.info("Request fail: GET %s => %s: %s" %
+ (url, type(e), str(e)))
+ return None
+
def put(self, data, **kwargs):
if 'KEEP_LOCAL_STORE' in os.environ:
return KeepClient.local_store_put(data)
commit 7865079f2d9a33368099037b7dfc1676145f09e0
Author: Tom Clegg <tom at curoverse.com>
Date: Sun Feb 9 14:54:58 2014 -0800
Retrieve manifest_text from API server. If that fails, emit a warning
and fall back to reading directly from Keep.
This gives the API server an opportunity to provide additional
metadata, like hints about where the data blobs are stored.
diff --git a/sdk/python/arvados/collection.py b/sdk/python/arvados/collection.py
index 8e39318..ea98d00 100644
--- a/sdk/python/arvados/collection.py
+++ b/sdk/python/arvados/collection.py
@@ -43,7 +43,14 @@ class CollectionReader(object):
if self._streams != None:
return
if not self._manifest_text:
- self._manifest_text = Keep.get(self._manifest_locator)
+ try:
+ c = arvados.api('v1').collections().get(
+ uuid=self._manifest_locator).execute()
+ self._manifest_text = c['manifest_text']
+ except Exception as e:
+ logging.warning("API lookup failed for collection %s (%s: %s)" %
+ (self._manifest_locator, type(e), str(e)))
+ self._manifest_text = Keep.get(self._manifest_locator)
self._streams = []
for stream_line in self._manifest_text.split("\n"):
if stream_line != '':
diff --git a/sdk/python/bin/arv-get b/sdk/python/bin/arv-get
index 4154a3d..30beedc 100755
--- a/sdk/python/bin/arv-get
+++ b/sdk/python/bin/arv-get
@@ -124,7 +124,16 @@ if not get_prefix:
logger.error('Local file %s already exists' % args.destination)
sys.exit(1)
with open(args.destination, 'wb') as f:
- f.write(arvados.Keep.get(collection))
+ try:
+ c = arvados.api('v1').collections().get(
+ uuid=collection).execute()
+ manifest = c['manifest_text']
+ except Exception as e:
+ logging.warning(
+ "API lookup failed for collection %s (%s: %s)" %
+ (collection, type(e), str(e)))
+ manifest = arvados.Keep.get(collection)
+ f.write(manifest)
sys.exit(0)
except arvados.errors.NotFoundError as e:
logger.error(e)
commit be64bab4a991a6a48120d868c23e43d9024b6103
Author: Tom Clegg <tom at curoverse.com>
Date: Sun Feb 9 14:36:51 2014 -0800
Include manifest_text and portable_manifest_text in collections.get
API response, and allow client to provide either one or both (and a
uuid matching either one) during collections.create.
diff --git a/services/api/app/controllers/arvados/v1/collections_controller.rb b/services/api/app/controllers/arvados/v1/collections_controller.rb
index feed5ce..454ee18 100644
--- a/services/api/app/controllers/arvados/v1/collections_controller.rb
+++ b/services/api/app/controllers/arvados/v1/collections_controller.rb
@@ -51,6 +51,10 @@ class Arvados::V1::CollectionsController < ApplicationController
show
end
+ def show
+ render json: @object.as_api_response(:with_data)
+ end
+
def collection_uuid(uuid)
m = /([a-f0-9]{32}(\+[0-9]+)?)(\+.*)?/.match(uuid)
if m
diff --git a/services/api/app/models/collection.rb b/services/api/app/models/collection.rb
index 03e5e4e..47e4c8d 100644
--- a/services/api/app/models/collection.rb
+++ b/services/api/app/models/collection.rb
@@ -8,6 +8,11 @@ class Collection < ArvadosModel
t.add :files
end
+ api_accessible :with_data, extend: :user do |t|
+ t.add :portable_manifest_text
+ t.add :manifest_text
+ end
+
def redundancy_status
if redundancy_confirmed_as.nil?
'unconfirmed'
@@ -25,6 +30,17 @@ class Collection < ArvadosModel
end
def assign_uuid
+ # The client may provide either a portable or a non-portable
+ # manifest or both, as long as the given UUID matches one of
+ # them. If only one is provided, we make up something reasonable
+ # for the other. This behavior allows the client to expect all
+ # three of "uuid given == uuid stored in database", "uuid is
+ # stable when non-portable manifest changes, if md5(portable
+ # manifest) given", and "other clients see the +K at xyzzy hints I
+ # provide until the api server takes charge of them".
+ self.manifest_text ||= portable_manifest_text
+ self.portable_manifest_text ||= manifest_text.andand.gsub /\+K@[a-z0-9]+/, ''
+
if self.manifest_text.nil? and self.uuid.nil?
super
elsif self.manifest_text and self.uuid
@@ -32,15 +48,18 @@ class Collection < ArvadosModel
if self.uuid == Digest::MD5.hexdigest(self.manifest_text)
self.uuid.gsub! /$/, '+' + self.manifest_text.length.to_s
true
+ elsif self.uuid == Digest::MD5.hexdigest(self.portable_manifest_text)
+ self.uuid.gsub! /$/, '+' + self.portable_manifest_text.length.to_s
+ true
else
- errors.add :uuid, 'uuid does not match checksum of manifest_text'
+ errors.add :uuid, 'does not match checksum of manifest_text or portable_manifest_text'
false
end
elsif self.manifest_text
- errors.add :uuid, 'checksum for manifest_text not supplied in uuid'
+ errors.add :uuid, 'not supplied to match manifest_text'
false
else
- errors.add :manifest_text, 'manifest_text not supplied'
+ errors.add :manifest_text, 'not supplied'
false
end
end
diff --git a/services/api/test/functional/arvados/v1/collections_controller_test.rb b/services/api/test/functional/arvados/v1/collections_controller_test.rb
index bffb47a..3573ca2 100644
--- a/services/api/test/functional/arvados/v1/collections_controller_test.rb
+++ b/services/api/test/functional/arvados/v1/collections_controller_test.rb
@@ -103,6 +103,66 @@ class Arvados::V1::CollectionsControllerTest < ActionController::TestCase
assert_response 422
end
+ test "create with checksum matching portable manifest" do
+ authorize_with :active
+ post :create, {
+ collection: {
+ manifest_text: ". d41d8cd98f00b204e9800998ecf8427e+0+K at xyzzy 0:0:foo.txt\n",
+ uuid: "9458070263ca8298748bf27253a6c469+57"
+ }
+ }
+ assert_response :success
+ assert_nil assigns(:objects)
+ resp = JSON.parse(@response.body)
+ assert_equal '9458070263ca8298748bf27253a6c469+49', resp['uuid']
+ end
+
+ test "create with checksum matching non-portable manifest" do
+ authorize_with :active
+ post :create, {
+ collection: {
+ manifest_text: ". d41d8cd98f00b204e9800998ecf8427e+0+K at xyzzy 0:0:foo.txt\n",
+ uuid: "fbf92328d90daac6360915f374894456"
+ }
+ }
+ assert_response :success
+ assert_nil assigns(:objects)
+ resp = JSON.parse(@response.body)
+ assert_equal 'fbf92328d90daac6360915f374894456+57', resp['uuid']
+ end
+
+ test "create with portable_manifest_text and get manifest_text made up" do
+ authorize_with :active
+ post :create, {
+ collection: {
+ portable_manifest_text: ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo.txt\n",
+ uuid: "9458070263ca8298748bf27253a6c469"
+ }
+ }
+ assert_response :success
+ assert_nil assigns(:objects)
+ resp = JSON.parse(@response.body)
+ assert_equal '9458070263ca8298748bf27253a6c469+49', resp['uuid']
+ assert_equal resp['portable_manifest_text'], resp['manifest_text']
+ end
+
+ test "create with portable_manifest_text and manifest_text supplied" do
+ authorize_with :active
+ post :create, {
+ collection: {
+ portable_manifest_text: ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo.txt\n",
+ manifest_text: ". d41d8cd98f00b204e9800998ecf8427e+0+K at xyzzy 0:0:foo.txt\n",
+ uuid: "9458070263ca8298748bf27253a6c469"
+ }
+ }
+ assert_response :success
+ assert_nil assigns(:objects)
+ resp = JSON.parse(@response.body)
+ assert_equal '9458070263ca8298748bf27253a6c469+49', resp['uuid']
+ assert_nil resp['portable_manifest_text'].index '+K at xyzzy '
+ assert_not_nil resp['manifest_text'].index '+K at xyzzy '
+ end
+
test "get full provenance for baz file" do
authorize_with :active
get :provenance, uuid: 'ea10d51bcf88862dbcc36eb292017dfd+45'
commit d14496cb38b2a61674e4ce2d3d9b8aa1c54ba322
Author: Tom Clegg <tom at curoverse.com>
Date: Sun Feb 9 14:17:50 2014 -0800
Add collections.portable_manifest_text column.
diff --git a/services/api/db/migrate/20140209212819_add_portable_manifest_text_to_collections.rb b/services/api/db/migrate/20140209212819_add_portable_manifest_text_to_collections.rb
new file mode 100644
index 0000000..aa0dd0a
--- /dev/null
+++ b/services/api/db/migrate/20140209212819_add_portable_manifest_text_to_collections.rb
@@ -0,0 +1,15 @@
+class AddPortableManifestTextToCollections < ActiveRecord::Migration
+ def up
+ add_column :collections, :portable_manifest_text, :text
+ update_sql <<-EOS
+UPDATE collections
+ SET portable_manifest_text =
+ regexp_replace(manifest_text,'\\+K@[a-z0-9]+', '', 'g')
+ WHERE portable_manifest_text IS NULL
+EOS
+ end
+
+ def down
+ remove_column :collections, :portable_manifest_text
+ end
+end
diff --git a/services/api/db/schema.rb b/services/api/db/schema.rb
index df6ea9b..3e4aaca 100644
--- a/services/api/db/schema.rb
+++ b/services/api/db/schema.rb
@@ -11,7 +11,7 @@
#
# It's strongly recommended to check this file into your version control system.
-ActiveRecord::Schema.define(:version => 20140129184311) do
+ActiveRecord::Schema.define(:version => 20140209212819) do
create_table "api_client_authorizations", :force => true do |t|
t.string "api_token", :null => false
@@ -83,6 +83,7 @@ ActiveRecord::Schema.define(:version => 20140129184311) do
t.datetime "updated_at", :null => false
t.string "uuid"
t.text "manifest_text"
+ t.text "portable_manifest_text"
end
add_index "collections", ["created_at"], :name => "index_collections_on_created_at"
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list