[ARVADOS] created: ff963e6ce16fc276cb41b5fb66f8febdbae30d95

git at public.curoverse.com git at public.curoverse.com
Sun Feb 9 19:00:06 EST 2014


        at  ff963e6ce16fc276cb41b5fb66f8febdbae30d95 (commit)


commit ff963e6ce16fc276cb41b5fb66f8febdbae30d95
Author: Tom Clegg <tom at curoverse.com>
Date:   Sun Feb 9 15:29:00 2014 -0800

    When retrieving a blob, if local Keep servers don't have it and the
    locator has +K at xyzzy, try GET http://keep.xyzzy.arvadosapi.com/hash.

diff --git a/sdk/python/arvados/keep.py b/sdk/python/arvados/keep.py
index b2bf3b4..e1902d1 100644
--- a/sdk/python/arvados/keep.py
+++ b/sdk/python/arvados/keep.py
@@ -174,26 +174,38 @@ class KeepClient(object):
             return KeepClient.local_store_get(locator)
         expect_hash = re.sub(r'\+.*', '', locator)
         for service_root in self.shuffled_service_roots(expect_hash):
-            h = httplib2.Http()
             url = service_root + expect_hash
             api_token = config.get('ARVADOS_API_TOKEN')
             headers = {'Authorization': "OAuth2 %s" % api_token,
                        'Accept': 'application/octet-stream'}
-            try:
-                resp, content = h.request(url.encode('utf-8'), 'GET',
-                                          headers=headers)
-                if re.match(r'^2\d\d$', resp['status']):
-                    m = hashlib.new('md5')
-                    m.update(content)
-                    md5 = m.hexdigest()
-                    if md5 == expect_hash:
-                        return content
-                    logging.warning("Checksum fail: md5(%s) = %s" % (url, md5))
-            except (httplib2.HttpLib2Error, httplib.ResponseNotReady) as e:
-                logging.info("Request fail: GET %s => %s: %s" %
-                             (url, type(e), str(e)))
+            blob = self.get_url(url, headers, expect_hash)
+            if blob:
+                return blob
+        for location_hint in re.finditer(r'\+K@([a-z0-9]+)', locator):
+            instance = location_hint.group(1)
+            url = 'http://keep.' + instance + '.arvadosapi.com/' + expect_hash
+            blob = self.get_url(url, {}, expect_hash)
+            if blob:
+                return blob
         raise arvados.errors.NotFoundError("Block not found: %s" % expect_hash)
 
+    def get_url(self, url, headers, expect_hash):
+        h = httplib2.Http()
+        try:
+            resp, content = h.request(url.encode('utf-8'), 'GET',
+                                      headers=headers)
+            if re.match(r'^2\d\d$', resp['status']):
+                m = hashlib.new('md5')
+                m.update(content)
+                md5 = m.hexdigest()
+                if md5 == expect_hash:
+                    return content
+                logging.warning("Checksum fail: md5(%s) = %s" % (url, md5))
+        except Exception as e:
+            logging.info("Request fail: GET %s => %s: %s" %
+                         (url, type(e), str(e)))
+        return None
+
     def put(self, data, **kwargs):
         if 'KEEP_LOCAL_STORE' in os.environ:
             return KeepClient.local_store_put(data)

commit 7865079f2d9a33368099037b7dfc1676145f09e0
Author: Tom Clegg <tom at curoverse.com>
Date:   Sun Feb 9 14:54:58 2014 -0800

    Retrieve manifest_text from API server. If that fails, emit a warning
    and fall back to reading directly from Keep.
    
    This gives the API server an opportunity to provide additional
    metadata, like hints about where the data blobs are stored.

diff --git a/sdk/python/arvados/collection.py b/sdk/python/arvados/collection.py
index 8e39318..ea98d00 100644
--- a/sdk/python/arvados/collection.py
+++ b/sdk/python/arvados/collection.py
@@ -43,7 +43,14 @@ class CollectionReader(object):
         if self._streams != None:
             return
         if not self._manifest_text:
-            self._manifest_text = Keep.get(self._manifest_locator)
+            try:
+                c = arvados.api('v1').collections().get(
+                    uuid=self._manifest_locator).execute()
+                self._manifest_text = c['manifest_text']
+            except Exception as e:
+                logging.warning("API lookup failed for collection %s (%s: %s)" %
+                                (self._manifest_locator, type(e), str(e)))
+                self._manifest_text = Keep.get(self._manifest_locator)
         self._streams = []
         for stream_line in self._manifest_text.split("\n"):
             if stream_line != '':
diff --git a/sdk/python/bin/arv-get b/sdk/python/bin/arv-get
index 4154a3d..30beedc 100755
--- a/sdk/python/bin/arv-get
+++ b/sdk/python/bin/arv-get
@@ -124,7 +124,16 @@ if not get_prefix:
                 logger.error('Local file %s already exists' % args.destination)
                 sys.exit(1)
             with open(args.destination, 'wb') as f:
-                f.write(arvados.Keep.get(collection))
+                try:
+                    c = arvados.api('v1').collections().get(
+                        uuid=collection).execute()
+                    manifest = c['manifest_text']
+                except Exception as e:
+                    logging.warning(
+                        "API lookup failed for collection %s (%s: %s)" %
+                        (collection, type(e), str(e)))
+                    manifest = arvados.Keep.get(collection)
+                f.write(manifest)
         sys.exit(0)
     except arvados.errors.NotFoundError as e:
         logger.error(e)

commit be64bab4a991a6a48120d868c23e43d9024b6103
Author: Tom Clegg <tom at curoverse.com>
Date:   Sun Feb 9 14:36:51 2014 -0800

    Include manifest_text and portable_manifest_text in collections.get
    API response, and allow client to provide either one or both (and a
    uuid matching either one) during collections.create.

diff --git a/services/api/app/controllers/arvados/v1/collections_controller.rb b/services/api/app/controllers/arvados/v1/collections_controller.rb
index feed5ce..454ee18 100644
--- a/services/api/app/controllers/arvados/v1/collections_controller.rb
+++ b/services/api/app/controllers/arvados/v1/collections_controller.rb
@@ -51,6 +51,10 @@ class Arvados::V1::CollectionsController < ApplicationController
     show
   end
 
+  def show
+    render json: @object.as_api_response(:with_data)
+  end
+
   def collection_uuid(uuid)
     m = /([a-f0-9]{32}(\+[0-9]+)?)(\+.*)?/.match(uuid)
     if m
diff --git a/services/api/app/models/collection.rb b/services/api/app/models/collection.rb
index 03e5e4e..47e4c8d 100644
--- a/services/api/app/models/collection.rb
+++ b/services/api/app/models/collection.rb
@@ -8,6 +8,11 @@ class Collection < ArvadosModel
     t.add :files
   end
 
+  api_accessible :with_data, extend: :user do |t|
+    t.add :portable_manifest_text
+    t.add :manifest_text
+  end
+
   def redundancy_status
     if redundancy_confirmed_as.nil?
       'unconfirmed'
@@ -25,6 +30,17 @@ class Collection < ArvadosModel
   end
 
   def assign_uuid
+    # The client may provide either a portable or a non-portable
+    # manifest or both, as long as the given UUID matches one of
+    # them. If only one is provided, we make up something reasonable
+    # for the other. This behavior allows the client to expect all
+    # three of "uuid given == uuid stored in database", "uuid is
+    # stable when non-portable manifest changes, if md5(portable
+    # manifest) given", and "other clients see the +K at xyzzy hints I
+    # provide until the api server takes charge of them".
+    self.manifest_text ||= portable_manifest_text
+    self.portable_manifest_text ||= manifest_text.andand.gsub /\+K@[a-z0-9]+/, ''
+
     if self.manifest_text.nil? and self.uuid.nil?
       super
     elsif self.manifest_text and self.uuid
@@ -32,15 +48,18 @@ class Collection < ArvadosModel
       if self.uuid == Digest::MD5.hexdigest(self.manifest_text)
         self.uuid.gsub! /$/, '+' + self.manifest_text.length.to_s
         true
+      elsif self.uuid == Digest::MD5.hexdigest(self.portable_manifest_text)
+        self.uuid.gsub! /$/, '+' + self.portable_manifest_text.length.to_s
+        true
       else
-        errors.add :uuid, 'uuid does not match checksum of manifest_text'
+        errors.add :uuid, 'does not match checksum of manifest_text or portable_manifest_text'
         false
       end
     elsif self.manifest_text
-      errors.add :uuid, 'checksum for manifest_text not supplied in uuid'
+      errors.add :uuid, 'not supplied to match manifest_text'
       false
     else
-      errors.add :manifest_text, 'manifest_text not supplied'
+      errors.add :manifest_text, 'not supplied'
       false
     end
   end
diff --git a/services/api/test/functional/arvados/v1/collections_controller_test.rb b/services/api/test/functional/arvados/v1/collections_controller_test.rb
index bffb47a..3573ca2 100644
--- a/services/api/test/functional/arvados/v1/collections_controller_test.rb
+++ b/services/api/test/functional/arvados/v1/collections_controller_test.rb
@@ -103,6 +103,66 @@ class Arvados::V1::CollectionsControllerTest < ActionController::TestCase
     assert_response 422
   end
 
+  test "create with checksum matching portable manifest" do
+    authorize_with :active
+    post :create, {
+      collection: {
+        manifest_text: ". d41d8cd98f00b204e9800998ecf8427e+0+K at xyzzy 0:0:foo.txt\n",
+        uuid: "9458070263ca8298748bf27253a6c469+57"
+      }
+    }
+    assert_response :success
+    assert_nil assigns(:objects)
+    resp = JSON.parse(@response.body)
+    assert_equal '9458070263ca8298748bf27253a6c469+49', resp['uuid']
+  end
+
+  test "create with checksum matching non-portable manifest" do
+    authorize_with :active
+    post :create, {
+      collection: {
+        manifest_text: ". d41d8cd98f00b204e9800998ecf8427e+0+K at xyzzy 0:0:foo.txt\n",
+        uuid: "fbf92328d90daac6360915f374894456"
+      }
+    }
+    assert_response :success
+    assert_nil assigns(:objects)
+    resp = JSON.parse(@response.body)
+    assert_equal 'fbf92328d90daac6360915f374894456+57', resp['uuid']
+  end
+
+  test "create with portable_manifest_text and get manifest_text made up" do
+    authorize_with :active
+    post :create, {
+      collection: {
+        portable_manifest_text: ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo.txt\n",
+        uuid: "9458070263ca8298748bf27253a6c469"
+      }
+    }
+    assert_response :success
+    assert_nil assigns(:objects)
+    resp = JSON.parse(@response.body)
+    assert_equal '9458070263ca8298748bf27253a6c469+49', resp['uuid']
+    assert_equal resp['portable_manifest_text'], resp['manifest_text']
+  end
+
+  test "create with portable_manifest_text and manifest_text supplied" do
+    authorize_with :active
+    post :create, {
+      collection: {
+        portable_manifest_text: ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo.txt\n",
+        manifest_text: ". d41d8cd98f00b204e9800998ecf8427e+0+K at xyzzy 0:0:foo.txt\n",
+        uuid: "9458070263ca8298748bf27253a6c469"
+      }
+    }
+    assert_response :success
+    assert_nil assigns(:objects)
+    resp = JSON.parse(@response.body)
+    assert_equal '9458070263ca8298748bf27253a6c469+49', resp['uuid']
+    assert_nil resp['portable_manifest_text'].index '+K at xyzzy '
+    assert_not_nil resp['manifest_text'].index '+K at xyzzy '
+  end
+
   test "get full provenance for baz file" do
     authorize_with :active
     get :provenance, uuid: 'ea10d51bcf88862dbcc36eb292017dfd+45'

commit d14496cb38b2a61674e4ce2d3d9b8aa1c54ba322
Author: Tom Clegg <tom at curoverse.com>
Date:   Sun Feb 9 14:17:50 2014 -0800

    Add collections.portable_manifest_text column.

diff --git a/services/api/db/migrate/20140209212819_add_portable_manifest_text_to_collections.rb b/services/api/db/migrate/20140209212819_add_portable_manifest_text_to_collections.rb
new file mode 100644
index 0000000..aa0dd0a
--- /dev/null
+++ b/services/api/db/migrate/20140209212819_add_portable_manifest_text_to_collections.rb
@@ -0,0 +1,15 @@
+class AddPortableManifestTextToCollections < ActiveRecord::Migration
+  def up
+    add_column :collections, :portable_manifest_text, :text
+    update_sql <<-EOS
+UPDATE collections
+ SET portable_manifest_text =
+     regexp_replace(manifest_text,'\\+K@[a-z0-9]+', '', 'g')
+ WHERE portable_manifest_text IS NULL
+EOS
+  end
+
+  def down
+    remove_column :collections, :portable_manifest_text
+  end
+end
diff --git a/services/api/db/schema.rb b/services/api/db/schema.rb
index df6ea9b..3e4aaca 100644
--- a/services/api/db/schema.rb
+++ b/services/api/db/schema.rb
@@ -11,7 +11,7 @@
 #
 # It's strongly recommended to check this file into your version control system.
 
-ActiveRecord::Schema.define(:version => 20140129184311) do
+ActiveRecord::Schema.define(:version => 20140209212819) do
 
   create_table "api_client_authorizations", :force => true do |t|
     t.string   "api_token",                                           :null => false
@@ -83,6 +83,7 @@ ActiveRecord::Schema.define(:version => 20140129184311) do
     t.datetime "updated_at",                          :null => false
     t.string   "uuid"
     t.text     "manifest_text"
+    t.text     "portable_manifest_text"
   end
 
   add_index "collections", ["created_at"], :name => "index_collections_on_created_at"

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list