[ARVADOS] updated: 26a7558c41a528f977932cf6738c62cfc4edfa6a

git at public.curoverse.com git at public.curoverse.com
Thu Aug 14 11:15:06 EDT 2014


Summary of changes:
 .../20140811184643_collection_use_regular_uuids.rb | 149 ++++++++++++++-------
 1 file changed, 100 insertions(+), 49 deletions(-)

       via  26a7558c41a528f977932cf6738c62cfc4edfa6a (commit)
      from  3251b50fed55eeb1c30fd7bf996f715490ee2104 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit 26a7558c41a528f977932cf6738c62cfc4edfa6a
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Thu Aug 14 11:15:01 2014 -0400

    3036: More work on migration.  Converts collection->collection provenance links into jobs.  Handles orphan collections;

diff --git a/services/api/db/migrate/20140811184643_collection_use_regular_uuids.rb b/services/api/db/migrate/20140811184643_collection_use_regular_uuids.rb
index 9dc19ff..6ad5b86 100644
--- a/services/api/db/migrate/20140811184643_collection_use_regular_uuids.rb
+++ b/services/api/db/migrate/20140811184643_collection_use_regular_uuids.rb
@@ -6,17 +6,22 @@ class CollectionUseRegularUuids < ActiveRecord::Migration
     add_column :collections, :expire_time, :date
     remove_column :collections, :locator
 
-    # Step 1.  Move manifest hashes into portable_data_hash field
-    ActiveRecord::Base.connection.execute("update collections set portable_data_hash=uuid, uuid=null")
+    say_with_time "Step 1. Move manifest hashes into portable_data_hash field" do
+      ActiveRecord::Base.connection.execute("update collections set portable_data_hash=uuid, uuid=null")
+    end
 
-    # Step 2.  Create new collection objects from the name links in the table.
-    links = ActiveRecord::Base.connection.select_all %{
-select links.uuid, head_uuid, tail_uuid, links.name, manifest_text, links.created_at, links.updated_at
+    say_with_time "Step 2. Create new collection objects from the name links in the table." do
+      from_clause = %{
 from links inner join collections on head_uuid=collections.portable_data_hash
 where link_class='name' and collections.uuid is null
 }
-    links.each do |d|
-      ActiveRecord::Base.connection.execute %{
+      links = ActiveRecord::Base.connection.select_all %{
+select links.uuid, head_uuid, tail_uuid, links.name,
+manifest_text, links.created_at, links.updated_at
+#{from_clause}
+}
+      links.each do |d|
+        ActiveRecord::Base.connection.execute %{
 insert into collections (uuid, portable_data_hash, owner_uuid, name, manifest_text, created_at, updated_at)
 values (#{ActiveRecord::Base.connection.quote Collection.generate_uuid},
 #{ActiveRecord::Base.connection.quote d['head_uuid']},
@@ -26,17 +31,21 @@ values (#{ActiveRecord::Base.connection.quote Collection.generate_uuid},
 #{ActiveRecord::Base.connection.quote d['created_at']},
 #{ActiveRecord::Base.connection.quote d['updated_at']})
 }
-      ActiveRecord::Base.connection.execute("delete from links where uuid=#{ActiveRecord::Base.connection.quote d['uuid']}")
+      end
+      ActiveRecord::Base.connection.execute "delete from links where links.uuid in (select links.uuid #{from_clause})"
     end
 
-    # Step 3.  Create new collection objects from the can_read links in the table.
-    links = ActiveRecord::Base.connection.select_all %{
-select links.uuid, head_uuid, tail_uuid, manifest_text, links.created_at, links.updated_at
+    say_with_time "Step 3. Create new collection objects from the can_read links in the table." do
+      from_clause = %{
 from links inner join collections on head_uuid=collections.portable_data_hash
 where link_class='permission' and links.name='can_read' and collections.uuid is null
 }
-    links.each do |d|
-      ActiveRecord::Base.connection.execute %{
+      links = ActiveRecord::Base.connection.select_all %{
+select links.uuid, head_uuid, tail_uuid, manifest_text, links.created_at, links.updated_at
+#{from_clause}
+}
+      links.each do |d|
+        ActiveRecord::Base.connection.execute %{
 insert into collections (uuid, portable_data_hash, owner_uuid, manifest_text, created_at, updated_at)
 values (#{ActiveRecord::Base.connection.quote Collection.generate_uuid},
 #{ActiveRecord::Base.connection.quote d['head_uuid']},
@@ -45,51 +54,83 @@ values (#{ActiveRecord::Base.connection.quote Collection.generate_uuid},
 #{ActiveRecord::Base.connection.quote d['created_at']},
 #{ActiveRecord::Base.connection.quote d['updated_at']})
 }
-      ActiveRecord::Base.connection.execute("delete from links where uuid=#{ActiveRecord::Base.connection.quote d['uuid']}")
+      end
+      ActiveRecord::Base.connection.execute "delete from links where links.uuid in (select links.uuid #{from_clause})"
     end
 
-    # Step 4.  Delete permission links with tail_uuid of a collection (these records are just invalid)
-    links = ActiveRecord::Base.connection.select_all "select links.uuid from links inner join collections on links.tail_uuid=collections.portable_data_hash where link_class='permission'"
-    links.each do |d|
-      ActiveRecord::Base.connection.execute("delete from links where uuid=#{ActiveRecord::Base.connection.quote d['uuid']}")
+    say_with_time "Step 4. Migrate remaining orphan collection objects" do
+      links = ActiveRecord::Base.connection.select_all %{
+select portable_data_hash, owner_uuid, manifest_text, created_at, updated_at
+from collections
+where uuid is null and portable_data_hash not in (select portable_data_hash from collections where uuid is not null)
+}
+      links.each do |d|
+        ActiveRecord::Base.connection.execute %{
+insert into collections (uuid, portable_data_hash, owner_uuid, manifest_text, created_at, updated_at)
+values (#{ActiveRecord::Base.connection.quote Collection.generate_uuid},
+#{ActiveRecord::Base.connection.quote d['portable_data_hash']},
+#{ActiveRecord::Base.connection.quote d['owner_uuid']},
+#{ActiveRecord::Base.connection.quote d['manifest_text']},
+#{ActiveRecord::Base.connection.quote d['created_at']},
+#{ActiveRecord::Base.connection.quote d['updated_at']})
+}
+      end
     end
 
-    # Step 5. Migrate other links
-    # 5.1 migrate head_uuid that look like collections
-    links = ActiveRecord::Base.connection.select_all %{
-select links.uuid, collections.uuid as coluuid, tail_uuid, link_class, links.properties, links.name, links.created_at, links.updated_at, links.owner_uuid
-from links inner join collections on links.head_uuid=portable_data_hash
-where collections.uuid is not null and links.link_class != 'name' and links.link_class != 'permission'
-}
-    links.each do |d|
+    say_with_time "Step 5. Delete old collection objects." do
+      ActiveRecord::Base.connection.execute("delete from collections where uuid is null")
+    end
+
+    say_with_time "Step 6. Delete permission links where tail_uuid is a collection (invalid records)" do
       ActiveRecord::Base.connection.execute %{
-insert into links (uuid, head_uuid, tail_uuid, link_class, name, properties, created_at, updated_at, owner_uuid)
-values (#{ActiveRecord::Base.connection.quote Link.generate_uuid},
-#{ActiveRecord::Base.connection.quote d['coluuid']},
-#{ActiveRecord::Base.connection.quote d['tail_uuid']},
-#{ActiveRecord::Base.connection.quote d['link_class']},
-#{ActiveRecord::Base.connection.quote d['name']},
-#{ActiveRecord::Base.connection.quote d['properties']},
+delete from links where links.uuid in (select links.uuid
+from links
+where tail_uuid like '________________________________+%' and link_class='permission' )
+}
+    end
+
+    say_with_time "Step 5. Migrate collection -> collection provenance links to jobs" do
+      from_clause = %{
+from links
+where head_uuid like '________________________________+%' and tail_uuid like '________________________________+%' and links.link_class = 'provenance'
+}
+      links = ActiveRecord::Base.connection.select_all %{
+select links.uuid, head_uuid, tail_uuid, links.created_at, links.updated_at, links.owner_uuid
+#{from_clause}
+}
+      links.each do |d|
+        newuuid = Job.generate_uuid
+        ActiveRecord::Base.connection.execute %{
+insert into jobs (uuid, script_parameters, output, running, success, created_at, updated_at, owner_uuid)
+values (#{ActiveRecord::Base.connection.quote newuuid},
+#{ActiveRecord::Base.connection.quote "---\ninput: "+d['tail_uuid']},
+#{ActiveRecord::Base.connection.quote d['head_uuid']},
+#{ActiveRecord::Base.connection.quote false},
+#{ActiveRecord::Base.connection.quote true},
 #{ActiveRecord::Base.connection.quote d['created_at']},
 #{ActiveRecord::Base.connection.quote d['updated_at']},
 #{ActiveRecord::Base.connection.quote d['owner_uuid']})
 }
-      ActiveRecord::Base.connection.execute("delete from links where uuid=#{ActiveRecord::Base.connection.quote d['uuid']}")
+      end
+      ActiveRecord::Base.connection.execute "delete from links where links.uuid in (select links.uuid #{from_clause})"
     end
 
-    # 5.2 migrate tail_uuid that look like collections
-    links = ActiveRecord::Base.connection.select_all %{
-select links.uuid, head_uuid, collections.uuid as coluuid, link_class, links.properties, links.name, links.created_at, links.updated_at, links.owner_uuid
-from links inner join collections on links.tail_uuid=portable_data_hash
-where tail_uuid like '________________________________+%' and collections.uuid is not null and links.link_class != 'name' and links.link_class != 'permission'
-}
-    links.each do |d|
-      newuuid = Link.generate_uuid
-      ActiveRecord::Base.connection.execute %{
+    say_with_time "Step 7. Migrate remaining links with head_uuid pointing to collections" do
+      from_clause = %{
+from links inner join collections on links.head_uuid=portable_data_hash
+where collections.uuid is not null
+}
+      links = ActiveRecord::Base.connection.select_all %{
+select links.uuid, collections.uuid as collectionuuid, tail_uuid, link_class, links.properties,
+links.name, links.created_at, links.updated_at, links.owner_uuid
+#{from_clause}
+}
+      links.each do |d|
+        ActiveRecord::Base.connection.execute %{
 insert into links (uuid, head_uuid, tail_uuid, link_class, name, properties, created_at, updated_at, owner_uuid)
-values (#{ActiveRecord::Base.connection.quote newuuid},
-#{ActiveRecord::Base.connection.quote d['head_uuid']},
-#{ActiveRecord::Base.connection.quote d['coluuid']},
+values (#{ActiveRecord::Base.connection.quote Link.generate_uuid},
+#{ActiveRecord::Base.connection.quote d['collectionuuid']},
+#{ActiveRecord::Base.connection.quote d['tail_uuid']},
 #{ActiveRecord::Base.connection.quote d['link_class']},
 #{ActiveRecord::Base.connection.quote d['name']},
 #{ActiveRecord::Base.connection.quote d['properties']},
@@ -97,11 +138,21 @@ values (#{ActiveRecord::Base.connection.quote newuuid},
 #{ActiveRecord::Base.connection.quote d['updated_at']},
 #{ActiveRecord::Base.connection.quote d['owner_uuid']})
 }
-      ActiveRecord::Base.connection.execute("delete from links where uuid=#{ActiveRecord::Base.connection.quote d['uuid']}")
+      end
+      ActiveRecord::Base.connection.execute "delete from links where links.uuid in (select links.uuid #{from_clause})"
+    end
+
+    say_with_time "Step 8. Validate links table" do
+      links = ActiveRecord::Base.connection.select_all %{
+select links.uuid, head_uuid, tail_uuid, link_class, name
+from links
+where head_uuid like '________________________________+%' or tail_uuid like '________________________________+%'
+}
+      links.each do |d|
+        raise "Bad row #{d}"
+      end
     end
 
-    # Step 7. Delete old collection objects.
-    ActiveRecord::Base.connection.execute("delete from collections where uuid is null")
   end
 
   def down

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list