[ARVADOS] created: 1.2.0-56-g6525b5098

Git user git at public.curoverse.com
Fri Aug 24 11:41:20 EDT 2018


        at  6525b509825dbbf1cbe8b30b34080aafc4e5bde3 (commit)


commit 6525b509825dbbf1cbe8b30b34080aafc4e5bde3
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date:   Fri Aug 24 11:36:10 2018 -0400

    14009: Index containers table for reuse searches.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>

diff --git a/services/api/app/models/arvados_model.rb b/services/api/app/models/arvados_model.rb
index b9edeae06..e43978980 100644
--- a/services/api/app/models/arvados_model.rb
+++ b/services/api/app/models/arvados_model.rb
@@ -596,16 +596,24 @@ class ArvadosModel < ActiveRecord::Base
     end
   end
 
-  def self.where_serialized(colname, value)
+  def self.where_serialized(colname, value, md5: false)
+    colsql = colname.to_s
+    if md5
+      colsql = "md5(#{colsql})"
+    end
     if value.empty?
       # rails4 stores as null, rails3 stored as serialized [] or {}
-      sql = "#{colname.to_s} is null or #{colname.to_s} IN (?)"
+      sql = "#{colsql} is null or #{colsql} IN (?)"
       sorted = value
     else
-      sql = "#{colname.to_s} IN (?)"
+      sql = "#{colsql} IN (?)"
       sorted = deep_sort_hash(value)
     end
-    where(sql, [sorted.to_yaml, SafeJSON.dump(sorted)])
+    params = [sorted.to_yaml, SafeJSON.dump(sorted)]
+    if md5
+      params = params.map { |x| Digest::MD5.hexdigest(x) }
+    end
+    where(sql, params)
   end
 
   Serializer = {
diff --git a/services/api/app/models/container.rb b/services/api/app/models/container.rb
index 7ec9845bc..7176bda92 100644
--- a/services/api/app/models/container.rb
+++ b/services/api/app/models/container.rb
@@ -228,13 +228,13 @@ class Container < ArvadosModel
 
   def self.find_reusable(attrs)
     log_reuse_info { "starting with #{Container.all.count} container records in database" }
-    candidates = Container.where_serialized(:command, attrs[:command])
+    candidates = Container.where_serialized(:command, attrs[:command], md5: true)
     log_reuse_info(candidates) { "after filtering on command #{attrs[:command].inspect}" }
 
     candidates = candidates.where('cwd = ?', attrs[:cwd])
     log_reuse_info(candidates) { "after filtering on cwd #{attrs[:cwd].inspect}" }
 
-    candidates = candidates.where_serialized(:environment, attrs[:environment])
+    candidates = candidates.where_serialized(:environment, attrs[:environment], md5: true)
     log_reuse_info(candidates) { "after filtering on environment #{attrs[:environment].inspect}" }
 
     candidates = candidates.where('output_path = ?', attrs[:output_path])
@@ -244,13 +244,14 @@ class Container < ArvadosModel
     candidates = candidates.where('container_image = ?', image)
     log_reuse_info(candidates) { "after filtering on container_image #{image.inspect} (resolved from #{attrs[:container_image].inspect})" }
 
-    candidates = candidates.where_serialized(:mounts, resolve_mounts(attrs[:mounts]))
+    candidates = candidates.where_serialized(:mounts, resolve_mounts(attrs[:mounts]), md5: true)
     log_reuse_info(candidates) { "after filtering on mounts #{attrs[:mounts].inspect}" }
 
-    candidates = candidates.where('secret_mounts_md5 = ?', Digest::MD5.hexdigest(SafeJSON.dump(self.deep_sort_hash(attrs[:secret_mounts]))))
-    log_reuse_info(candidates) { "after filtering on mounts #{attrs[:mounts].inspect}" }
+    secret_mounts_md5 = Digest::MD5.hexdigest(SafeJSON.dump(self.deep_sort_hash(attrs[:secret_mounts])))
+    candidates = candidates.where('secret_mounts_md5 = ?', secret_mounts_md5)
+    log_reuse_info(candidates) { "after filtering on secret_mounts_md5 #{secret_mounts_md5.inspect}" }
 
-    candidates = candidates.where_serialized(:runtime_constraints, resolve_runtime_constraints(attrs[:runtime_constraints]))
+    candidates = candidates.where_serialized(:runtime_constraints, resolve_runtime_constraints(attrs[:runtime_constraints]), md5: true)
     log_reuse_info(candidates) { "after filtering on runtime_constraints #{attrs[:runtime_constraints].inspect}" }
 
     log_reuse_info { "checking for state=Complete with readable output and log..." }
diff --git a/services/api/db/migrate/20180824152014_add_md5_index_to_containers.rb b/services/api/db/migrate/20180824152014_add_md5_index_to_containers.rb
new file mode 100644
index 000000000..a58932e35
--- /dev/null
+++ b/services/api/db/migrate/20180824152014_add_md5_index_to_containers.rb
@@ -0,0 +1,8 @@
+class AddMd5IndexToContainers < ActiveRecord::Migration
+  def up
+    ActiveRecord::Base.connection.execute 'CREATE INDEX index_containers_on_reuse_columns on containers (md5(command), cwd, md5(environment), output_path, container_image, md5(mounts), secret_mounts_md5, md5(runtime_constraints))'
+  end
+  def down
+    ActiveRecord::Base.connection.execute 'DROP INDEX index_containers_on_reuse_columns'
+  end
+end
diff --git a/services/api/db/structure.sql b/services/api/db/structure.sql
index d7ee1532d..f42dd8d7c 100644
--- a/services/api/db/structure.sql
+++ b/services/api/db/structure.sql
@@ -1914,6 +1914,13 @@ CREATE INDEX index_containers_on_owner_uuid ON public.containers USING btree (ow
 
 
 --
+-- Name: index_containers_on_reuse_columns; Type: INDEX; Schema: public; Owner: -
+--
+
+CREATE INDEX index_containers_on_reuse_columns ON public.containers USING btree (md5(command), cwd, md5(environment), output_path, container_image, md5(mounts), secret_mounts_md5, md5(runtime_constraints));
+
+
+--
 -- Name: index_containers_on_secret_mounts_md5; Type: INDEX; Schema: public; Owner: -
 --
 
@@ -3125,3 +3132,5 @@ INSERT INTO schema_migrations (version) VALUES ('20180820130357');
 
 INSERT INTO schema_migrations (version) VALUES ('20180820135808');
 
+INSERT INTO schema_migrations (version) VALUES ('20180824152014');
+

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list