[ARVADOS] created: 1.2.0-56-g6525b5098
Git user
git at public.curoverse.com
Fri Aug 24 11:41:20 EDT 2018
at 6525b509825dbbf1cbe8b30b34080aafc4e5bde3 (commit)
commit 6525b509825dbbf1cbe8b30b34080aafc4e5bde3
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Fri Aug 24 11:36:10 2018 -0400
14009: Index containers table for reuse searches.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/services/api/app/models/arvados_model.rb b/services/api/app/models/arvados_model.rb
index b9edeae06..e43978980 100644
--- a/services/api/app/models/arvados_model.rb
+++ b/services/api/app/models/arvados_model.rb
@@ -596,16 +596,24 @@ class ArvadosModel < ActiveRecord::Base
end
end
- def self.where_serialized(colname, value)
+ def self.where_serialized(colname, value, md5: false)
+ colsql = colname.to_s
+ if md5
+ colsql = "md5(#{colsql})"
+ end
if value.empty?
# rails4 stores as null, rails3 stored as serialized [] or {}
- sql = "#{colname.to_s} is null or #{colname.to_s} IN (?)"
+ sql = "#{colsql} is null or #{colsql} IN (?)"
sorted = value
else
- sql = "#{colname.to_s} IN (?)"
+ sql = "#{colsql} IN (?)"
sorted = deep_sort_hash(value)
end
- where(sql, [sorted.to_yaml, SafeJSON.dump(sorted)])
+ params = [sorted.to_yaml, SafeJSON.dump(sorted)]
+ if md5
+ params = params.map { |x| Digest::MD5.hexdigest(x) }
+ end
+ where(sql, params)
end
Serializer = {
diff --git a/services/api/app/models/container.rb b/services/api/app/models/container.rb
index 7ec9845bc..7176bda92 100644
--- a/services/api/app/models/container.rb
+++ b/services/api/app/models/container.rb
@@ -228,13 +228,13 @@ class Container < ArvadosModel
def self.find_reusable(attrs)
log_reuse_info { "starting with #{Container.all.count} container records in database" }
- candidates = Container.where_serialized(:command, attrs[:command])
+ candidates = Container.where_serialized(:command, attrs[:command], md5: true)
log_reuse_info(candidates) { "after filtering on command #{attrs[:command].inspect}" }
candidates = candidates.where('cwd = ?', attrs[:cwd])
log_reuse_info(candidates) { "after filtering on cwd #{attrs[:cwd].inspect}" }
- candidates = candidates.where_serialized(:environment, attrs[:environment])
+ candidates = candidates.where_serialized(:environment, attrs[:environment], md5: true)
log_reuse_info(candidates) { "after filtering on environment #{attrs[:environment].inspect}" }
candidates = candidates.where('output_path = ?', attrs[:output_path])
@@ -244,13 +244,14 @@ class Container < ArvadosModel
candidates = candidates.where('container_image = ?', image)
log_reuse_info(candidates) { "after filtering on container_image #{image.inspect} (resolved from #{attrs[:container_image].inspect})" }
- candidates = candidates.where_serialized(:mounts, resolve_mounts(attrs[:mounts]))
+ candidates = candidates.where_serialized(:mounts, resolve_mounts(attrs[:mounts]), md5: true)
log_reuse_info(candidates) { "after filtering on mounts #{attrs[:mounts].inspect}" }
- candidates = candidates.where('secret_mounts_md5 = ?', Digest::MD5.hexdigest(SafeJSON.dump(self.deep_sort_hash(attrs[:secret_mounts]))))
- log_reuse_info(candidates) { "after filtering on mounts #{attrs[:mounts].inspect}" }
+ secret_mounts_md5 = Digest::MD5.hexdigest(SafeJSON.dump(self.deep_sort_hash(attrs[:secret_mounts])))
+ candidates = candidates.where('secret_mounts_md5 = ?', secret_mounts_md5)
+ log_reuse_info(candidates) { "after filtering on secret_mounts_md5 #{secret_mounts_md5.inspect}" }
- candidates = candidates.where_serialized(:runtime_constraints, resolve_runtime_constraints(attrs[:runtime_constraints]))
+ candidates = candidates.where_serialized(:runtime_constraints, resolve_runtime_constraints(attrs[:runtime_constraints]), md5: true)
log_reuse_info(candidates) { "after filtering on runtime_constraints #{attrs[:runtime_constraints].inspect}" }
log_reuse_info { "checking for state=Complete with readable output and log..." }
diff --git a/services/api/db/migrate/20180824152014_add_md5_index_to_containers.rb b/services/api/db/migrate/20180824152014_add_md5_index_to_containers.rb
new file mode 100644
index 000000000..a58932e35
--- /dev/null
+++ b/services/api/db/migrate/20180824152014_add_md5_index_to_containers.rb
@@ -0,0 +1,8 @@
+class AddMd5IndexToContainers < ActiveRecord::Migration
+ def up
+ ActiveRecord::Base.connection.execute 'CREATE INDEX index_containers_on_reuse_columns on containers (md5(command), cwd, md5(environment), output_path, container_image, md5(mounts), secret_mounts_md5, md5(runtime_constraints))'
+ end
+ def down
+ ActiveRecord::Base.connection.execute 'DROP INDEX index_containers_on_reuse_columns'
+ end
+end
diff --git a/services/api/db/structure.sql b/services/api/db/structure.sql
index d7ee1532d..f42dd8d7c 100644
--- a/services/api/db/structure.sql
+++ b/services/api/db/structure.sql
@@ -1914,6 +1914,13 @@ CREATE INDEX index_containers_on_owner_uuid ON public.containers USING btree (ow
--
+-- Name: index_containers_on_reuse_columns; Type: INDEX; Schema: public; Owner: -
+--
+
+CREATE INDEX index_containers_on_reuse_columns ON public.containers USING btree (md5(command), cwd, md5(environment), output_path, container_image, md5(mounts), secret_mounts_md5, md5(runtime_constraints));
+
+
+--
-- Name: index_containers_on_secret_mounts_md5; Type: INDEX; Schema: public; Owner: -
--
@@ -3125,3 +3132,5 @@ INSERT INTO schema_migrations (version) VALUES ('20180820130357');
INSERT INTO schema_migrations (version) VALUES ('20180820135808');
+INSERT INTO schema_migrations (version) VALUES ('20180824152014');
+
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits mailing list