14009: Index containers table for reuse searches.
authorTom Clegg <tclegg@veritasgenetics.com>
Fri, 24 Aug 2018 15:36:10 +0000 (11:36 -0400)
committerTom Clegg <tclegg@veritasgenetics.com>
Fri, 24 Aug 2018 15:36:10 +0000 (11:36 -0400)
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg@veritasgenetics.com>

services/api/app/models/arvados_model.rb
services/api/app/models/container.rb
services/api/db/migrate/20180824152014_add_md5_index_to_containers.rb [new file with mode: 0644]
services/api/db/structure.sql

index b9edeae06ecf93f0a1b6014eef0772224deed1a1..e43978980125a0fc91b22259ba34d8bb96963c13 100644 (file)
@@ -596,16 +596,24 @@ class ArvadosModel < ActiveRecord::Base
     end
   end
 
-  def self.where_serialized(colname, value)
+  def self.where_serialized(colname, value, md5: false)
+    colsql = colname.to_s
+    if md5
+      colsql = "md5(#{colsql})"
+    end
     if value.empty?
       # rails4 stores as null, rails3 stored as serialized [] or {}
-      sql = "#{colname.to_s} is null or #{colname.to_s} IN (?)"
+      sql = "#{colsql} is null or #{colsql} IN (?)"
       sorted = value
     else
-      sql = "#{colname.to_s} IN (?)"
+      sql = "#{colsql} IN (?)"
       sorted = deep_sort_hash(value)
     end
-    where(sql, [sorted.to_yaml, SafeJSON.dump(sorted)])
+    params = [sorted.to_yaml, SafeJSON.dump(sorted)]
+    if md5
+      params = params.map { |x| Digest::MD5.hexdigest(x) }
+    end
+    where(sql, params)
   end
 
   Serializer = {
index 7ec9845bc1983c0819f4d801e5044d8e5765f00f..7176bda926d8aa1cdb0c499958ae4ff6ffc97850 100644 (file)
@@ -228,13 +228,13 @@ class Container < ArvadosModel
 
   def self.find_reusable(attrs)
     log_reuse_info { "starting with #{Container.all.count} container records in database" }
-    candidates = Container.where_serialized(:command, attrs[:command])
+    candidates = Container.where_serialized(:command, attrs[:command], md5: true)
     log_reuse_info(candidates) { "after filtering on command #{attrs[:command].inspect}" }
 
     candidates = candidates.where('cwd = ?', attrs[:cwd])
     log_reuse_info(candidates) { "after filtering on cwd #{attrs[:cwd].inspect}" }
 
-    candidates = candidates.where_serialized(:environment, attrs[:environment])
+    candidates = candidates.where_serialized(:environment, attrs[:environment], md5: true)
     log_reuse_info(candidates) { "after filtering on environment #{attrs[:environment].inspect}" }
 
     candidates = candidates.where('output_path = ?', attrs[:output_path])
@@ -244,13 +244,14 @@ class Container < ArvadosModel
     candidates = candidates.where('container_image = ?', image)
     log_reuse_info(candidates) { "after filtering on container_image #{image.inspect} (resolved from #{attrs[:container_image].inspect})" }
 
-    candidates = candidates.where_serialized(:mounts, resolve_mounts(attrs[:mounts]))
+    candidates = candidates.where_serialized(:mounts, resolve_mounts(attrs[:mounts]), md5: true)
     log_reuse_info(candidates) { "after filtering on mounts #{attrs[:mounts].inspect}" }
 
-    candidates = candidates.where('secret_mounts_md5 = ?', Digest::MD5.hexdigest(SafeJSON.dump(self.deep_sort_hash(attrs[:secret_mounts]))))
-    log_reuse_info(candidates) { "after filtering on mounts #{attrs[:mounts].inspect}" }
+    secret_mounts_md5 = Digest::MD5.hexdigest(SafeJSON.dump(self.deep_sort_hash(attrs[:secret_mounts])))
+    candidates = candidates.where('secret_mounts_md5 = ?', secret_mounts_md5)
+    log_reuse_info(candidates) { "after filtering on secret_mounts_md5 #{secret_mounts_md5.inspect}" }
 
-    candidates = candidates.where_serialized(:runtime_constraints, resolve_runtime_constraints(attrs[:runtime_constraints]))
+    candidates = candidates.where_serialized(:runtime_constraints, resolve_runtime_constraints(attrs[:runtime_constraints]), md5: true)
     log_reuse_info(candidates) { "after filtering on runtime_constraints #{attrs[:runtime_constraints].inspect}" }
 
     log_reuse_info { "checking for state=Complete with readable output and log..." }
diff --git a/services/api/db/migrate/20180824152014_add_md5_index_to_containers.rb b/services/api/db/migrate/20180824152014_add_md5_index_to_containers.rb
new file mode 100644 (file)
index 0000000..a58932e
--- /dev/null
@@ -0,0 +1,8 @@
+class AddMd5IndexToContainers < ActiveRecord::Migration
+  def up
+    ActiveRecord::Base.connection.execute 'CREATE INDEX index_containers_on_reuse_columns on containers (md5(command), cwd, md5(environment), output_path, container_image, md5(mounts), secret_mounts_md5, md5(runtime_constraints))'
+  end
+  def down
+    ActiveRecord::Base.connection.execute 'DROP INDEX index_containers_on_reuse_columns'
+  end
+end
index d7ee1532d0f3807338421824e4564dd35ec4ada8..f42dd8d7cfa94669508c907538c01a0e644b2083 100644 (file)
@@ -1913,6 +1913,13 @@ CREATE INDEX index_containers_on_modified_at_uuid ON public.containers USING btr
 CREATE INDEX index_containers_on_owner_uuid ON public.containers USING btree (owner_uuid);
 
 
+--
+-- Name: index_containers_on_reuse_columns; Type: INDEX; Schema: public; Owner: -
+--
+
+CREATE INDEX index_containers_on_reuse_columns ON public.containers USING btree (md5(command), cwd, md5(environment), output_path, container_image, md5(mounts), secret_mounts_md5, md5(runtime_constraints));
+
+
 --
 -- Name: index_containers_on_secret_mounts_md5; Type: INDEX; Schema: public; Owner: -
 --
@@ -3125,3 +3132,5 @@ INSERT INTO schema_migrations (version) VALUES ('20180820130357');
 
 INSERT INTO schema_migrations (version) VALUES ('20180820135808');
 
+INSERT INTO schema_migrations (version) VALUES ('20180824152014');
+