From 9d4c7ff603f22844dc80c7f38c938a678273c9c1 Mon Sep 17 00:00:00 2001 From: Eric Biagiotti Date: Thu, 30 May 2019 14:35:51 -0400 Subject: [PATCH] 15106: Adds trigram index migration Arvados-DCO-1.1-Signed-off-by: Eric Biagiotti --- services/api/app/models/arvados_model.rb | 12 ++++ ...80148_add_trigram_index_for_text_search.rb | 29 ++++++++ services/api/db/structure.sql | 67 ++++++++++++++++++- 3 files changed, 107 insertions(+), 1 deletion(-) create mode 100644 services/api/db/migrate/20190523180148_add_trigram_index_for_text_search.rb diff --git a/services/api/app/models/arvados_model.rb b/services/api/app/models/arvados_model.rb index 339bc9e23f..bd2ac3fad2 100644 --- a/services/api/app/models/arvados_model.rb +++ b/services/api/app/models/arvados_model.rb @@ -420,6 +420,18 @@ class ArvadosModel < ApplicationRecord end.map(&:name) end + def self.full_text_coalesce + full_text_searchable_columns.collect do |column| + is_jsonb = self.columns.select{|x|x.name == column}[0].type == :jsonb + cast = (is_jsonb || serialized_attributes[column]) ? '::text' : '' + "coalesce(#{column}#{cast},'')" + end + end + + def self.full_text_trgm + "(#{full_text_coalesce.join(" || ' ' || ")})" + end + def self.full_text_tsvector parts = full_text_searchable_columns.collect do |column| is_jsonb = self.columns.select{|x|x.name == column}[0].type == :jsonb diff --git a/services/api/db/migrate/20190523180148_add_trigram_index_for_text_search.rb b/services/api/db/migrate/20190523180148_add_trigram_index_for_text_search.rb new file mode 100644 index 0000000000..ef8be5f92d --- /dev/null +++ b/services/api/db/migrate/20190523180148_add_trigram_index_for_text_search.rb @@ -0,0 +1,29 @@ +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +class AddTrigramIndexForTextSearch < ActiveRecord::Migration[5.0] + def trgm_indexes + { + "collections" => "collections_trgm_text_search_idx", + "container_requests" => "container_requests_trgm_text_search_idx", + "groups" => "groups_trgm_text_search_idx", + "jobs" => "jobs_trgm_text_search_idx", + "pipeline_instances" => "pipeline_instances_trgm_text_search_idx", + "pipeline_templates" => "pipeline_templates_trgm_text_search_idx", + "workflows" => "workflows_trgm_text_search_idx", + } + end + + def up + trgm_indexes.each do |model, indx| + execute "CREATE INDEX #{indx} ON #{model} USING gin((#{model.classify.constantize.full_text_trgm}) gin_trgm_ops)" + end + end + + def down + trgm_indexes.each do |_, indx| + execute "DROP INDEX IF EXISTS #{indx}" + end + end +end diff --git a/services/api/db/structure.sql b/services/api/db/structure.sql index 0408b5265b..9bb059c2a9 100644 --- a/services/api/db/structure.sql +++ b/services/api/db/structure.sql @@ -7,6 +7,7 @@ SET client_encoding = 'UTF8'; SET standard_conforming_strings = on; SELECT pg_catalog.set_config('search_path', '', false); SET check_function_bodies = false; +SET xmloption = content; SET client_min_messages = warning; -- @@ -23,6 +24,20 @@ CREATE EXTENSION IF NOT EXISTS plpgsql WITH SCHEMA pg_catalog; -- COMMENT ON EXTENSION plpgsql IS 'PL/pgSQL procedural language'; +-- +-- Name: pg_trgm; Type: EXTENSION; Schema: -; Owner: - +-- + +CREATE EXTENSION IF NOT EXISTS pg_trgm WITH SCHEMA public; + + +-- +-- Name: EXTENSION pg_trgm; Type: COMMENT; Schema: -; Owner: - +-- + +-- COMMENT ON EXTENSION pg_trgm IS 'text similarity measurement and index searching based on trigrams'; + + SET default_tablespace = ''; SET default_with_oids = false; @@ -1664,6 +1679,13 @@ CREATE INDEX collections_full_text_search_idx ON public.collections USING gin (t CREATE INDEX collections_search_index ON public.collections USING btree (owner_uuid, modified_by_client_uuid, modified_by_user_uuid, portable_data_hash, uuid, name, current_version_uuid); +-- +-- Name: collections_trgm_text_search_idx; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX collections_trgm_text_search_idx ON public.collections USING gin (((((((((((((((((((COALESCE(owner_uuid, ''::character varying))::text || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(portable_data_hash, ''::character varying))::text) || ' '::text) || (COALESCE(uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || (COALESCE(description, ''::character varying))::text) || ' '::text) || COALESCE((properties)::text, ''::text)) || ' '::text) || COALESCE(file_names, ''::text))) public.gin_trgm_ops); + + -- -- Name: container_requests_full_text_search_idx; Type: INDEX; Schema: public; Owner: - -- @@ -1685,6 +1707,13 @@ CREATE INDEX container_requests_index_on_properties ON public.container_requests CREATE INDEX container_requests_search_index ON public.container_requests USING btree (uuid, owner_uuid, modified_by_client_uuid, modified_by_user_uuid, name, state, requesting_container_uuid, container_uuid, container_image, cwd, output_path, output_uuid, log_uuid, output_name); +-- +-- Name: container_requests_trgm_text_search_idx; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX container_requests_trgm_text_search_idx ON public.container_requests USING gin (((((((((((((((((((((((((((((((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(description, ''::text)) || ' '::text) || COALESCE((properties)::text, ''::text)) || ' '::text) || (COALESCE(state, ''::character varying))::text) || ' '::text) || (COALESCE(requesting_container_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(container_uuid, ''::character varying))::text) || ' '::text) || COALESCE(runtime_constraints, ''::text)) || ' '::text) || (COALESCE(container_image, ''::character varying))::text) || ' '::text) || COALESCE(environment, ''::text)) || ' '::text) || (COALESCE(cwd, ''::character varying))::text) || ' '::text) || COALESCE(command, ''::text)) || ' '::text) || (COALESCE(output_path, ''::character varying))::text) || ' '::text) || COALESCE(filters, ''::text)) || ' '::text) || COALESCE(scheduling_parameters, ''::text)) || ' '::text) || (COALESCE(output_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(log_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(output_name, ''::character varying))::text)) public.gin_trgm_ops); + + -- -- Name: containers_search_index; Type: INDEX; Schema: public; Owner: - -- @@ -1713,6 +1742,13 @@ CREATE INDEX groups_full_text_search_idx ON public.groups USING gin (to_tsvector CREATE INDEX groups_search_index ON public.groups USING btree (uuid, owner_uuid, modified_by_client_uuid, modified_by_user_uuid, name, group_class); +-- +-- Name: groups_trgm_text_search_idx; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX groups_trgm_text_search_idx ON public.groups USING gin (((((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || (COALESCE(description, ''::character varying))::text) || ' '::text) || (COALESCE(group_class, ''::character varying))::text) || ' '::text) || COALESCE((properties)::text, ''::text))) public.gin_trgm_ops); + + -- -- Name: humans_search_index; Type: INDEX; Schema: public; Owner: - -- @@ -2700,6 +2736,13 @@ CREATE INDEX jobs_full_text_search_idx ON public.jobs USING gin (to_tsvector('en CREATE INDEX jobs_search_index ON public.jobs USING btree (uuid, owner_uuid, modified_by_client_uuid, modified_by_user_uuid, submit_id, script, script_version, cancelled_by_client_uuid, cancelled_by_user_uuid, output, is_locked_by_uuid, log, repository, supplied_script_version, docker_image_locator, state, arvados_sdk_version); +-- +-- Name: jobs_trgm_text_search_idx; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX jobs_trgm_text_search_idx ON public.jobs USING gin (((((((((((((((((((((((((((((((((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(submit_id, ''::character varying))::text) || ' '::text) || (COALESCE(script, ''::character varying))::text) || ' '::text) || (COALESCE(script_version, ''::character varying))::text) || ' '::text) || COALESCE(script_parameters, ''::text)) || ' '::text) || (COALESCE(cancelled_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(cancelled_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(output, ''::character varying))::text) || ' '::text) || (COALESCE(is_locked_by_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(log, ''::character varying))::text) || ' '::text) || COALESCE(tasks_summary, ''::text)) || ' '::text) || COALESCE(runtime_constraints, ''::text)) || ' '::text) || (COALESCE(repository, ''::character varying))::text) || ' '::text) || (COALESCE(supplied_script_version, ''::character varying))::text) || ' '::text) || (COALESCE(docker_image_locator, ''::character varying))::text) || ' '::text) || (COALESCE(description, ''::character varying))::text) || ' '::text) || (COALESCE(state, ''::character varying))::text) || ' '::text) || (COALESCE(arvados_sdk_version, ''::character varying))::text) || ' '::text) || COALESCE(components, ''::text))) public.gin_trgm_ops); + + -- -- Name: keep_disks_search_index; Type: INDEX; Schema: public; Owner: - -- @@ -2791,6 +2834,13 @@ CREATE INDEX pipeline_instances_full_text_search_idx ON public.pipeline_instance CREATE INDEX pipeline_instances_search_index ON public.pipeline_instances USING btree (uuid, owner_uuid, modified_by_client_uuid, modified_by_user_uuid, pipeline_template_uuid, name, state); +-- +-- Name: pipeline_instances_trgm_text_search_idx; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX pipeline_instances_trgm_text_search_idx ON public.pipeline_instances USING gin (((((((((((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(pipeline_template_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(components, ''::text)) || ' '::text) || COALESCE(properties, ''::text)) || ' '::text) || (COALESCE(state, ''::character varying))::text) || ' '::text) || COALESCE(components_summary, ''::text)) || ' '::text) || (COALESCE(description, ''::character varying))::text)) public.gin_trgm_ops); + + -- -- Name: pipeline_template_owner_uuid_name_unique; Type: INDEX; Schema: public; Owner: - -- @@ -2812,6 +2862,13 @@ CREATE INDEX pipeline_templates_full_text_search_idx ON public.pipeline_template CREATE INDEX pipeline_templates_search_index ON public.pipeline_templates USING btree (uuid, owner_uuid, modified_by_client_uuid, modified_by_user_uuid, name); +-- +-- Name: pipeline_templates_trgm_text_search_idx; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX pipeline_templates_trgm_text_search_idx ON public.pipeline_templates USING gin (((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(components, ''::text)) || ' '::text) || (COALESCE(description, ''::character varying))::text)) public.gin_trgm_ops); + + -- -- Name: repositories_search_index; Type: INDEX; Schema: public; Owner: - -- @@ -2868,6 +2925,13 @@ CREATE INDEX workflows_full_text_search_idx ON public.workflows USING gin (to_ts CREATE INDEX workflows_search_idx ON public.workflows USING btree (uuid, owner_uuid, modified_by_client_uuid, modified_by_user_uuid, name); +-- +-- Name: workflows_trgm_text_search_idx; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX workflows_trgm_text_search_idx ON public.workflows USING gin (((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(description, ''::text))) public.gin_trgm_ops); + + -- -- PostgreSQL database dump complete -- @@ -3060,6 +3124,7 @@ INSERT INTO "schema_migrations" (version) VALUES ('20181213183234'), ('20190214214814'), ('20190322174136'), -('20190422144631'); +('20190422144631'), +('20190523180148'); -- 2.30.2