From: Eric Biagiotti Date: Thu, 13 Jun 2019 17:22:37 +0000 (-0400) Subject: Merge remote-tracking branch 'origin/master' into 15106-trgm-text-search X-Git-Tag: 2.0.0~294^2~3 X-Git-Url: https://git.arvados.org/arvados.git/commitdiff_plain/b4b8e120c8ac43a16513079e2ae9f46bcdb9c35d?hp=21dfd2339fb0a4f501b43beedf3207c5d30aae1b Merge remote-tracking branch 'origin/master' into 15106-trgm-text-search refs #15106 Arvados-DCO-1.1-Signed-off-by: Eric Biagiotti --- diff --git a/services/api/app/models/arvados_model.rb b/services/api/app/models/arvados_model.rb index efef7b8120..91c5a1923c 100644 --- a/services/api/app/models/arvados_model.rb +++ b/services/api/app/models/arvados_model.rb @@ -419,6 +419,18 @@ class ArvadosModel < ApplicationRecord end.map(&:name) end + def self.full_text_coalesce + full_text_searchable_columns.collect do |column| + is_jsonb = self.columns.select{|x|x.name == column}[0].type == :jsonb + cast = (is_jsonb || serialized_attributes[column]) ? '::text' : '' + "coalesce(#{column}#{cast},'')" + end + end + + def self.full_text_trgm + "(#{full_text_coalesce.join(" || ' ' || ")})" + end + def self.full_text_tsvector parts = full_text_searchable_columns.collect do |column| is_jsonb = self.columns.select{|x|x.name == column}[0].type == :jsonb diff --git a/services/api/db/migrate/20190523180148_add_trigram_index_for_text_search.rb b/services/api/db/migrate/20190523180148_add_trigram_index_for_text_search.rb new file mode 100644 index 0000000000..881350238c --- /dev/null +++ b/services/api/db/migrate/20190523180148_add_trigram_index_for_text_search.rb @@ -0,0 +1,44 @@ +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +class AddTrigramIndexForTextSearch < ActiveRecord::Migration[5.0] + def trgm_indexes + { + "collections" => "collections_trgm_text_search_idx", + "container_requests" => "container_requests_trgm_text_search_idx", + "groups" => "groups_trgm_text_search_idx", + "jobs" => "jobs_trgm_text_search_idx", + "pipeline_instances" => "pipeline_instances_trgm_text_search_idx", + "pipeline_templates" => "pipeline_templates_trgm_text_search_idx", + "workflows" => "workflows_trgm_text_search_idx", + } + end + + def up + begin + execute "CREATE EXTENSION IF NOT EXISTS pg_trgm" + rescue ActiveRecord::StatementInvalid => e + puts "Cannot create the pg_trgm extension." + if e.cause.is_a?(PG::InsufficientPrivilege) + puts "The user must have a SUPERUSER role." + elsif e.cause.is_a?(PG::UndefinedFile) + puts "The postgresql-contrib package is most likely not installed." + else + puts "Unknown Error." + end + puts "Please visit https://doc.arvados.org/admin/upgrading.html for instructions on how to run this migration." + throw e + end + + trgm_indexes.each do |model, indx| + execute "CREATE INDEX #{indx} ON #{model} USING gin((#{model.classify.constantize.full_text_trgm}) gin_trgm_ops)" + end + end + + def down + trgm_indexes.each do |_, indx| + execute "DROP INDEX IF EXISTS #{indx}" + end + end +end diff --git a/services/api/db/structure.sql b/services/api/db/structure.sql index 0408b5265b..9bb059c2a9 100644 --- a/services/api/db/structure.sql +++ b/services/api/db/structure.sql @@ -7,6 +7,7 @@ SET client_encoding = 'UTF8'; SET standard_conforming_strings = on; SELECT pg_catalog.set_config('search_path', '', false); SET check_function_bodies = false; +SET xmloption = content; SET client_min_messages = warning; -- @@ -23,6 +24,20 @@ CREATE EXTENSION IF NOT EXISTS plpgsql WITH SCHEMA pg_catalog; -- COMMENT ON EXTENSION plpgsql IS 'PL/pgSQL procedural language'; +-- +-- Name: pg_trgm; Type: EXTENSION; Schema: -; Owner: - +-- + +CREATE EXTENSION IF NOT EXISTS pg_trgm WITH SCHEMA public; + + +-- +-- Name: EXTENSION pg_trgm; Type: COMMENT; Schema: -; Owner: - +-- + +-- COMMENT ON EXTENSION pg_trgm IS 'text similarity measurement and index searching based on trigrams'; + + SET default_tablespace = ''; SET default_with_oids = false; @@ -1664,6 +1679,13 @@ CREATE INDEX collections_full_text_search_idx ON public.collections USING gin (t CREATE INDEX collections_search_index ON public.collections USING btree (owner_uuid, modified_by_client_uuid, modified_by_user_uuid, portable_data_hash, uuid, name, current_version_uuid); +-- +-- Name: collections_trgm_text_search_idx; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX collections_trgm_text_search_idx ON public.collections USING gin (((((((((((((((((((COALESCE(owner_uuid, ''::character varying))::text || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(portable_data_hash, ''::character varying))::text) || ' '::text) || (COALESCE(uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || (COALESCE(description, ''::character varying))::text) || ' '::text) || COALESCE((properties)::text, ''::text)) || ' '::text) || COALESCE(file_names, ''::text))) public.gin_trgm_ops); + + -- -- Name: container_requests_full_text_search_idx; Type: INDEX; Schema: public; Owner: - -- @@ -1685,6 +1707,13 @@ CREATE INDEX container_requests_index_on_properties ON public.container_requests CREATE INDEX container_requests_search_index ON public.container_requests USING btree (uuid, owner_uuid, modified_by_client_uuid, modified_by_user_uuid, name, state, requesting_container_uuid, container_uuid, container_image, cwd, output_path, output_uuid, log_uuid, output_name); +-- +-- Name: container_requests_trgm_text_search_idx; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX container_requests_trgm_text_search_idx ON public.container_requests USING gin (((((((((((((((((((((((((((((((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(description, ''::text)) || ' '::text) || COALESCE((properties)::text, ''::text)) || ' '::text) || (COALESCE(state, ''::character varying))::text) || ' '::text) || (COALESCE(requesting_container_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(container_uuid, ''::character varying))::text) || ' '::text) || COALESCE(runtime_constraints, ''::text)) || ' '::text) || (COALESCE(container_image, ''::character varying))::text) || ' '::text) || COALESCE(environment, ''::text)) || ' '::text) || (COALESCE(cwd, ''::character varying))::text) || ' '::text) || COALESCE(command, ''::text)) || ' '::text) || (COALESCE(output_path, ''::character varying))::text) || ' '::text) || COALESCE(filters, ''::text)) || ' '::text) || COALESCE(scheduling_parameters, ''::text)) || ' '::text) || (COALESCE(output_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(log_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(output_name, ''::character varying))::text)) public.gin_trgm_ops); + + -- -- Name: containers_search_index; Type: INDEX; Schema: public; Owner: - -- @@ -1713,6 +1742,13 @@ CREATE INDEX groups_full_text_search_idx ON public.groups USING gin (to_tsvector CREATE INDEX groups_search_index ON public.groups USING btree (uuid, owner_uuid, modified_by_client_uuid, modified_by_user_uuid, name, group_class); +-- +-- Name: groups_trgm_text_search_idx; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX groups_trgm_text_search_idx ON public.groups USING gin (((((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || (COALESCE(description, ''::character varying))::text) || ' '::text) || (COALESCE(group_class, ''::character varying))::text) || ' '::text) || COALESCE((properties)::text, ''::text))) public.gin_trgm_ops); + + -- -- Name: humans_search_index; Type: INDEX; Schema: public; Owner: - -- @@ -2700,6 +2736,13 @@ CREATE INDEX jobs_full_text_search_idx ON public.jobs USING gin (to_tsvector('en CREATE INDEX jobs_search_index ON public.jobs USING btree (uuid, owner_uuid, modified_by_client_uuid, modified_by_user_uuid, submit_id, script, script_version, cancelled_by_client_uuid, cancelled_by_user_uuid, output, is_locked_by_uuid, log, repository, supplied_script_version, docker_image_locator, state, arvados_sdk_version); +-- +-- Name: jobs_trgm_text_search_idx; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX jobs_trgm_text_search_idx ON public.jobs USING gin (((((((((((((((((((((((((((((((((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(submit_id, ''::character varying))::text) || ' '::text) || (COALESCE(script, ''::character varying))::text) || ' '::text) || (COALESCE(script_version, ''::character varying))::text) || ' '::text) || COALESCE(script_parameters, ''::text)) || ' '::text) || (COALESCE(cancelled_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(cancelled_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(output, ''::character varying))::text) || ' '::text) || (COALESCE(is_locked_by_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(log, ''::character varying))::text) || ' '::text) || COALESCE(tasks_summary, ''::text)) || ' '::text) || COALESCE(runtime_constraints, ''::text)) || ' '::text) || (COALESCE(repository, ''::character varying))::text) || ' '::text) || (COALESCE(supplied_script_version, ''::character varying))::text) || ' '::text) || (COALESCE(docker_image_locator, ''::character varying))::text) || ' '::text) || (COALESCE(description, ''::character varying))::text) || ' '::text) || (COALESCE(state, ''::character varying))::text) || ' '::text) || (COALESCE(arvados_sdk_version, ''::character varying))::text) || ' '::text) || COALESCE(components, ''::text))) public.gin_trgm_ops); + + -- -- Name: keep_disks_search_index; Type: INDEX; Schema: public; Owner: - -- @@ -2791,6 +2834,13 @@ CREATE INDEX pipeline_instances_full_text_search_idx ON public.pipeline_instance CREATE INDEX pipeline_instances_search_index ON public.pipeline_instances USING btree (uuid, owner_uuid, modified_by_client_uuid, modified_by_user_uuid, pipeline_template_uuid, name, state); +-- +-- Name: pipeline_instances_trgm_text_search_idx; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX pipeline_instances_trgm_text_search_idx ON public.pipeline_instances USING gin (((((((((((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(pipeline_template_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(components, ''::text)) || ' '::text) || COALESCE(properties, ''::text)) || ' '::text) || (COALESCE(state, ''::character varying))::text) || ' '::text) || COALESCE(components_summary, ''::text)) || ' '::text) || (COALESCE(description, ''::character varying))::text)) public.gin_trgm_ops); + + -- -- Name: pipeline_template_owner_uuid_name_unique; Type: INDEX; Schema: public; Owner: - -- @@ -2812,6 +2862,13 @@ CREATE INDEX pipeline_templates_full_text_search_idx ON public.pipeline_template CREATE INDEX pipeline_templates_search_index ON public.pipeline_templates USING btree (uuid, owner_uuid, modified_by_client_uuid, modified_by_user_uuid, name); +-- +-- Name: pipeline_templates_trgm_text_search_idx; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX pipeline_templates_trgm_text_search_idx ON public.pipeline_templates USING gin (((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(components, ''::text)) || ' '::text) || (COALESCE(description, ''::character varying))::text)) public.gin_trgm_ops); + + -- -- Name: repositories_search_index; Type: INDEX; Schema: public; Owner: - -- @@ -2868,6 +2925,13 @@ CREATE INDEX workflows_full_text_search_idx ON public.workflows USING gin (to_ts CREATE INDEX workflows_search_idx ON public.workflows USING btree (uuid, owner_uuid, modified_by_client_uuid, modified_by_user_uuid, name); +-- +-- Name: workflows_trgm_text_search_idx; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX workflows_trgm_text_search_idx ON public.workflows USING gin (((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(description, ''::text))) public.gin_trgm_ops); + + -- -- PostgreSQL database dump complete -- @@ -3060,6 +3124,7 @@ INSERT INTO "schema_migrations" (version) VALUES ('20181213183234'), ('20190214214814'), ('20190322174136'), -('20190422144631'); +('20190422144631'), +('20190523180148'); diff --git a/services/api/lib/record_filters.rb b/services/api/lib/record_filters.rb index 831e357b42..982a9c30b6 100644 --- a/services/api/lib/record_filters.rb +++ b/services/api/lib/record_filters.rb @@ -44,6 +44,14 @@ module RecordFilters cond_out = [] + if attrs_in == 'any' && operator.casecmp('ilike') && (operand.is_a? String) && operand.match('^[%].*[%]$') + # Trigram index search + cond_out << model_class.full_text_trgm + " ilike ?" + param_out << operand + # Skip the generic per-column operator loop below + attrs = [] + end + if operator == '@@' # Full-text search if attrs_in != 'any' diff --git a/services/api/test/unit/arvados_model_test.rb b/services/api/test/unit/arvados_model_test.rb index 7d9da1e561..d447c76c6d 100644 --- a/services/api/test/unit/arvados_model_test.rb +++ b/services/api/test/unit/arvados_model_test.rb @@ -200,6 +200,30 @@ class ArvadosModelTest < ActiveSupport::TestCase end end + [ + %w[collections collections_trgm_text_search_idx], + %w[container_requests container_requests_trgm_text_search_idx], + %w[groups groups_trgm_text_search_idx], + %w[jobs jobs_trgm_text_search_idx], + %w[pipeline_instances pipeline_instances_trgm_text_search_idx], + %w[pipeline_templates pipeline_templates_trgm_text_search_idx], + %w[workflows workflows_trgm_text_search_idx] + ].each do |model| + table = model[0] + indexname = model[1] + test "trigram index exists on #{table} model" do + table_class = table.classify.constantize + expect = table_class.full_text_searchable_columns + ok = false + conn = ActiveRecord::Base.connection + conn.exec_query("SELECT indexdef FROM pg_indexes WHERE tablename = '#{table}' AND indexname = '#{indexname}'").each do |res| + searchable = res['indexdef'].scan(/COALESCE\(+([A-Za-z_]+)/).flatten + ok = (expect == searchable) + assert ok, "Invalid or no trigram index on #{table} named #{indexname}\nexpect: #{expect.inspect}\nfound: #{searchable}" + end + end + end + test "selectable_attributes includes database attributes" do assert_includes(Job.selectable_attributes, "success") end diff --git a/tools/arvbox/lib/arvbox/docker/Dockerfile.base b/tools/arvbox/lib/arvbox/docker/Dockerfile.base index 65171de3d2..758bcbc5f2 100644 --- a/tools/arvbox/lib/arvbox/docker/Dockerfile.base +++ b/tools/arvbox/lib/arvbox/docker/Dockerfile.base @@ -8,7 +8,7 @@ ENV DEBIAN_FRONTEND noninteractive RUN apt-get update && \ apt-get -yq --no-install-recommends -o Acquire::Retries=6 install \ - postgresql-9.6 git build-essential runit curl libpq-dev \ + postgresql-9.6 postgresql-contrib-9.6 git build-essential runit curl libpq-dev \ libcurl4-openssl-dev libssl1.0-dev zlib1g-dev libpcre3-dev \ openssh-server python-setuptools netcat-traditional \ python-epydoc graphviz bzip2 less sudo virtualenv \ diff --git a/tools/arvbox/lib/arvbox/docker/api-setup.sh b/tools/arvbox/lib/arvbox/docker/api-setup.sh index 482934c915..5b63950cd1 100755 --- a/tools/arvbox/lib/arvbox/docker/api-setup.sh +++ b/tools/arvbox/lib/arvbox/docker/api-setup.sh @@ -77,7 +77,7 @@ database_pw=$(cat /var/lib/arvados/api_database_pw) if ! (psql postgres -c "\du" | grep "^ arvados ") >/dev/null ; then psql postgres -c "create user arvados with password '$database_pw'" - psql postgres -c "ALTER USER arvados CREATEDB;" + psql postgres -c "ALTER USER arvados WITH SUPERUSER;" fi sed "s/password:.*/password: $database_pw/" config/database.yml