SPDX-License-Identifier: CC-BY-SA-3.0
{% endcomment %}
-# Start a shell for the postgres user:
- <notextile><pre>~$ <span class="userinput">sudo -u postgres bash</span></pre></notextile>
-# Generate a new database password:
- <notextile><pre>$ <span class="userinput">ruby -e 'puts rand(2**128).to_s(36)'</span>
+<ol>
+<li>Start a shell for the postgres user:
+<notextile><pre>~$ <span class="userinput">sudo -u postgres bash</span></pre></notextile>
+</li>
+<li>Generate a new database password:
+<notextile><pre>$ <span class="userinput">ruby -e 'puts rand(2**128).to_s(36)'</span>
yourgeneratedpassword
</pre></notextile> Record this. You'll need it when you set up the Rails server later.
-# Create a database user with the password you generated:
+</li>
+<li>Create a database user with the password you generated:
<notextile><pre><code>$ <span class="userinput">createuser --encrypted -R -S --pwprompt {{service_role}}</span>
-Enter password for new role: <span class="userinput">yourgeneratedpassword</span>
-Enter it again: <span class="userinput">yourgeneratedpassword</span>
-</code></pre></notextile>
-# Create a database owned by the new user:
+ Enter password for new role: <span class="userinput">yourgeneratedpassword</span>
+ Enter it again: <span class="userinput">yourgeneratedpassword</span></code></pre></notextile>
+</li>
+<li>Create a database owned by the new user:
<notextile><pre><code>$ <span class="userinput">createdb {{service_database}} -T template0 -E UTF8 -O {{service_role}}</span></code></pre></notextile>
-# Exit the postgres user shell:
+</li>
+{% if use_contrib %}
+<li>Enable the pg_trgm extension:
+ <notextile><pre>$ <span class="userinput">psql {{service_database}} -c "CREATE EXTENSION pg_trgm"</span></pre></notextile>
+</li>
+{% endif %}
+<li>Exit the postgres user shell:
<notextile><pre>$ <span class="userinput">exit</span></pre></notextile>
+</li>
+</ol>
h2. General process
+# Consult upgrade notes below to see if any manual configuration updates are necessary.
# Wait for the cluster to be idle and stop Arvados services.
# Install new packages using @apt-get upgrade@ or @yum upgrade@.
# Package installation scripts will perform any necessary data migrations.
-# Consult upgrade notes below to see if any manual configuration updates are necessary.
# Restart Arvados services.
h2. Upgrade notes
You can test if any records in your database are affected by going to the API server directory and running @bundle exec rake symbols:check@. This will report which records contain fields with a leading ':' that would previously have been stripped. If there are records to be updated, you can update the database using @bundle exec rake symbols:stringify@.
+h4. Enabling Postgres trigram indexes
+
+ Feature "#15106":https://dev.arvados.org/issues/15106 improves the speed and functionality of full-text search by adding, via a database migration, trigram indexes on the text-searchable database columns. Before upgrading, you must install the postgresql-contrib package on your system and then run the @CREATE EXTENSION pg_trgm@ SQL command on the arvados_production database as a postgres superuser. If the extension is not already present, the migration tries to create it itself, which fails unless the package is installed and the database user has the SUPERUSER role.
+
h3(#v1_4_0). v1.4.0 (2019-06-05)
h4. Populating the new file_count and file_size_total columns on the collections table
{% include 'note_python_sc' %}
# Install PostgreSQL:
- <notextile><pre>~$ <span class="userinput">sudo yum install rh-postgresql95</span>
+ <notextile><pre>~$ <span class="userinput">sudo yum install rh-postgresql95 rh-postgresql95-postgresql-contrib</span>
~$ <span class="userinput">scl enable rh-postgresql95 bash</span></pre></notextile>
# Initialize the database:
<notextile><pre>~$ <span class="userinput">sudo postgresql-setup initdb</span></pre></notextile>
Ubuntu 14.04 (Trusty) requires an updated PostgreSQL version, see "the PostgreSQL ubuntu repository":https://www.postgresql.org/download/linux/ubuntu/
# Install PostgreSQL:
- <notextile><pre>~$ <span class="userinput">sudo apt-get install postgresql</span></pre></notextile>
+ <notextile><pre>~$ <span class="userinput">sudo apt-get install postgresql postgresql-contrib</span></pre></notextile>
# "Set up Arvados credentials and databases":#rails_setup for the services that will use this PostgreSQL install.
<a name="rails_setup"></a>
{% assign service_role = "arvados_sso" %}
{% assign service_database = "arvados_sso_production" %}
+{% assign use_contrib = false %}
{% include 'install_postgres_database' %}
h2(#api). Set up API server credentials and database
{% assign service_role = "arvados" %}
{% assign service_database = "arvados_production" %}
+{% assign use_contrib = true %}
{% include 'install_postgres_database' %}
end.map(&:name)
end
+ def self.full_text_coalesce
+ full_text_searchable_columns.collect do |column|
+ is_jsonb = self.columns.select{|x|x.name == column}[0].type == :jsonb
+ cast = (is_jsonb || serialized_attributes[column]) ? '::text' : ''
+ "coalesce(#{column}#{cast},'')"
+ end
+ end
+
+ def self.full_text_trgm
+ "(#{full_text_coalesce.join(" || ' ' || ")})"
+ end
+
def self.full_text_tsvector
parts = full_text_searchable_columns.collect do |column|
is_jsonb = self.columns.select{|x|x.name == column}[0].type == :jsonb
--- /dev/null
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+class AddTrigramIndexForTextSearch < ActiveRecord::Migration[5.0]
+ def trgm_indexes
+ {
+ "collections" => "collections_trgm_text_search_idx",
+ "container_requests" => "container_requests_trgm_text_search_idx",
+ "groups" => "groups_trgm_text_search_idx",
+ "jobs" => "jobs_trgm_text_search_idx",
+ "pipeline_instances" => "pipeline_instances_trgm_text_search_idx",
+ "pipeline_templates" => "pipeline_templates_trgm_text_search_idx",
+ "workflows" => "workflows_trgm_text_search_idx",
+ }
+ end
+
+ def up
+ begin
+ execute "CREATE EXTENSION IF NOT EXISTS pg_trgm"
+ rescue ActiveRecord::StatementInvalid => e
+ puts "Cannot create the pg_trgm extension."
+ if e.cause.is_a?(PG::InsufficientPrivilege)
+ puts "The user must have a SUPERUSER role."
+ elsif e.cause.is_a?(PG::UndefinedFile)
+ puts "The postgresql-contrib package is most likely not installed."
+ else
+ puts "Unknown Error."
+ end
+ puts "Please visit https://doc.arvados.org/admin/upgrading.html for instructions on how to run this migration."
+ throw e
+ end
+
+ trgm_indexes.each do |model, indx|
+ execute "CREATE INDEX #{indx} ON #{model} USING gin((#{model.classify.constantize.full_text_trgm}) gin_trgm_ops)"
+ end
+ end
+
+ def down
+ trgm_indexes.each do |_, indx|
+ execute "DROP INDEX IF EXISTS #{indx}"
+ end
+ end
+end
SET standard_conforming_strings = on;
SELECT pg_catalog.set_config('search_path', '', false);
SET check_function_bodies = false;
+SET xmloption = content;
SET client_min_messages = warning;
--
-- COMMENT ON EXTENSION plpgsql IS 'PL/pgSQL procedural language';
+--
+-- Name: pg_trgm; Type: EXTENSION; Schema: -; Owner: -
+--
+
+CREATE EXTENSION IF NOT EXISTS pg_trgm WITH SCHEMA public;
+
+
+--
+-- Name: EXTENSION pg_trgm; Type: COMMENT; Schema: -; Owner: -
+--
+
+-- COMMENT ON EXTENSION pg_trgm IS 'text similarity measurement and index searching based on trigrams';
+
+
SET default_tablespace = '';
SET default_with_oids = false;
CREATE INDEX collections_search_index ON public.collections USING btree (owner_uuid, modified_by_client_uuid, modified_by_user_uuid, portable_data_hash, uuid, name, current_version_uuid);
+--
+-- Name: collections_trgm_text_search_idx; Type: INDEX; Schema: public; Owner: -
+--
+
+CREATE INDEX collections_trgm_text_search_idx ON public.collections USING gin (((((((((((((((((((COALESCE(owner_uuid, ''::character varying))::text || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(portable_data_hash, ''::character varying))::text) || ' '::text) || (COALESCE(uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || (COALESCE(description, ''::character varying))::text) || ' '::text) || COALESCE((properties)::text, ''::text)) || ' '::text) || COALESCE(file_names, ''::text))) public.gin_trgm_ops);
+
+
--
-- Name: container_requests_full_text_search_idx; Type: INDEX; Schema: public; Owner: -
--
CREATE INDEX container_requests_search_index ON public.container_requests USING btree (uuid, owner_uuid, modified_by_client_uuid, modified_by_user_uuid, name, state, requesting_container_uuid, container_uuid, container_image, cwd, output_path, output_uuid, log_uuid, output_name);
+--
+-- Name: container_requests_trgm_text_search_idx; Type: INDEX; Schema: public; Owner: -
+--
+
+CREATE INDEX container_requests_trgm_text_search_idx ON public.container_requests USING gin (((((((((((((((((((((((((((((((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(description, ''::text)) || ' '::text) || COALESCE((properties)::text, ''::text)) || ' '::text) || (COALESCE(state, ''::character varying))::text) || ' '::text) || (COALESCE(requesting_container_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(container_uuid, ''::character varying))::text) || ' '::text) || COALESCE(runtime_constraints, ''::text)) || ' '::text) || (COALESCE(container_image, ''::character varying))::text) || ' '::text) || COALESCE(environment, ''::text)) || ' '::text) || (COALESCE(cwd, ''::character varying))::text) || ' '::text) || COALESCE(command, ''::text)) || ' '::text) || (COALESCE(output_path, ''::character varying))::text) || ' '::text) || COALESCE(filters, ''::text)) || ' '::text) || COALESCE(scheduling_parameters, ''::text)) || ' '::text) || (COALESCE(output_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(log_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(output_name, ''::character varying))::text)) public.gin_trgm_ops);
+
+
--
-- Name: containers_search_index; Type: INDEX; Schema: public; Owner: -
--
CREATE INDEX groups_search_index ON public.groups USING btree (uuid, owner_uuid, modified_by_client_uuid, modified_by_user_uuid, name, group_class);
+--
+-- Name: groups_trgm_text_search_idx; Type: INDEX; Schema: public; Owner: -
+--
+
+CREATE INDEX groups_trgm_text_search_idx ON public.groups USING gin (((((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || (COALESCE(description, ''::character varying))::text) || ' '::text) || (COALESCE(group_class, ''::character varying))::text) || ' '::text) || COALESCE((properties)::text, ''::text))) public.gin_trgm_ops);
+
+
--
-- Name: humans_search_index; Type: INDEX; Schema: public; Owner: -
--
CREATE INDEX jobs_search_index ON public.jobs USING btree (uuid, owner_uuid, modified_by_client_uuid, modified_by_user_uuid, submit_id, script, script_version, cancelled_by_client_uuid, cancelled_by_user_uuid, output, is_locked_by_uuid, log, repository, supplied_script_version, docker_image_locator, state, arvados_sdk_version);
+--
+-- Name: jobs_trgm_text_search_idx; Type: INDEX; Schema: public; Owner: -
+--
+
+CREATE INDEX jobs_trgm_text_search_idx ON public.jobs USING gin (((((((((((((((((((((((((((((((((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(submit_id, ''::character varying))::text) || ' '::text) || (COALESCE(script, ''::character varying))::text) || ' '::text) || (COALESCE(script_version, ''::character varying))::text) || ' '::text) || COALESCE(script_parameters, ''::text)) || ' '::text) || (COALESCE(cancelled_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(cancelled_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(output, ''::character varying))::text) || ' '::text) || (COALESCE(is_locked_by_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(log, ''::character varying))::text) || ' '::text) || COALESCE(tasks_summary, ''::text)) || ' '::text) || COALESCE(runtime_constraints, ''::text)) || ' '::text) || (COALESCE(repository, ''::character varying))::text) || ' '::text) || (COALESCE(supplied_script_version, ''::character varying))::text) || ' '::text) || (COALESCE(docker_image_locator, ''::character varying))::text) || ' '::text) || (COALESCE(description, ''::character varying))::text) || ' '::text) || (COALESCE(state, ''::character varying))::text) || ' '::text) || (COALESCE(arvados_sdk_version, ''::character varying))::text) || ' '::text) || COALESCE(components, ''::text))) public.gin_trgm_ops);
+
+
--
-- Name: keep_disks_search_index; Type: INDEX; Schema: public; Owner: -
--
CREATE INDEX pipeline_instances_search_index ON public.pipeline_instances USING btree (uuid, owner_uuid, modified_by_client_uuid, modified_by_user_uuid, pipeline_template_uuid, name, state);
+--
+-- Name: pipeline_instances_trgm_text_search_idx; Type: INDEX; Schema: public; Owner: -
+--
+
+CREATE INDEX pipeline_instances_trgm_text_search_idx ON public.pipeline_instances USING gin (((((((((((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(pipeline_template_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(components, ''::text)) || ' '::text) || COALESCE(properties, ''::text)) || ' '::text) || (COALESCE(state, ''::character varying))::text) || ' '::text) || COALESCE(components_summary, ''::text)) || ' '::text) || (COALESCE(description, ''::character varying))::text)) public.gin_trgm_ops);
+
+
--
-- Name: pipeline_template_owner_uuid_name_unique; Type: INDEX; Schema: public; Owner: -
--
CREATE INDEX pipeline_templates_search_index ON public.pipeline_templates USING btree (uuid, owner_uuid, modified_by_client_uuid, modified_by_user_uuid, name);
+--
+-- Name: pipeline_templates_trgm_text_search_idx; Type: INDEX; Schema: public; Owner: -
+--
+
+CREATE INDEX pipeline_templates_trgm_text_search_idx ON public.pipeline_templates USING gin (((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(components, ''::text)) || ' '::text) || (COALESCE(description, ''::character varying))::text)) public.gin_trgm_ops);
+
+
--
-- Name: repositories_search_index; Type: INDEX; Schema: public; Owner: -
--
CREATE INDEX workflows_search_idx ON public.workflows USING btree (uuid, owner_uuid, modified_by_client_uuid, modified_by_user_uuid, name);
+--
+-- Name: workflows_trgm_text_search_idx; Type: INDEX; Schema: public; Owner: -
+--
+
+CREATE INDEX workflows_trgm_text_search_idx ON public.workflows USING gin (((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(description, ''::text))) public.gin_trgm_ops);
+
+
--
-- PostgreSQL database dump complete
--
('20181213183234'),
('20190214214814'),
('20190322174136'),
-('20190422144631');
+('20190422144631'),
+('20190523180148');
cond_out = []
+ if attrs_in == 'any' && (operator.casecmp('ilike').zero? || operator.casecmp('like').zero?) && (operand.is_a? String) && operand.match('^[%].*[%]$')
+ # Trigram index search
+ cond_out << model_class.full_text_trgm + " #{operator} ?"
+ param_out << operand
+ # Skip the generic per-column operator loop below
+ attrs = []
+ end
+
if operator == '@@'
# Full-text search
if attrs_in != 'any'
end
end
+ [
+ %w[collections collections_trgm_text_search_idx],
+ %w[container_requests container_requests_trgm_text_search_idx],
+ %w[groups groups_trgm_text_search_idx],
+ %w[jobs jobs_trgm_text_search_idx],
+ %w[pipeline_instances pipeline_instances_trgm_text_search_idx],
+ %w[pipeline_templates pipeline_templates_trgm_text_search_idx],
+ %w[workflows workflows_trgm_text_search_idx]
+ ].each do |model|
+ table = model[0]
+ indexname = model[1]
+ test "trigram index exists on #{table} model" do
+ table_class = table.classify.constantize
+ expect = table_class.full_text_searchable_columns
+ ok = false
+ conn = ActiveRecord::Base.connection
+ conn.exec_query("SELECT indexdef FROM pg_indexes WHERE tablename = '#{table}' AND indexname = '#{indexname}'").each do |res|
+ searchable = res['indexdef'].scan(/COALESCE\(+([A-Za-z_]+)/).flatten
+ ok = (expect == searchable)
+ assert ok, "Invalid or no trigram index on #{table} named #{indexname}\nexpect: #{expect.inspect}\nfound: #{searchable}"
+ end
+ end
+ end
+
test "selectable_attributes includes database attributes" do
assert_includes(Job.selectable_attributes, "success")
end
RUN apt-get update && \
apt-get -yq --no-install-recommends -o Acquire::Retries=6 install \
- postgresql-9.6 git build-essential runit curl libpq-dev \
+ postgresql-9.6 postgresql-contrib-9.6 git build-essential runit curl libpq-dev \
libcurl4-openssl-dev libssl1.0-dev zlib1g-dev libpcre3-dev \
openssh-server python-setuptools netcat-traditional \
python-epydoc graphviz bzip2 less sudo virtualenv \
if ! (psql postgres -c "\du" | grep "^ arvados ") >/dev/null ; then
psql postgres -c "create user arvados with password '$database_pw'"
- psql postgres -c "ALTER USER arvados CREATEDB;"
fi
+psql postgres -c "ALTER USER arvados WITH SUPERUSER;"
sed "s/password:.*/password: $database_pw/" <config/database.yml.example >config/database.yml