attributes
end
+ def self.full_text_searchable_columns
+ self.columns.select do |col|
+ if col.type == :string or col.type == :text
+ true
+ end
+ end.map(&:name)
+ end
+
+ def self.full_text_tsvector
+ tsvector_str = "to_tsvector('english', "
+ first = true
+ self.full_text_searchable_columns.each do |column|
+ tsvector_str += " || ' ' || " if not first
+ tsvector_str += "coalesce(#{column},'')"
+ first = false
+ end
+ tsvector_str += ")"
+ end
+
protected
# Returns the parent implementation's result with "manifest_text" removed.
# NOTE(review): the method name suggests an ownership check rather than a
# column list; this def line and body may belong to different methods in the
# full file -- confirm against the complete source before relying on this.
def ensure_ownership_path_leads_to_user
super - ["manifest_text"]
end
+ def self.full_text_searchable_columns
+ super - ["manifest_text"]
+ end
+
protected
def portable_manifest_text
portable_manifest = self[:manifest_text].dup
--- /dev/null
+class FullTextSearch < ActiveRecord::Migration
+
+ def up
+ execute "CREATE INDEX collections_full_text_search_idx ON collections USING gin(#{Collection.full_text_tsvector});"
+ execute "CREATE INDEX groups_full_text_search_idx ON groups USING gin(#{Group.full_text_tsvector});"
+ execute "CREATE INDEX jobs_full_text_search_idx ON jobs USING gin(#{Job.full_text_tsvector});"
+ execute "CREATE INDEX pipeline_instances_full_text_search_idx ON pipeline_instances USING gin(#{PipelineInstance.full_text_tsvector});"
+ execute "CREATE INDEX pipeline_templates_full_text_search_idx ON pipeline_templates USING gin(#{PipelineTemplate.full_text_tsvector});"
+ end
+
+ def down
+ remove_index :pipeline_templates, :name => 'pipeline_templates_full_text_search_idx'
+ remove_index :pipeline_instances, :name => 'pipeline_instances_full_text_search_idx'
+ remove_index :jobs, :name => 'jobs_full_text_search_idx'
+ remove_index :groups, :name => 'groups_full_text_search_idx'
+ remove_index :collections, :name => 'collections_full_text_search_idx'
+ end
+end
ar_table_name = model_class.table_name
filters.each do |filter|
attrs_in, operator, operand = filter
- if attrs_in == 'any'
+ if attrs_in == 'any' && operator != '@@'
attrs = model_class.searchable_columns(operator)
elsif attrs_in.is_a? Array
attrs = attrs_in
raise ArgumentError.new("Invalid operator '#{operator}' (#{operator.class}) in filter")
end
cond_out = []
- attrs.each do |attr|
+
+ if operator == '@@' # full-text-search
+ cond_out << model_class.full_text_tsvector+" @@ to_tsquery(?)"
+ param_out << operand.split.each {|s| s.concat(':*')}.join(' & ')
+ else
+ attrs.each do |attr|
if !model_class.searchable_columns(operator).index attr.to_s
raise ArgumentError.new("Invalid attribute '#{attr}' in filter")
end
end
cond_out << cond.join(' OR ')
end
+ end
end
conds_out << cond_out.join(' OR ') if cond_out.any?
end
assert_not_nil first_item
end
end
+
+ test "search collection using full text search" do
+ # create collection to be searched for
+ signed_manifest = Collection.sign_manifest(". 85877ca2d7e05498dd3d109baf2df106+95+A3a4e26a366ee7e4ed3e476ccf05354761be2e4ae@545a9920 0:95:file_in_subdir1\n./subdir2/subdir3 2bbc341c702df4d8f42ec31f16c10120+64+A315d7e7bad2ce937e711fc454fae2d1194d14d64@545a9920 0:32:file1_in_subdir3.txt 32:32:file2_in_subdir3.txt\n./subdir2/subdir3/subdir4 2bbc341c702df4d8f42ec31f16c10120+64+A315d7e7bad2ce937e711fc454fae2d1194d14d64@545a9920 0:32:file3_in_subdir4.txt 32:32:file4_in_subdir4.txt\n", api_token(:active))
+ post "/arvados/v1/collections", {
+ format: :json,
+ collection: {description: 'specific collection description', manifest_text: signed_manifest}.to_json,
+ }, auth(:active)
+ assert_response :success
+ assert_equal true, json_response['manifest_text'].include?('file4_in_subdir4.txt')
+
+ created = json_response
+
+ # search using the filename
+ search_using_full_text_search 'subdir2', 1
+ search_using_full_text_search 'subdir2/subdir', 1
+ search_using_full_text_search 'subdir2/subdir3/subdir4', 1
+ search_using_full_text_search 'file4', 1
+ search_using_full_text_search 'file4_in_subdir', 1
+ search_using_full_text_search 'subdir2 file4', 1 # look for prefixes subdir2 and file4
+ search_using_full_text_search 'subdir2 ile4', 0
+ search_using_full_text_search 'subdir2/subdir3/subdir4 file4_in_subdir4.txt', 1
+ end
+
+ def search_using_full_text_search search_filter, expected_items
+ get '/arvados/v1/collections', {
+ :filters => [['any', '@@', search_filter]].to_json
+ }, auth(:active)
+ assert_response :success
+ response_items = json_response['items']
+ assert_not_nil response_items
+ if expected_items == 0
+ assert_equal 0, json_response['items_available']
+ assert_equal 0, response_items.size
+ else
+ assert_equal expected_items, response_items.size, "Did not find results for #{search_filter}"
+ first_item = response_items.first
+ assert_not_nil first_item
+ end
+ end
end
end
end
+ [
+ ['Collection_', true], # collections and pipelines templates
+ ['hash', true], # pipeline templates
+ ['fa7aeb5140e2848d39b', true], # script_parameter of pipeline instances
+ ['no-such-thing', false], # script_parameter of pipeline instances
+ ].each do |search_filter, expect_results|
+ test "full text search of group-owned objects for #{search_filter}" do
+ get "/arvados/v1/groups/contents", {
+ id: groups(:aproject).uuid,
+ limit: 5,
+ :filters => [['any', '@@', search_filter]].to_json
+ }, auth(:active)
+ assert_response :success
+ if expect_results
+ assert_operator(0, :<, json_response['items'].count,
+ "expected results but received 0")
+ json_response['items'].each do |item|
+ assert item['uuid']
+ assert_equal groups(:aproject).uuid, item['owner_uuid']
+ end
+ else
+ assert_operator(0, :==, json_response['items'].count,
+ "expected no results but received #{json_response['items'].length}")
+ end
+ end
+ end
+
end
end
end
end
+
+ test "full text search for collections" do
+ # file_names column does not get populated when fixtures are loaded, hence setup test data
+ act_as_system_user do
+ Collection.create(manifest_text: ". acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:foo\n")
+ Collection.create(manifest_text: ". 37b51d194a7513e45b56f6524f2d51f2+3 0:3:bar\n")
+ Collection.create(manifest_text: ". 85877ca2d7e05498dd3d109baf2df106+95+A3a4e26a366ee7e4ed3e476ccf05354761be2e4ae@545a9920 0:95:file_in_subdir1\n./subdir2/subdir3 2bbc341c702df4d8f42ec31f16c10120+64+A315d7e7bad2ce937e711fc454fae2d1194d14d64@545a9920 0:32:file1.txt 32:32:file2.txt\n./subdir2/subdir3/subdir4 2bbc341c702df4d8f42ec31f16c10120+64+A315d7e7bad2ce937e711fc454fae2d1194d14d64@545a9920 0:32:file3.txt 32:32:file4.txt")
+ end
+
+ [
+ ['foo', true],
+ ['foo bar', false], # no collection matching both
+ ['foo&bar', false], # no collection matching both
+ ['foo|bar', true], # works only no spaces between the words
+ ['Gnu public', true], # both prefixes found, though not consecutively
+ ['Gnu&public', true], # both prefixes found, though not consecutively
+ ['file4', true], # prefix match
+ ['file4.txt', true], # whole string match
+ ['filex', false], # no such prefix
+ ['subdir', true], # prefix matches
+ ['subdir2', true],
+ ['subdir2/', true],
+ ['subdir2/subdir3', true],
+ ['subdir2/subdir3/subdir4', true],
+ ['subdir2 file4', true], # look for both prefixes
+ ['subdir4', false], # not a prefix match
+ ].each do |search_filter, expect_results|
+ search_filters = search_filter.split.each {|s| s.concat(':*')}.join('&')
+ results = Collection.where("#{Collection.full_text_tsvector} @@ to_tsquery(?)",
+ "#{search_filters}")
+ if expect_results
+ assert_equal true, results.length>0, "No results found for '#{search_filter}'"
+ else
+ assert_equal 0, results.length, "Found #{results.length} results for '#{search_filter}'"
+ end
+ end
+ end
end