Merge branch 'master' into 4523-full-text-search
author Radhika Chippada <radhika@curoverse.com>
Thu, 29 Jan 2015 02:56:18 +0000 (21:56 -0500)
committer Radhika Chippada <radhika@curoverse.com>
Thu, 29 Jan 2015 02:56:18 +0000 (21:56 -0500)
services/api/app/models/arvados_model.rb
services/api/app/models/collection.rb
services/api/db/migrate/20150116142953_full_text_search.rb [new file with mode: 0644]
services/api/lib/record_filters.rb
services/api/test/integration/collections_api_test.rb
services/api/test/integration/groups_test.rb
services/api/test/unit/collection_test.rb

index 308da7fa11576acc00066bb47de5592cf2543f96..96c3841abd71a4b942c2d30f2c510384a2991461 100644 (file)
@@ -205,6 +205,25 @@ class ArvadosModel < ActiveRecord::Base
     attributes
   end
 
+  # Names of the columns of this model whose type is :string or :text.
+  # full_text_tsvector concatenates exactly these columns.
+  def self.full_text_searchable_columns
+    textual = self.columns.select { |col| [:string, :text].include?(col.type) }
+    textual.map(&:name)
+  end
+
+  # Builds the SQL expression used for full-text search on this model: a
+  # to_tsvector('english', ...) call over every column returned by
+  # full_text_searchable_columns. Each column is wrapped in coalesce(col,'')
+  # (in SQL, concatenating a NULL would make the whole expression NULL) and
+  # the pieces are joined with a single space.
+  #
+  # Returns the expression as a String. Column names are interpolated
+  # as-is, so they must come from the schema, never from request input.
+  def self.full_text_tsvector
+    separator = " || ' ' || "
+    pieces = self.full_text_searchable_columns.map do |column|
+      "coalesce(#{column},'')"
+    end
+    "to_tsvector('english', #{pieces.join(separator)})"
+  end
+
   protected
 
   def ensure_ownership_path_leads_to_user
index 457fb5f778cb3429d0d19f3104b6142a2a171dfe..80838f43e0e99116cfb908de1a84efd370962952 100644 (file)
@@ -300,6 +300,10 @@ class Collection < ArvadosModel
     super - ["manifest_text"]
   end
 
+  # Full-text searchable columns for collections: the inherited list with
+  # "manifest_text" removed, so the manifest itself is not part of the
+  # full-text search expression.
+  def self.full_text_searchable_columns
+    super - ["manifest_text"]
+  end
+
   protected
   def portable_manifest_text
     portable_manifest = self[:manifest_text].dup
diff --git a/services/api/db/migrate/20150116142953_full_text_search.rb b/services/api/db/migrate/20150116142953_full_text_search.rb
new file mode 100644 (file)
index 0000000..4d93210
--- /dev/null
@@ -0,0 +1,18 @@
+# Creates a GIN full-text search index on each of the collections, groups,
+# jobs, pipeline_instances and pipeline_templates tables.
+class FullTextSearch < ActiveRecord::Migration
+
+  # Each index is built over the expression returned by the matching
+  # model's full_text_tsvector.
+  # NOTE(review): the expressions are computed from the model classes when
+  # the migration runs, so they follow whatever columns the models report
+  # at that moment rather than a column list fixed in this file.
+  def up
+    execute "CREATE INDEX collections_full_text_search_idx ON collections USING gin(#{Collection.full_text_tsvector});"
+    execute "CREATE INDEX groups_full_text_search_idx ON groups USING gin(#{Group.full_text_tsvector});"
+    execute "CREATE INDEX jobs_full_text_search_idx ON jobs USING gin(#{Job.full_text_tsvector});"
+    execute "CREATE INDEX pipeline_instances_full_text_search_idx ON pipeline_instances USING gin(#{PipelineInstance.full_text_tsvector});"
+    execute "CREATE INDEX pipeline_templates_full_text_search_idx ON pipeline_templates USING gin(#{PipelineTemplate.full_text_tsvector});"
+  end
+
+  # Drops the five indexes by name, in the reverse of the order used in up.
+  def down
+    remove_index :pipeline_templates, :name => 'pipeline_templates_full_text_search_idx'
+    remove_index :pipeline_instances, :name => 'pipeline_instances_full_text_search_idx'
+    remove_index :jobs, :name => 'jobs_full_text_search_idx'
+    remove_index :groups, :name => 'groups_full_text_search_idx'
+    remove_index :collections, :name => 'collections_full_text_search_idx'
+  end
+end
index 9408dcfade120e5b68235f952eb980ef7c443c89..0156bb5bfcedf73e0b2400d5057dc3e29811c40a 100644 (file)
@@ -22,7 +22,7 @@ module RecordFilters
     ar_table_name = model_class.table_name
     filters.each do |filter|
       attrs_in, operator, operand = filter
-      if attrs_in == 'any'
+      if attrs_in == 'any' && operator != '@@'
         attrs = model_class.searchable_columns(operator)
       elsif attrs_in.is_a? Array
         attrs = attrs_in
@@ -35,7 +35,12 @@ module RecordFilters
         raise ArgumentError.new("Invalid operator '#{operator}' (#{operator.class}) in filter")
       end
       cond_out = []
-      attrs.each do |attr|
+
+      if operator == '@@' # full-text-search
+        cond_out << model_class.full_text_tsvector+" @@ to_tsquery(?)"
+        param_out << operand.split.each {|s| s.concat(':*')}.join(' & ')
+      else
+       attrs.each do |attr|
         if !model_class.searchable_columns(operator).index attr.to_s
           raise ArgumentError.new("Invalid attribute '#{attr}' in filter")
         end
@@ -104,6 +109,7 @@ module RecordFilters
           end
           cond_out << cond.join(' OR ')
         end
+       end
       end
       conds_out << cond_out.join(' OR ') if cond_out.any?
     end
index bea76aabfd09339c7e6e7a639451b0fcaa21c858..b6cea740243123fb1552728c709d117738e37e75 100644 (file)
@@ -204,4 +204,44 @@ class CollectionsApiTest < ActionDispatch::IntegrationTest
       assert_not_nil first_item
     end
   end
+
+  # Creates a collection through the API, then checks that '@@' filters
+  # built from its directory and file names return it.
+  test "search collection using full text search" do
+    # create collection to be searched for
+    signed_manifest = Collection.sign_manifest(". 85877ca2d7e05498dd3d109baf2df106+95+A3a4e26a366ee7e4ed3e476ccf05354761be2e4ae@545a9920 0:95:file_in_subdir1\n./subdir2/subdir3 2bbc341c702df4d8f42ec31f16c10120+64+A315d7e7bad2ce937e711fc454fae2d1194d14d64@545a9920 0:32:file1_in_subdir3.txt 32:32:file2_in_subdir3.txt\n./subdir2/subdir3/subdir4 2bbc341c702df4d8f42ec31f16c10120+64+A315d7e7bad2ce937e711fc454fae2d1194d14d64@545a9920 0:32:file3_in_subdir4.txt 32:32:file4_in_subdir4.txt\n", api_token(:active))
+    post "/arvados/v1/collections", {
+      format: :json,
+      collection: {description: 'specific collection description', manifest_text: signed_manifest}.to_json,
+    }, auth(:active)
+    assert_response :success
+    assert_equal true, json_response['manifest_text'].include?('file4_in_subdir4.txt')
+
+    # search using the filename
+    search_using_full_text_search 'subdir2', 1
+    search_using_full_text_search 'subdir2/subdir', 1
+    search_using_full_text_search 'subdir2/subdir3/subdir4', 1
+    search_using_full_text_search 'file4', 1
+    search_using_full_text_search 'file4_in_subdir', 1
+    search_using_full_text_search 'subdir2 file4', 1      # look for prefixes subdir2 and file4
+    search_using_full_text_search 'subdir2 ile4', 0       # both words must match; 'ile4' is expected not to
+    search_using_full_text_search 'subdir2/subdir3/subdir4 file4_in_subdir4.txt', 1
+  end
+
+  # Lists collections as the :active user with an ['any', '@@', search_filter]
+  # filter and asserts on the number of items returned: exactly
+  # expected_items, and for 0 additionally that items_available is 0.
+  def search_using_full_text_search search_filter, expected_items
+    filters_json = [['any', '@@', search_filter]].to_json
+    get '/arvados/v1/collections', { :filters => filters_json }, auth(:active)
+    assert_response :success
+    items = json_response['items']
+    assert_not_nil items
+    if expected_items != 0
+      assert_equal expected_items, items.size, "Did not find results for #{search_filter}"
+      assert_not_nil items.first
+    else
+      assert_equal 0, json_response['items_available']
+      assert_equal 0, items.size
+    end
+  end
 end
index 0f6f93aa1307bf5b90743f3915ef0d75e88b18c7..f37b9860b52faa3a98008bad888ff4a11a8711fd 100644 (file)
@@ -39,4 +39,31 @@ class GroupsTest < ActionDispatch::IntegrationTest
     end
   end
 
+  # Each row is [search term, whether any result is expected]; one test is
+  # generated per row against the contents of the :aproject group.
+  [
+    ['Collection_', true],           # collections and pipeline templates
+    ['hash', true],                  # pipeline templates
+    ['fa7aeb5140e2848d39b', true],   # script_parameter of pipeline instances
+    ['no-such-thing', false],        # no match expected
+  ].each do |search_filter, expect_results|
+    test "full text search of group-owned objects for #{search_filter}" do
+      get "/arvados/v1/groups/contents", {
+        id: groups(:aproject).uuid,
+        limit: 5,
+        :filters => [['any', '@@', search_filter]].to_json
+      }, auth(:active)
+      assert_response :success
+      if expect_results
+        assert_operator(0, :<, json_response['items'].count,
+                        "expected results but received 0")
+        # every returned item must belong to the queried project
+        json_response['items'].each do |item|
+          assert item['uuid']
+          assert_equal groups(:aproject).uuid, item['owner_uuid']
+        end
+      else
+        assert_operator(0, :==, json_response['items'].count,
+                        "expected no results but received #{json_response['items'].length}")
+      end
+    end
+  end
+
 end
index 16d041bea2e93a41eede051ade8dc87882936aa1..25ab6cd68496ceed0137078d1ea07ced04b81adf 100644 (file)
@@ -81,4 +81,41 @@ class CollectionTest < ActiveSupport::TestCase
       end
     end
   end
+
+  # Runs the full-text search expression directly against the collections
+  # table for a list of [search term, whether any result is expected] rows.
+  test "full text search for collections" do
+    # file_names column does not get populated when fixtures are loaded, hence setup test data
+    act_as_system_user do
+      Collection.create(manifest_text: ". acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:foo\n")
+      Collection.create(manifest_text: ". 37b51d194a7513e45b56f6524f2d51f2+3 0:3:bar\n")
+      Collection.create(manifest_text: ". 85877ca2d7e05498dd3d109baf2df106+95+A3a4e26a366ee7e4ed3e476ccf05354761be2e4ae@545a9920 0:95:file_in_subdir1\n./subdir2/subdir3 2bbc341c702df4d8f42ec31f16c10120+64+A315d7e7bad2ce937e711fc454fae2d1194d14d64@545a9920 0:32:file1.txt 32:32:file2.txt\n./subdir2/subdir3/subdir4 2bbc341c702df4d8f42ec31f16c10120+64+A315d7e7bad2ce937e711fc454fae2d1194d14d64@545a9920 0:32:file3.txt 32:32:file4.txt")
+    end
+
+    [
+      ['foo', true],
+      ['foo bar', false],                     # no collection matching both
+      ['foo&bar', false],                     # no collection matching both
+      ['foo|bar', true],                      # works only with no spaces between the words
+      ['Gnu public', true],                   # both prefixes found, though not consecutively
+      ['Gnu&public', true],                   # both prefixes found, though not consecutively
+      ['file4', true],                        # prefix match
+      ['file4.txt', true],                    # whole string match
+      ['filex', false],                       # no such prefix
+      ['subdir', true],                       # prefix matches
+      ['subdir2', true],
+      ['subdir2/', true],
+      ['subdir2/subdir3', true],
+      ['subdir2/subdir3/subdir4', true],
+      ['subdir2 file4', true],                # look for both prefixes
+      ['subdir4', false],                     # not a prefix match
+    ].each do |search_filter, expect_results|
+      # Append ':*' (tsquery prefix match) to each whitespace-separated word
+      # and AND the words together with '&'.
+      search_filters = search_filter.split.each {|s| s.concat(':*')}.join('&')
+      results = Collection.where("#{Collection.full_text_tsvector} @@ to_tsquery(?)",
+                                 "#{search_filters}")
+      if expect_results
+        assert_equal true, results.length>0, "No results found for '#{search_filter}'"
+      else
+        assert_equal 0, results.length, "Found #{results.length} results for '#{search_filter}'"
+      end
+    end
+  end
 end