Merge branch 'master' into 15106-trgm-text-search
[arvados.git] / services / api / app / models / arvados_model.rb
index 05deba7bc153b50f5af256dd7a3cc97f1e942454..91c5a1923c95beaa674dc255835dda50153b5661 100644 (file)
@@ -2,13 +2,16 @@
 #
 # SPDX-License-Identifier: AGPL-3.0
 
+require 'arvados_model_updates'
 require 'has_uuid'
 require 'record_filters'
 require 'serializers'
+require 'request_error'
 
-class ArvadosModel < ActiveRecord::Base
+class ArvadosModel < ApplicationRecord
   self.abstract_class = true
 
+  include ArvadosModelUpdates
   include CurrentApiClient      # current_user, current_api_client, etc.
   include DbCurrentTime
   extend RecordFilters
@@ -24,7 +27,6 @@ class ArvadosModel < ActiveRecord::Base
   after_create :log_create
   after_update :log_update
   after_destroy :log_destroy
-  after_find :convert_serialized_symbols_to_strings
   before_validation :normalize_collection_uuids
   before_validation :set_default_owner
   validate :ensure_valid_uuids
@@ -38,37 +40,47 @@ class ArvadosModel < ActiveRecord::Base
            class_name: 'Link',
            primary_key: :uuid)
 
-  class PermissionDeniedError < StandardError
+  # If async is true at create or update, the permission graph update is
+  # deferred, allowing multiple calls to be made without the performance
+  # penalty.
+  attr_accessor :async_permissions_update
+
+  # Ignore listed attributes on mass assignments
+  def self.protected_attributes
+    []
+  end
+
+  class PermissionDeniedError < RequestError
     # HTTP status code reported for this error: 403 (Forbidden).
     def http_status
       403
     end
   end
 
-  class AlreadyLockedError < StandardError
+  class AlreadyLockedError < RequestError
     def http_status
       422
     end
   end
 
-  class LockFailedError < StandardError
+  class LockFailedError < RequestError
     def http_status
       422
     end
   end
 
-  class InvalidStateTransitionError < StandardError
+  class InvalidStateTransitionError < RequestError
     def http_status
       422
     end
   end
 
-  class UnauthorizedError < StandardError
+  class UnauthorizedError < RequestError
     def http_status
       401
     end
   end
 
-  class UnresolvableContainerError < StandardError
+  class UnresolvableContainerError < RequestError
     def http_status
       422
     end
@@ -89,7 +101,11 @@ class ArvadosModel < ActiveRecord::Base
     # The following permit! is necessary even with
     # "ActionController::Parameters.permit_all_parameters = true",
     # because permit_all does not permit nested attributes.
+    raw_params ||= {}
+
     if raw_params
+      raw_params = raw_params.to_hash
+      raw_params.delete_if { |k, _| self.protected_attributes.include? k }
       serialized_attributes.each do |colname, coder|
         param = raw_params[colname.to_sym]
         if param.nil?
@@ -100,6 +116,15 @@ class ArvadosModel < ActiveRecord::Base
           raise ArgumentError.new("#{colname} parameter cannot have non-string hash keys")
         end
       end
+      # Check JSONB columns that aren't listed on serialized_attributes
+      columns.select{|c| c.type == :jsonb}.collect{|j| j.name}.each do |colname|
+        if serialized_attributes.include? colname || raw_params[colname.to_sym].nil?
+          next
+        end
+        if has_nonstring_keys?(raw_params[colname.to_sym])
+          raise ArgumentError.new("#{colname} parameter cannot have non-string hash keys")
+        end
+      end
     end
     ActionController::Parameters.new(raw_params).permit!
   end
@@ -237,7 +262,7 @@ class ArvadosModel < ActiveRecord::Base
     end.compact.uniq
   end
 
-  # Return a query with read permissions restricted to the union of of the
+  # Return a query with read permissions restricted to the union of the
   # permissions of the members of users_list, i.e. if something is readable by
   # any user in users_list, it will be readable in the query returned by this
   # function.
@@ -255,57 +280,76 @@ class ArvadosModel < ActiveRecord::Base
     # Collect the UUIDs of the authorized users.
     sql_table = kwargs.fetch(:table_name, table_name)
     include_trash = kwargs.fetch(:include_trash, false)
+    include_old_versions = kwargs.fetch(:include_old_versions, false)
 
-    sql_conds = []
+    sql_conds = nil
     user_uuids = users_list.map { |u| u.uuid }
 
-    exclude_trashed_records = if !include_trash and (sql_table == "groups" or sql_table == "collections") then
-                                # Only include records that are not explicitly trashed
-                                "AND #{sql_table}.is_trashed = false"
-                              else
-                                ""
-                              end
+    exclude_trashed_records = ""
+    if !include_trash and (sql_table == "groups" or sql_table == "collections") then
+      # Only include records that are not explicitly trashed
+      exclude_trashed_records = "AND #{sql_table}.is_trashed = false"
+    end
 
     if users_list.select { |u| u.is_admin }.any?
+      # Admin skips most permission checks, but still want to filter on trashed items.
       if !include_trash
         if sql_table != "api_client_authorizations"
-          # Exclude rows where the owner is trashed
-          sql_conds.push "NOT EXISTS(SELECT 1 "+
-                  "FROM #{PERMISSION_VIEW} "+
-                  "WHERE trashed = 1 AND "+
-                  "(#{sql_table}.owner_uuid = target_uuid)) "+
-                  exclude_trashed_records
+          # Only include records where the owner is not trashed
+          sql_conds = "#{sql_table}.owner_uuid NOT IN (SELECT target_uuid FROM #{PERMISSION_VIEW} "+
+                      "WHERE trashed = 1) #{exclude_trashed_records}"
         end
       end
     else
-      trashed_check = if !include_trash then
-                        "AND trashed = 0"
-                      else
-                        ""
-                      end
-
-      owner_check = if sql_table != "api_client_authorizations" and sql_table != "groups" then
-                      "OR (target_uuid = #{sql_table}.owner_uuid AND target_owner_uuid IS NOT NULL)"
-                    else
-                      ""
-                    end
-
-      sql_conds.push "EXISTS(SELECT 1 FROM #{PERMISSION_VIEW} "+
-                     "WHERE user_uuid IN (:user_uuids) AND perm_level >= 1 #{trashed_check} AND (target_uuid = #{sql_table}.uuid #{owner_check})) "+
-                     exclude_trashed_records
+      trashed_check = ""
+      if !include_trash then
+        trashed_check = "AND trashed = 0"
+      end
 
+      # Note: it is possible to combine the direct_check and
+      # owner_check into a single EXISTS() clause, however it turns
+      # out query optimizer doesn't like it and forces a sequential
+      # table scan.  Constructing the query with separate EXISTS()
+      # clauses enables it to use the index.
+      #
+      # see issue 13208 for details.
+
+      # Match a direct read permission link from the user to the record uuid
+      direct_check = "#{sql_table}.uuid IN (SELECT target_uuid FROM #{PERMISSION_VIEW} "+
+                     "WHERE user_uuid IN (:user_uuids) AND perm_level >= 1 #{trashed_check})"
+
+      # Match a read permission link from the user to the record's owner_uuid
+      owner_check = ""
+      if sql_table != "api_client_authorizations" and sql_table != "groups" then
+        owner_check = "OR #{sql_table}.owner_uuid IN (SELECT target_uuid FROM #{PERMISSION_VIEW} "+
+          "WHERE user_uuid IN (:user_uuids) AND perm_level >= 1 #{trashed_check} AND target_owner_uuid IS NOT NULL) "
+      end
+
+      links_cond = ""
       if sql_table == "links"
         # Match any permission link that gives one of the authorized
         # users some permission _or_ gives anyone else permission to
         # view one of the authorized users.
-        sql_conds.push "(#{sql_table}.link_class IN (:permission_link_classes) AND "+
+        links_cond = "OR (#{sql_table}.link_class IN (:permission_link_classes) AND "+
                        "(#{sql_table}.head_uuid IN (:user_uuids) OR #{sql_table}.tail_uuid IN (:user_uuids)))"
       end
+
+      sql_conds = "(#{direct_check} #{owner_check} #{links_cond}) #{exclude_trashed_records}"
+
     end
 
-    self.where(sql_conds.join(' OR '),
-                    user_uuids: user_uuids,
-                    permission_link_classes: ['permission', 'resources'])
+    if !include_old_versions && sql_table == "collections"
+      exclude_old_versions = "#{sql_table}.uuid = #{sql_table}.current_version_uuid"
+      if sql_conds.nil?
+        sql_conds = exclude_old_versions
+      else
+        sql_conds += " AND #{exclude_old_versions}"
+      end
+    end
+
+    self.where(sql_conds,
+               user_uuids: user_uuids,
+               permission_link_classes: ['permission', 'resources'])
   end
 
   def save_with_unique_name!
@@ -330,7 +374,7 @@ class ArvadosModel < ActiveRecord::Base
         # discover a unique name.  It is necessary to handle name choosing at
         # this level (as opposed to the client) to ensure that record creation
         # never fails due to a race condition.
-        err = rn.original_exception
+        err = rn.cause
         raise unless err.is_a?(PG::UniqueViolation)
 
         # Unfortunately ActiveRecord doesn't abstract out any of the
@@ -350,7 +394,13 @@ class ArvadosModel < ActiveRecord::Base
         end
 
         self[:name] = new_name
-        self[:uuid] = nil if uuid_was.nil? && !uuid.nil?
+        if uuid_was.nil? && !uuid.nil?
+          self[:uuid] = nil
+          if self.is_a? Collection
+            # Reset so that it is assigned to the new UUID
+            self[:current_version_uuid] = nil
+          end
+        end
         conn.exec_query 'SAVEPOINT save_with_unique_name'
         retry
       ensure
@@ -360,7 +410,7 @@ class ArvadosModel < ActiveRecord::Base
   end
 
   def logged_attributes
-    attributes.except(*Rails.configuration.unlogged_attributes)
+    attributes.except(*Rails.configuration.AuditLogs.UnloggedAttributes)
   end
 
   def self.full_text_searchable_columns
@@ -369,12 +419,25 @@ class ArvadosModel < ActiveRecord::Base
     end.map(&:name)
   end
 
+  def self.full_text_coalesce
+    full_text_searchable_columns.collect do |column|
+      is_jsonb = self.columns.select{|x|x.name == column}[0].type == :jsonb
+      cast = (is_jsonb || serialized_attributes[column]) ? '::text' : ''
+      "coalesce(#{column}#{cast},'')"
+    end
+  end
+
+  def self.full_text_trgm
+    "(#{full_text_coalesce.join(" || ' ' || ")})"
+  end
+
   # Returns a SQL to_tsvector('english', ...) expression over the
   # full-text-searchable columns. Each column is wrapped in
   # coalesce(<column>,'') and the pieces are joined with a single space.
   def self.full_text_tsvector
     parts = full_text_searchable_columns.collect do |column|
-      cast = serialized_attributes[column] ? '::text' : ''
       # jsonb columns and serialized attributes are cast to text.
+      is_jsonb = self.columns.select{|x|x.name == column}[0].type == :jsonb
+      cast = (is_jsonb || serialized_attributes[column]) ? '::text' : ''
       "coalesce(#{column}#{cast},'')"
     end
-    "to_tsvector('english', #{parts.join(" || ' ' || ")})"
     # The concatenated text is truncated with substr(..., 0, 8000).
     # NOTE(review): PostgreSQL string positions are 1-based, so a start of
     # 0 presumably yields at most 7999 characters; this expression probably
     # has to match an index definition elsewhere -- confirm before changing.
+    "to_tsvector('english', substr(#{parts.join(" || ' ' || ")}, 0, 8000))"
   end
 
   def self.apply_filters query, filters
@@ -416,7 +479,7 @@ class ArvadosModel < ActiveRecord::Base
           end
         rescue ActiveRecord::RecordNotFound => e
           errors.add :owner_uuid, "is not owned by any user: #{e}"
-          return false
+          throw(:abort)
         end
         if uuid_in_path[x]
           if x == owner_uuid
@@ -424,7 +487,7 @@ class ArvadosModel < ActiveRecord::Base
           else
             errors.add :owner_uuid, "has an ownership cycle"
           end
-          return false
+          throw(:abort)
         end
         uuid_in_path[x] = true
       end
@@ -524,11 +587,15 @@ class ArvadosModel < ActiveRecord::Base
 
   # Stamps the bookkeeping columns on this record using db_current_time:
   # created_at (only if not already set), updated_at, owner_uuid (only if
   # unset and the model has that attribute), modified_by_user_uuid,
   # modified_at and modified_by_client_uuid. Always returns true.
   def update_modified_by_fields
     current_time = db_current_time
-    self.created_at = created_at_was || current_time
     # Keep an explicitly assigned created_at; otherwise fall back to the
     # previous value, then to the current time.
+    self.created_at ||= created_at_was || current_time
     self.updated_at = current_time
     self.owner_uuid ||= current_default_owner if self.respond_to? :owner_uuid=
-    self.modified_at = current_time
-    self.modified_by_user_uuid = current_user ? current_user.uuid : nil
     # anonymous_updater and timeless_updater are not defined in this view
     # (presumably provided by ArvadosModelUpdates -- confirm). When set,
     # modified_by_user_uuid / modified_at are left untouched.
+    if !anonymous_updater
+      self.modified_by_user_uuid = current_user ? current_user.uuid : nil
+    end
+    if !timeless_updater
+      self.modified_at = current_time
+    end
     self.modified_by_client_uuid = current_api_client ? current_api_client.uuid : nil
     true
   end
@@ -546,51 +613,24 @@ class ArvadosModel < ActiveRecord::Base
     false
   end
 
-  def self.has_symbols? x
-    if x.is_a? Hash
-      x.each do |k,v|
-        return true if has_symbols?(k) or has_symbols?(v)
-      end
-    elsif x.is_a? Array
-      x.each do |k|
-        return true if has_symbols?(k)
-      end
-    elsif x.is_a? Symbol
-      return true
-    elsif x.is_a? String
-      return true if x.start_with?(':') && !x.start_with?('::')
+  def self.where_serialized(colname, value, md5: false)
     # Builds a where() condition matching rows whose serialized column
     # holds `value`, comparing against both its YAML and its JSON
     # (SafeJSON.dump) encodings.
     #
     # colname - column name (converted with to_s)
     # value   - the value to look for; must respond to empty?
     # md5     - when true, compare md5(<column>) against the MD5 hex
     #           digests of the encodings instead of the raw text
+    colsql = colname.to_s
+    if md5
+      colsql = "md5(#{colsql})"
     end
-    false
-  end
-
-  def self.recursive_stringify x
-    if x.is_a? Hash
-      Hash[x.collect do |k,v|
-             [recursive_stringify(k), recursive_stringify(v)]
-           end]
-    elsif x.is_a? Array
-      x.collect do |k|
-        recursive_stringify k
-      end
-    elsif x.is_a? Symbol
-      x.to_s
-    elsif x.is_a? String and x.start_with?(':') and !x.start_with?('::')
-      x[1..-1]
-    else
-      x
-    end
-  end
-
-  def self.where_serialized(colname, value)
     if value.empty?
       # rails4 stores as null, rails3 stored as serialized [] or {}
-      sql = "#{colname.to_s} is null or #{colname.to_s} IN (?)"
+      sql = "#{colsql} is null or #{colsql} IN (?)"
       sorted = value
     else
-      sql = "#{colname.to_s} IN (?)"
+      sql = "#{colsql} IN (?)"
       # deep_sort_hash is defined outside this view; presumably it puts
       # keys in a canonical order so the encodings compare equal -- confirm.
       sorted = deep_sort_hash(value)
     end
-    where(sql, [sorted.to_yaml, SafeJSON.dump(sorted)])
+    params = [sorted.to_yaml, SafeJSON.dump(sorted)]
+    if md5
+      params = params.map { |x| Digest::MD5.hexdigest(x) }
+    end
+    where(sql, params)
   end
 
   Serializer = {
@@ -613,22 +653,6 @@ class ArvadosModel < ActiveRecord::Base
     self.class.serialized_attributes
   end
 
-  def convert_serialized_symbols_to_strings
-    # ensure_serialized_attribute_type should prevent symbols from
-    # getting into the database in the first place. If someone managed
-    # to get them into the database (perhaps using an older version)
-    # we'll convert symbols to strings when loading from the
-    # database. (Otherwise, loading and saving an object with existing
-    # symbols in a serialized field will crash.)
-    self.class.serialized_attributes.each do |colname, attr|
-      if self.class.has_symbols? attributes[colname]
-        attributes[colname] = self.class.recursive_stringify attributes[colname]
-        send(colname + '=',
-             self.class.recursive_stringify(attributes[colname]))
-      end
-    end
-  end
-
   def foreign_key_attributes
     attributes.keys.select { |a| a.match(/_uuid$/) }
   end
@@ -670,7 +694,7 @@ class ArvadosModel < ActiveRecord::Base
   end
 
   # Returns a pattern string of the form
   # "<ClusterID>-<uuid_prefix>-" followed by a fixed run of "_"
   # characters (single-character wildcards if used with SQL LIKE, as the
   # method name suggests).
   def self.uuid_like_pattern
-    "#{Rails.configuration.uuid_prefix}-#{uuid_prefix}-_______________"
+    "#{Rails.configuration.ClusterID}-#{uuid_prefix}-_______________"
   end
 
   def self.uuid_regex
@@ -748,36 +772,51 @@ class ArvadosModel < ActiveRecord::Base
     end
   end
 
+  def is_audit_logging_enabled?
+    return !(Rails.configuration.AuditLogs.MaxAge.to_i == 0 &&
+             Rails.configuration.AuditLogs.MaxDeleteBatch.to_i > 0)
+  end
+
   # Snapshots the record's current attributes and logged_attributes into
   # @old_attributes / @old_logged_attributes. The Marshal round trip makes
   # deep copies. Does nothing when audit logging is disabled.
   def log_start_state
-    @old_attributes = Marshal.load(Marshal.dump(attributes))
-    @old_logged_attributes = Marshal.load(Marshal.dump(logged_attributes))
+    if is_audit_logging_enabled?
+      @old_attributes = Marshal.load(Marshal.dump(attributes))
+      @old_logged_attributes = Marshal.load(Marshal.dump(logged_attributes))
+    end
   end
 
   def log_change(event_type)
-    log = Log.new(event_type: event_type).fill_object(self)
-    yield log
-    log.save!
-    log_start_state
+    if is_audit_logging_enabled?
+      log = Log.new(event_type: event_type).fill_object(self)
+      yield log
+      log.save!
+      log_start_state
+    end
   end
 
   # after_create callback: writes a 'create' log entry whose 'old'
   # properties are nil and whose new state is taken from this record.
   # Skipped when audit logging is disabled.
   def log_create
-    log_change('create') do |log|
-      log.fill_properties('old', nil, nil)
-      log.update_to self
+    if is_audit_logging_enabled?
+      log_change('create') do |log|
+        log.fill_properties('old', nil, nil)
+        log.update_to self
+      end
     end
   end
 
   # after_update callback: writes an 'update' log entry whose 'old'
   # properties come from the snapshot in @old_attributes /
   # @old_logged_attributes and whose new state is taken from this record.
   # Skipped when audit logging is disabled.
   def log_update
-    log_change('update') do |log|
-      log.fill_properties('old', etag(@old_attributes), @old_logged_attributes)
-      log.update_to self
+    if is_audit_logging_enabled?
+      log_change('update') do |log|
+        log.fill_properties('old', etag(@old_attributes), @old_logged_attributes)
+        log.update_to self
+      end
     end
   end
 
   # after_destroy callback: writes a 'delete' log entry whose 'old'
   # properties come from the snapshot in @old_attributes /
   # @old_logged_attributes and whose new state is nil.
   # Skipped when audit logging is disabled.
   def log_destroy
-    log_change('delete') do |log|
-      log.fill_properties('old', etag(@old_attributes), @old_logged_attributes)
-      log.update_to nil
+    if is_audit_logging_enabled?
+      log_change('delete') do |log|
+        log.fill_properties('old', etag(@old_attributes), @old_logged_attributes)
+        log.update_to nil
+      end
     end
   end
 end