11065: Merge branch 'master' into 11065-optional-audit-logging
[arvados.git] / services / api / app / models / arvados_model.rb
index 96dc85e1e44138485899942a084e8095d3eb9182..c67a3961d94a404468fcebd08107869b8e59ac46 100644 (file)
@@ -1,18 +1,21 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+require 'arvados_model_updates'
 require 'has_uuid'
 require 'record_filters'
 require 'serializers'
+require 'request_error'
 
 class ArvadosModel < ActiveRecord::Base
   self.abstract_class = true
 
+  include ArvadosModelUpdates
   include CurrentApiClient      # current_user, current_api_client, etc.
   include DbCurrentTime
   extend RecordFilters
 
-  attr_protected :created_at
-  attr_protected :modified_by_user_uuid
-  attr_protected :modified_by_client_uuid
-  attr_protected :modified_at
   after_initialize :log_start_state
   before_save :ensure_permission_to_save
   before_save :ensure_owner_uuid_is_permitted
@@ -27,43 +30,48 @@ class ArvadosModel < ActiveRecord::Base
   after_find :convert_serialized_symbols_to_strings
   before_validation :normalize_collection_uuids
   before_validation :set_default_owner
-  validate :ensure_serialized_attribute_type
   validate :ensure_valid_uuids
 
   # Note: This only returns permission links. It does not account for
   # permissions obtained via user.is_admin or
   # user.uuid==object.owner_uuid.
   has_many(:permissions,
+           ->{where(link_class: 'permission')},
            foreign_key: :head_uuid,
            class_name: 'Link',
-           primary_key: :uuid,
-           conditions: "link_class = 'permission'")
+           primary_key: :uuid)
 
-  class PermissionDeniedError < StandardError
+  class PermissionDeniedError < RequestError
     def http_status
       403
     end
   end
 
-  class AlreadyLockedError < StandardError
+  class AlreadyLockedError < RequestError
     def http_status
       422
     end
   end
 
-  class InvalidStateTransitionError < StandardError
+  class LockFailedError < RequestError
     def http_status
       422
     end
   end
 
-  class UnauthorizedError < StandardError
+  class InvalidStateTransitionError < RequestError
+    def http_status
+      422
+    end
+  end
+
+  class UnauthorizedError < RequestError
     def http_status
       401
     end
   end
 
-  class UnresolvableContainerError < StandardError
+  class UnresolvableContainerError < RequestError
     def http_status
       422
     end
@@ -77,6 +85,46 @@ class ArvadosModel < ActiveRecord::Base
     "#{current_api_base}/#{self.class.to_s.pluralize.underscore}/#{self.uuid}"
   end
 
+  def self.permit_attribute_params raw_params
+    # strong_parameters does not provide security: permissions are
+    # implemented with before_save hooks.
+    #
+    # The following permit! is necessary even with
+    # "ActionController::Parameters.permit_all_parameters = true",
+    # because permit_all does not permit nested attributes.
+    if raw_params
+      serialized_attributes.each do |colname, coder|
+        param = raw_params[colname.to_sym]
+        if param.nil?
+          # ok
+        elsif !param.is_a?(coder.object_class)
+          raise ArgumentError.new("#{colname} parameter must be #{coder.object_class}, not #{param.class}")
+        elsif has_nonstring_keys?(param)
+          raise ArgumentError.new("#{colname} parameter cannot have non-string hash keys")
+        end
+      end
+    end
+    ActionController::Parameters.new(raw_params).permit!
+  end
+
+  def initialize raw_params={}, *args
+    super(self.class.permit_attribute_params(raw_params), *args)
+  end
+
+  # Reload "old attributes" for logging, too.
+  def reload(*args)
+    super
+    log_start_state
+  end
+
+  def self.create raw_params={}, *args
+    super(permit_attribute_params(raw_params), *args)
+  end
+
+  def update_attributes raw_params={}, *args
+    super(self.class.permit_attribute_params(raw_params), *args)
+  end
+
   def self.selectable_attributes(template=:user)
     # Return an array of attribute name strings that can be selected
     # in the given template.
@@ -155,6 +203,14 @@ class ArvadosModel < ActiveRecord::Base
     ["id", "uuid"]
   end
 
+  def self.limit_index_columns_read
+    # This method returns a list of column names.
+    # If an index request reads that column from the database,
+    # APIs that return lists will only fetch objects until reaching
+    # max_index_database_read bytes of data from those columns.
+    []
+  end
+
   # If current user can manage the object, return an array of uuids of
   # users and groups that have permission to write the object. The
   # first two elements are always [self.owner_uuid, current user's
@@ -199,48 +255,70 @@ class ArvadosModel < ActiveRecord::Base
       kwargs = {}
     end
 
-    # Check if any of the users are admin.  If so, we're done.
-    if users_list.select { |u| u.is_admin }.any?
-      return self
-    end
-
     # Collect the UUIDs of the authorized users.
+    sql_table = kwargs.fetch(:table_name, table_name)
+    include_trash = kwargs.fetch(:include_trash, false)
+
+    sql_conds = nil
     user_uuids = users_list.map { |u| u.uuid }
 
-    # Collect the UUIDs of all groups readable by any of the
-    # authorized users. If one of these (or the UUID of one of the
-    # authorized users themselves) is an object's owner_uuid, that
-    # object is readable.
-    owner_uuids = user_uuids + users_list.flat_map { |u| u.groups_i_can(:read) }
-    owner_uuids.uniq!
+    exclude_trashed_records = ""
+    if !include_trash and (sql_table == "groups" or sql_table == "collections") then
+      # Only include records that are not explicitly trashed
+      exclude_trashed_records = "AND #{sql_table}.is_trashed = false"
+    end
 
-    sql_conds = []
-    sql_table = kwargs.fetch(:table_name, table_name)
+    if users_list.select { |u| u.is_admin }.any?
+      # Admin skips most permission checks, but still want to filter on trashed items.
+      if !include_trash
+        if sql_table != "api_client_authorizations"
+          # Only include records where the owner is not trashed
+          sql_conds = "NOT EXISTS(SELECT 1 FROM #{PERMISSION_VIEW} "+
+                      "WHERE trashed = 1 AND "+
+                      "(#{sql_table}.owner_uuid = target_uuid)) #{exclude_trashed_records}"
+        end
+      end
+    else
+      trashed_check = ""
+      if !include_trash then
+        trashed_check = "AND trashed = 0"
+      end
 
-    # Match any object (evidently a group or user) whose UUID is
-    # listed explicitly in owner_uuids.
-    sql_conds += ["#{sql_table}.uuid in (:owner_uuids)"]
+      # Note: it is possible to combine the direct_check and
+      # owner_check into a single EXISTS() clause, however it turns
+      # out query optimizer doesn't like it and forces a sequential
+      # table scan.  Constructing the query with separate EXISTS()
+      # clauses enables it to use the index.
+      #
+      # see issue 13208 for details.
+
+      # Match a direct read permission link from the user to the record uuid
+      direct_check = "EXISTS(SELECT 1 FROM #{PERMISSION_VIEW} "+
+                     "WHERE user_uuid IN (:user_uuids) AND perm_level >= 1 #{trashed_check} AND target_uuid = #{sql_table}.uuid)"
+
+      # Match a read permission link from the user to the record's owner_uuid
+      owner_check = ""
+      if sql_table != "api_client_authorizations" and sql_table != "groups" then
+        owner_check = "OR EXISTS(SELECT 1 FROM #{PERMISSION_VIEW} "+
+          "WHERE user_uuid IN (:user_uuids) AND perm_level >= 1 #{trashed_check} AND target_uuid = #{sql_table}.owner_uuid AND target_owner_uuid IS NOT NULL) "
+      end
 
-    # Match any object whose owner is listed explicitly in
-    # owner_uuids.
-    sql_conds += ["#{sql_table}.owner_uuid IN (:owner_uuids)"]
+      links_cond = ""
+      if sql_table == "links"
+        # Match any permission link that gives one of the authorized
+        # users some permission _or_ gives anyone else permission to
+        # view one of the authorized users.
+        links_cond = "OR (#{sql_table}.link_class IN (:permission_link_classes) AND "+
+                       "(#{sql_table}.head_uuid IN (:user_uuids) OR #{sql_table}.tail_uuid IN (:user_uuids)))"
+      end
 
-    # Match the head of any permission link whose tail is listed
-    # explicitly in owner_uuids.
-    sql_conds += ["#{sql_table}.uuid IN (SELECT head_uuid FROM links WHERE link_class='permission' AND tail_uuid IN (:owner_uuids))"]
+      sql_conds = "(#{direct_check} #{owner_check} #{links_cond}) #{exclude_trashed_records}"
 
-    if sql_table == "links"
-      # Match any permission link that gives one of the authorized
-      # users some permission _or_ gives anyone else permission to
-      # view one of the authorized users.
-      sql_conds += ["(#{sql_table}.link_class in (:permission_link_classes) AND "+
-                    "(#{sql_table}.head_uuid IN (:user_uuids) OR #{sql_table}.tail_uuid IN (:user_uuids)))"]
     end
 
-    where(sql_conds.join(' OR '),
-          owner_uuids: owner_uuids,
-          user_uuids: user_uuids,
-          permission_link_classes: ['permission', 'resources'])
+    self.where(sql_conds,
+               user_uuids: user_uuids,
+               permission_link_classes: ['permission', 'resources'])
   end
 
   def save_with_unique_name!
@@ -300,13 +378,14 @@ class ArvadosModel < ActiveRecord::Base
 
   def self.full_text_searchable_columns
     self.columns.select do |col|
-      col.type == :string or col.type == :text
+      [:string, :text, :jsonb].include?(col.type)
     end.map(&:name)
   end
 
   def self.full_text_tsvector
     parts = full_text_searchable_columns.collect do |column|
-      "coalesce(#{column},'')"
+      cast = serialized_attributes[column] ? '::text' : ''
+      "coalesce(#{column}#{cast},'')"
     end
     "to_tsvector('english', #{parts.join(" || ' ' || ")})"
   end
@@ -458,14 +537,30 @@ class ArvadosModel < ActiveRecord::Base
 
   def update_modified_by_fields
     current_time = db_current_time
+    self.created_at = created_at_was || current_time
     self.updated_at = current_time
     self.owner_uuid ||= current_default_owner if self.respond_to? :owner_uuid=
     self.modified_at = current_time
-    self.modified_by_user_uuid = current_user ? current_user.uuid : nil
+    if !anonymous_updater
+      self.modified_by_user_uuid = current_user ? current_user.uuid : nil
+    end
     self.modified_by_client_uuid = current_api_client ? current_api_client.uuid : nil
     true
   end
 
+  def self.has_nonstring_keys? x
+    if x.is_a? Hash
+      x.each do |k,v|
+        return true if !(k.is_a?(String) || k.is_a?(Symbol)) || has_nonstring_keys?(v)
+      end
+    elsif x.is_a? Array
+      x.each do |v|
+        return true if has_nonstring_keys?(v)
+      end
+    end
+    false
+  end
+
   def self.has_symbols? x
     if x.is_a? Hash
       x.each do |k,v|
@@ -501,9 +596,24 @@ class ArvadosModel < ActiveRecord::Base
     end
   end
 
-  def self.where_serialized(colname, value)
-    sorted = deep_sort_hash(value)
-    where("#{colname.to_s} IN (?)", [sorted.to_yaml, SafeJSON.dump(sorted)])
+  def self.where_serialized(colname, value, md5: false)
+    colsql = colname.to_s
+    if md5
+      colsql = "md5(#{colsql})"
+    end
+    if value.empty?
+      # rails4 stores as null, rails3 stored as serialized [] or {}
+      sql = "#{colsql} is null or #{colsql} IN (?)"
+      sorted = value
+    else
+      sql = "#{colsql} IN (?)"
+      sorted = deep_sort_hash(value)
+    end
+    params = [sorted.to_yaml, SafeJSON.dump(sorted)]
+    if md5
+      params = params.map { |x| Digest::MD5.hexdigest(x) }
+    end
+    where(sql, params)
   end
 
   Serializer = {
@@ -512,25 +622,18 @@ class ArvadosModel < ActiveRecord::Base
   }
 
   def self.serialize(colname, type)
-    super(colname, Serializer[type])
+    coder = Serializer[type]
+    @serialized_attributes ||= {}
+    @serialized_attributes[colname.to_s] = coder
+    super(colname, coder)
   end
 
-  def ensure_serialized_attribute_type
-    # Specifying a type in the "serialize" declaration causes rails to
-    # raise an exception if a different data type is retrieved from
-    # the database during load().  The validation preventing such
-    # crash-inducing records from being inserted in the database in
-    # the first place seems to have been left as an exercise to the
-    # developer.
-    self.class.serialized_attributes.each do |colname, attr|
-      if attr.object_class
-        if self.attributes[colname].class != attr.object_class
-          self.errors.add colname.to_sym, "must be a #{attr.object_class.to_s}, not a #{self.attributes[colname].class.to_s}"
-        elsif self.class.has_symbols? attributes[colname]
-          self.errors.add colname.to_sym, "must not contain symbols: #{attributes[colname].inspect}"
-        end
-      end
-    end
+  def self.serialized_attributes
+    @serialized_attributes ||= {}
+  end
+
+  def serialized_attributes
+    self.class.serialized_attributes
   end
 
   def convert_serialized_symbols_to_strings
@@ -543,8 +646,8 @@ class ArvadosModel < ActiveRecord::Base
     self.class.serialized_attributes.each do |colname, attr|
       if self.class.has_symbols? attributes[colname]
         attributes[colname] = self.class.recursive_stringify attributes[colname]
-        self.send(colname + '=',
-                  self.class.recursive_stringify(attributes[colname]))
+        send(colname + '=',
+             self.class.recursive_stringify(attributes[colname]))
       end
     end
   end
@@ -662,42 +765,57 @@ class ArvadosModel < ActiveRecord::Base
     if self == ArvadosModel
       # If called directly as ArvadosModel.find_by_uuid rather than via subclass,
       # delegate to the appropriate subclass based on the given uuid.
-      self.resource_class_for_uuid(uuid).unscoped.find_by_uuid(uuid)
+      self.resource_class_for_uuid(uuid).find_by_uuid(uuid)
     else
       super
     end
   end
 
+  def is_audit_logging_enabled?
+    return !(Rails.configuration.max_audit_log_age.to_i == 0 &&
+             Rails.configuration.max_audit_log_delete_batch.to_i > 0)
+  end
+
   def log_start_state
-    @old_attributes = Marshal.load(Marshal.dump(attributes))
-    @old_logged_attributes = Marshal.load(Marshal.dump(logged_attributes))
+    if is_audit_logging_enabled?
+      @old_attributes = Marshal.load(Marshal.dump(attributes))
+      @old_logged_attributes = Marshal.load(Marshal.dump(logged_attributes))
+    end
   end
 
   def log_change(event_type)
-    log = Log.new(event_type: event_type).fill_object(self)
-    yield log
-    log.save!
-    log_start_state
+    if is_audit_logging_enabled?
+      log = Log.new(event_type: event_type).fill_object(self)
+      yield log
+      log.save!
+      log_start_state
+    end
   end
 
   def log_create
-    log_change('create') do |log|
-      log.fill_properties('old', nil, nil)
-      log.update_to self
+    if is_audit_logging_enabled?
+      log_change('create') do |log|
+        log.fill_properties('old', nil, nil)
+        log.update_to self
+      end
     end
   end
 
   def log_update
-    log_change('update') do |log|
-      log.fill_properties('old', etag(@old_attributes), @old_logged_attributes)
-      log.update_to self
+    if is_audit_logging_enabled?
+      log_change('update') do |log|
+        log.fill_properties('old', etag(@old_attributes), @old_logged_attributes)
+        log.update_to self
+      end
     end
   end
 
   def log_destroy
-    log_change('delete') do |log|
-      log.fill_properties('old', etag(@old_attributes), @old_logged_attributes)
-      log.update_to nil
+    if is_audit_logging_enabled?
+      log_change('delete') do |log|
+        log.fill_properties('old', etag(@old_attributes), @old_logged_attributes)
+        log.update_to nil
+      end
     end
   end
 end