}.merge opts)
end
- def self.limit_index_columns_read
- # This method returns a list of column names.
- # If an index request reads that column from the database,
- # find_objects_for_index will only fetch objects until it reads
- # max_index_database_read bytes of data from those columns.
- []
- end
-
def find_objects_for_index
@objects ||= model_class.readable_by(*@read_users)
apply_where_limit_order_params
- limit_database_read if (action_name == "index")
end
def apply_filters model_class=nil
@objects = @objects.uniq(@distinct) if not @distinct.nil?
end
- def limit_database_read
- limit_columns = self.class.limit_index_columns_read
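+ # limit_database_read ensures @objects (which must be an
+ # ActiveRecord::Relation) does not return too many results to fit
+ # in memory. It previews the byte size (octet_length) of the
+ # model's limit_index_columns_read columns and, if the running
+ # total reaches max_index_database_read, lowers @limit before
+ # applying it with @objects.limit().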
+ def limit_database_read(model_class:)
+ return if @limit == 0 || @limit == 1
+ model_class ||= self.model_class
+ limit_columns = model_class.limit_index_columns_read
limit_columns &= model_class.columns_for_attributes(@select) if @select
return if limit_columns.empty?
model_class.transaction do
limit_query = @objects.
- except(:select).
+ except(:select, :distinct).
select("(%s) as read_length" %
limit_columns.map { |s| "octet_length(#{s})" }.join(" + "))
new_limit = 0
read_total += record.read_length.to_i
if read_total >= Rails.configuration.max_index_database_read
new_limit -= 1 if new_limit > 1
+ @limit = new_limit
break
elsif new_limit >= @limit
break
end
end
- @limit = new_limit
@objects = @objects.limit(@limit)
# Force @objects to run its query inside this transaction.
@objects.each { |_| break }
end
accept_param_as_json :reader_tokens, Array
- def object_list
+ def object_list(model_class:)
+ if @objects.respond_to?(:except)
+ limit_database_read(model_class: model_class)
+ end
list = {
:kind => "arvados##{(@response_resource_name || resource_name).camelize(:lower)}List",
:etag => "",
end
def render_list
- send_json object_list
+ send_json object_list(model_class: self.model_class)
end
def remote_ip
class Arvados::V1::CollectionsController < ApplicationController
include DbCurrentTime
- def self.limit_index_columns_read
- ["manifest_text"]
- end
-
def create
if resource_attrs[:uuid] and (loc = Keep::Locator.parse(resource_attrs[:uuid]))
resource_attrs[:portable_data_hash] = loc.to_s
@objects = klass.readable_by(*@read_users).
order(request_order).where(where_conds)
- @limit = limit_all - all_objects.count
+ klass_limit = limit_all - all_objects.count
+ @limit = klass_limit
apply_where_limit_order_params klass
- klass_object_list = object_list
+ klass_object_list = object_list(model_class: klass)
klass_items_available = klass_object_list[:items_available] || 0
@items_available += klass_items_available
@offset = [@offset - klass_items_available, 0].max
all_objects += klass_object_list[:items]
+
+ if klass_object_list[:limit] < klass_limit
+ # object_list() had to reduce @limit to comply with
+ # max_index_database_read. From now on, we'll do all queries
+ # with limit=0 and just accumulate items_available.
+ limit_all = all_objects.count
+ end
end
@objects = all_objects
["id", "uuid"]
end
+ def self.limit_index_columns_read
+ # Returns the column names counted against max_index_database_read
+ # (this implementation returns none). If an index request reads any
+ # of these columns, APIs that return lists will only fetch objects
+ # until reaching max_index_database_read bytes from those columns.
+ []
+ end
+
# If current user can manage the object, return an array of uuids of
# users and groups that have permission to write the object. The
# first two elements are always [self.owner_uuid, current user's
super + ["updated_at", "file_names"]
end
+ def self.limit_index_columns_read
+ ["manifest_text"] # size of this column counts toward max_index_database_read
+ end
+
FILE_TOKEN = /^[[:digit:]]+:[[:digit:]]+:/
def check_signatures
return false if self.manifest_text.nil?
Running => [Complete, Cancelled]
}
+ def self.limit_index_columns_read
+ ["mounts"] # size of this column counts toward max_index_database_read
+ end
+
def state_transitions
State_transitions
end
:runtime_constraints, :state, :container_uuid, :use_existing,
:scheduling_parameters, :output_name, :output_ttl]
+ def self.limit_index_columns_read
+ ["mounts"] # size of this column counts toward max_index_database_read
+ end
+
def state_transitions
State_transitions
end
@need_crunch_dispatch_trigger = false
end
+ def self.limit_index_columns_read
+ ["components"] # size of this column counts toward max_index_database_read
+ end
+
def assert_finished
update_attributes(finished_at: finished_at || db_current_time,
success: success.nil? ? false : success,
(Complete = 'Complete'),
]
+ def self.limit_index_columns_read
+ ["components"] # size of this column counts toward max_index_database_read
+ end
+
# if all components have input, the pipeline is Ready
def components_look_ready?
if !self.components || self.components.empty?
t.add :components
t.add :description
end
+
+ def self.limit_index_columns_read
+ ["components"] # size of this column counts toward max_index_database_read
+ end
end
def self.full_text_searchable_columns
super - ["definition"]
end
+
+ def self.limit_index_columns_read
+ ["definition"] # size of this column counts toward max_index_database_read
+ end
end
# normally be returned in a single response).
# Note 1: This setting never reduces the number of returned rows to
# zero, no matter how big the first data row is.
- # Note 2: Currently, this only limits the
- # arvados.v1.collections.list API (GET /arvados/v1/collections), and
- # only takes the size of manifest_text into account. Other fields
- # (e.g., "properties" hashes) are not counted against this limit
- # when returning collections, and the limit is not applied at all
- # for other data types.
+ # Note 2: Currently, this is only checked against a specific set of
+ # columns that tend to get large (for example
+ # collections.manifest_text, containers.mounts, and
+ # workflows.definition; each model's limit_index_columns_read
+ # method lists its columns). Other fields (e.g., "properties"
+ # hashes) are not counted against this limit.
max_index_database_read: 134217728
# Maximum number of items to return when responding to APIs that
}
check_project_contents_response %w'arvados#pipelineInstance arvados#job'
end
+
+ test 'get contents with low max_index_database_read' do
+ # Some result will certainly have at least 12 bytes in a
+ # restricted column, so the returned list must be cut short.
+ Rails.configuration.max_index_database_read = 12
+ authorize_with :active
+ get :contents, {
+ id: groups(:aproject).uuid,
+ format: :json,
+ }
+ assert_response :success
+ assert_not_empty(json_response['items']) # the limit never reduces the result to zero rows
+ assert_operator(json_response['items'].count,
+ :<, json_response['items_available']) # fewer items returned than are available
+ end
end