#
# Groups pdhs (portable data hashes) into batches for database updates,
# to avoid updating too many database rows in a single transaction.
- def self.group_pdhs_for_multiple_transactions(distinct_ordered_pdhs, distinct_pdh_count, log_prefix)
- batch_size_max = 1 << 28 # 256 MiB
+ def self.group_pdhs_for_multiple_transactions(distinct_ordered_pdhs, distinct_pdh_count, batch_size_max, log_prefix)
batch_size = 0
batch_pdhs = {}
last_pdh = '0'
end
}
- Container.group_pdhs_for_multiple_transactions(ordered_pdh_query, distinct_pdh_count, "AddFileInfoToCollection") do |pdhs|
+ batch_size_max = 1 << 28 # 256 MiB
+ Container.group_pdhs_for_multiple_transactions(ordered_pdh_query,
+ distinct_pdh_count,
+ batch_size_max,
+ "AddFileInfoToCollection") do |pdhs|
do_batch(pdhs)
end
end
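
The diff shows only the signature change and the call site; the body of the grouping loop itself is elided. The following is a minimal sketch of how it could work, inferred from the initializations above (batch_size, batch_pdhs, last_pdh) and the expected batches in the test below. It assumes distinct_ordered_pdhs is a callable that yields pdhs greater than last_pdh in order (as pdh_lambda does in the test), that the manifest size is read from the '+<size>' suffix of each pdh, and that a batch is yielded just before it would exceed batch_size_max. The done counter and the Rails.logger progress line are illustrative additions, not taken from the actual implementation:

def self.group_pdhs_for_multiple_transactions(distinct_ordered_pdhs,
                                              distinct_pdh_count,
                                              batch_size_max,
                                              log_prefix)
  batch_size = 0
  batch_pdhs = {}
  last_pdh = '0'
  done = 0
  distinct_ordered_pdhs.call(last_pdh) do |pdh|
    last_pdh = pdh
    # The manifest size is encoded after the '+' in the pdh, e.g. 'x1+30' -> 30.
    manifest_size = pdh.split('+')[1].to_i
    if batch_size > 0 && batch_size + manifest_size > batch_size_max
      # Adding this pdh would push the batch past batch_size_max, so flush first.
      yield batch_pdhs.keys
      done += batch_pdhs.size
      Rails.logger.info("#{log_prefix}: grouped #{done} of #{distinct_pdh_count} pdhs")
      batch_pdhs = {}
      batch_size = 0
    end
    batch_pdhs[pdh] = true
    batch_size += manifest_size
  end
  # Flush whatever remains as the final batch.
  yield batch_pdhs.keys unless batch_pdhs.empty?
end

Keeping the batch as hash keys (batch_pdhs) rather than an array presumably guards against the same pdh being delivered twice by repeated queries; that reading is an inference from the batch_pdhs = {} initialization, not something the diff spells out.
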
test "pdh_grouping_by_manifest_size" do
batch_size_max = 200
pdhs_in = ['x1+30', 'x2+30', 'x3+201', 'x4+100', 'x5+100']
+ pdh_lambda = lambda { |last_pdh, &block|
+ pdhs = pdhs_in.select{|pdh| pdh > last_pdh}
+ pdhs.each do |p|
+ block.call(p)
+ end
+ }
batched_pdhs = []
- Container.group_pdhs_by_manifest_size(pdhs_in, batch_size_max) do |pdhs|
+ Container.group_pdhs_for_multiple_transactions(pdh_lambda, pdhs_in.size, batch_size_max, "") do |pdhs|
batched_pdhs << pdhs
end
expected = [['x1+30', 'x2+30'], ['x3+201'], ['x4+100', 'x5+100']]
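With batch_size_max = 200, this is the grouping the sketch above would produce: 'x1+30' and 'x2+30' total 60 and adding 'x3+201' would exceed the limit, so the first batch is flushed; 'x3+201' is over the limit on its own and becomes a single-element batch; 'x4+100' and 'x5+100' total exactly 200 and share the final batch.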