Merge branch '17984-cwl-project-uuid-users' into main
[arvados.git] / services / api / lib / group_pdhs.rb
1 # Copyright (C) The Arvados Authors. All rights reserved.
2 #
3 # SPDX-License-Identifier: AGPL-3.0
4
module GroupPdhs
  # NOTE: Migration 20190322174136_add_file_info_to_collection.rb relies on this function.
  #
  # Change with caution!
  #
  # Partitions a stream of portable data hashes (pdhs) into batches so that
  # callers can run one database update per batch instead of touching too
  # many rows in a single transaction.
  #
  # distinct_ordered_pdhs:: callable invoked as +call(last_pdh) { |pdh| ... }+.
  #                         It is called repeatedly, each time with the most
  #                         recently seen pdh (initially '0'), until a call
  #                         produces no pdh at all.
  # distinct_pdh_count::    total used only in the progress log message.
  # batch_size_max::        upper bound for the summed sizes in one batch. A
  #                         single pdh larger than the bound still gets a
  #                         batch of its own.
  # log_prefix::            string prepended to every log line.
  #
  # Yields an Array of pdhs for each batch. The last batch is always yielded
  # after the source is exhausted, even when it is empty.
  def self.group_pdhs_for_multiple_transactions(distinct_ordered_pdhs, distinct_pdh_count, batch_size_max, log_prefix)
    pending = {}
    pending_size = 0
    cursor = '0'
    flushed_count = 0
    exhausted = false

    until exhausted
      # Assume this pass is empty; the block below clears the flag as soon as
      # the source hands us at least one pdh.
      exhausted = true
      distinct_ordered_pdhs.call(cursor) do |pdh|
        exhausted = false
        cursor = pdh

        # The integer after the first '+' is treated as the manifest size;
        # a missing field counts as 0.
        _digest, size_field = pdh.split('+')
        size = size_field.to_i

        # Flush before adding when this pdh would push a non-empty batch
        # over the limit.
        overflows = pending_size > 0 && pending_size + size > batch_size_max
        if overflows
          yield pending.keys
          flushed_count += pending.size
          Rails.logger.info(log_prefix + ": #{flushed_count}/#{distinct_pdh_count}")
          pending = {}
          pending_size = 0
        end

        pending[pdh] = true
        pending_size += size
      end
    end

    # Whatever is left over (possibly nothing) forms the final batch.
    yield pending.keys
    Rails.logger.info(log_prefix + ": finished")
  end
end