Merge branch '13493-document-federation' refs #13493
[arvados.git] / services / api / db / migrate / 20180917205609_recompute_file_names_index.rb
# Data migration that rewrites the file_names column of every
# collection from the collection's manifest, walking the table in
# size-bounded batches.
class RecomputeFileNamesIndex < ActiveRecord::Migration
  # Upper bound on the summed manifest sizes handled by one batch.
  BATCH_SIZE_MAX = 1 << 28    # 256 MiB

  # Number of distinct portable data hashes fetched per scan query.
  PAGE_SIZE = 1000

  # Recompute file_names for every collections row whose
  # portable_data_hash is listed in +pdhs+, issuing all the updates
  # between a single BEGIN and a single COMMIT.
  #
  # pdhs:: Array of portable data hash strings to process.
  def do_batch(pdhs:)
    conn = ActiveRecord::Base.connection
    # The transaction is delimited with raw statements rather than
    # ActiveRecord's transaction helper.
    # NOTE(review): presumably so that each batch is committed on its
    # own even if the migration is itself run inside a transaction --
    # confirm before changing.
    conn.exec_query('BEGIN')
    # unscoped matches the enumeration done in #up, so every hash
    # found there can also be loaded here.  distinct takes a boolean
    # flag (not a column name), so rows are de-duplicated over both
    # selected columns.
    Collection.
      unscoped.
      select(:portable_data_hash, :manifest_text).
      where(portable_data_hash: pdhs).
      distinct.
      each do |c|
      # One UPDATE per hash covers all rows sharing that hash.
      conn.exec_query("update collections set file_names=$1 where portable_data_hash=$2",
                      "update file_names index",
                      [[nil, c.manifest_files], [nil, c.portable_data_hash]])
    end
    conn.exec_query('COMMIT')
  end

  # Process collections in multiple transactions, where the total
  # size of all manifest_texts processed in a transaction is no more
  # than BATCH_SIZE_MAX.  Collections whose manifest_text is bigger
  # than BATCH_SIZE_MAX are updated in their own individual
  # transactions.
  def up
    batch_size = 0
    batch_pdhs = []
    last_pdh = '0'
    # Counted over the same unscoped relation that is scanned below,
    # so the logged done/total figures refer to the same set.
    total = Collection.unscoped.distinct.count(:portable_data_hash)
    done = 0
    loop do
      # Keyset pagination: fetch the next page of distinct hashes
      # strictly greater than the last one already seen.
      page = Collection.
        unscoped.
        select(:portable_data_hash).distinct.
        order(:portable_data_hash).
        where('portable_data_hash > ?', last_pdh).
        limit(PAGE_SIZE).
        to_a
      break if page.empty?
      page.each do |c|
        last_pdh = c.portable_data_hash
        # The second '+'-separated field of the hash is used as the
        # manifest size (presumably the manifest_text length in
        # bytes -- confirm), so sizes are known without loading any
        # manifest here.
        manifest_size = c.portable_data_hash.split('+')[1].to_i
        # Flush before adding an entry that would push a non-empty
        # batch past the limit; an oversized entry therefore ends up
        # in a batch by itself.
        if batch_size > 0 && batch_size + manifest_size > BATCH_SIZE_MAX
          do_batch(pdhs: batch_pdhs)
          done += batch_pdhs.size
          Rails.logger.info("RecomputeFileNamesIndex: #{done}/#{total}")
          batch_pdhs = []
          batch_size = 0
        end
        batch_pdhs << c.portable_data_hash
        batch_size += manifest_size
      end
    end
    # Flush whatever is left; nothing to do if no hash was found at all.
    unless batch_pdhs.empty?
      do_batch(pdhs: batch_pdhs)
      done += batch_pdhs.size
      Rails.logger.info("RecomputeFileNamesIndex: #{done}/#{total}")
    end
    Rails.logger.info("RecomputeFileNamesIndex: finished")
  end

  # Intentionally empty: rolling back this migration changes nothing.
  def down
  end
end