Merge branch '19993-groupby-forupdate'
[arvados.git] / services / api / db / migrate / 20180917205609_recompute_file_names_index.rb
1 # Copyright (C) The Arvados Authors. All rights reserved.
2 #
3 # SPDX-License-Identifier: AGPL-3.0
4
# Recomputes the file_names column of every collection from its
# manifest. The work is committed in several transactions so that a
# single transaction never covers more than BATCH_SIZE_MAX bytes of
# manifest text (unless one manifest alone is bigger than that).
class RecomputeFileNamesIndex < ActiveRecord::Migration[4.2]
  # Largest combined manifest size processed in one transaction.
  BATCH_SIZE_MAX = 1 << 28    # 256 MiB

  # Number of distinct portable data hashes fetched per enumeration
  # query in #up.
  PAGE_SIZE = 1000

  # Updates file_names for all collections whose portable_data_hash
  # is listed in +pdhs+, inside a single BEGIN/COMMIT pair.
  #
  # pdhs:: Array of portable data hash strings. Nil or an empty array
  #        is a no-op (no transaction is opened).
  #
  # NOTE(review): the transaction is driven with raw BEGIN/COMMIT
  # statements instead of a `transaction do` block, presumably so
  # that each batch is committed on its own even when the migration
  # runner already has a transaction open -- confirm before changing.
  def do_batch(pdhs:)
    return if pdhs.nil? || pdhs.empty?

    conn = ActiveRecord::Base.connection
    conn.exec_query('BEGIN')
    # unscoped: look the hashes up through the same (scope-free) view
    # of the table that #up used to enumerate them, so a default
    # scope on Collection (not visible from this file) cannot cause
    # some of them to be skipped.
    #
    # distinct takes no column argument; it applies to the whole
    # select list, giving one row per (portable_data_hash,
    # manifest_text) pair.
    Collection.
      unscoped.
      select(:portable_data_hash, :manifest_text).
      where(portable_data_hash: pdhs).
      distinct.
      each do |c|
      # The raw UPDATE touches every row sharing this hash.
      conn.exec_query("update collections set file_names=$1 where portable_data_hash=$2",
                      "update file_names index",
                      [[nil, c.manifest_files], [nil, c.portable_data_hash]])
    end
    conn.exec_query('COMMIT')
  end

  # Walks all distinct portable data hashes in ascending order, one
  # page of PAGE_SIZE at a time, and hands them to #do_batch in
  # groups.
  def up
    # Process collections in multiple transactions, where the total
    # size of all manifest_texts processed in a transaction is no more
    # than BATCH_SIZE_MAX.  Collections whose manifest_text is bigger
    # than BATCH_SIZE_MAX are updated in their own individual
    # transactions.
    batch_size = 0
    batch_pdhs = {}
    # Pagination cursor: each page asks for hashes strictly greater
    # than the last one already seen.
    last_pdh = '0'
    # Counted unscoped so the progress total refers to the same set
    # of hashes that the loop below enumerates.
    total = Collection.unscoped.distinct.count(:portable_data_hash)
    done = 0

    loop do
      page = Collection.
        unscoped.
        select(:portable_data_hash).distinct.
        order(:portable_data_hash).
        where('portable_data_hash > ?', last_pdh).
        limit(PAGE_SIZE).to_a
      break if page.empty?

      page.each do |c|
        last_pdh = c.portable_data_hash
        # The manifest size is read from the field after the first
        # '+' of the hash; a hash without that field counts as 0.
        manifest_size = c.portable_data_hash.split('+')[1].to_i
        # Flush before adding if this manifest would push a non-empty
        # batch over the limit.
        if batch_size > 0 && batch_size + manifest_size > BATCH_SIZE_MAX
          done = commit_batch(batch_pdhs.keys, done, total)
          batch_pdhs = {}
          batch_size = 0
        end
        batch_pdhs[c.portable_data_hash] = true
        batch_size += manifest_size
      end
    end

    # Whatever is left over after the last page forms the final batch.
    commit_batch(batch_pdhs.keys, done, total) unless batch_pdhs.empty?
    Rails.logger.info("RecomputeFileNamesIndex: finished")
  end

  # Intentionally empty: rolling back leaves file_names as computed.
  def down
  end

  private

  # Runs #do_batch for +pdhs+, logs cumulative progress, and returns
  # the updated count of processed hashes.
  def commit_batch(pdhs, done, total)
    do_batch(pdhs: pdhs)
    done += pdhs.size
    Rails.logger.info("RecomputeFileNamesIndex: #{done}/#{total}")
    done
  end
end