14484: Fixes grouping bug by simplifying nested pdhs arrays
[arvados.git] / services / api / db / migrate / 20190322174136_add_file_info_to_collection.rb
1 # Copyright (C) The Arvados Authors. All rights reserved.
2 #
3 # SPDX-License-Identifier: AGPL-3.0
4
# Adds the file_count and file_size_total columns to collections and
# backfills them by parsing each collection's manifest_text.
class AddFileInfoToCollection < ActiveRecord::Migration
  # Matches a manifest token of the form "<digits>:<digits>:<name>".
  # Capture 1 is the second numeric field (used as the file size) and
  # capture 2 is the file name.
  FILE_TOKEN = /\A[[:digit:]]+:([[:digit:]]+):(.*)\z/

  # Recomputes file_count and file_size_total for every collection whose
  # portable_data_hash is listed in +pdhs+.
  #
  # pdhs - a collection of portable data hash strings; an empty batch is a
  #        no-op.
  def do_batch(pdhs)
    # An empty batch would otherwise render as "IN ()", which SQL databases
    # such as PostgreSQL reject as a syntax error.
    return if pdhs.empty?

    conn = ActiveRecord::Base.connection
    pdh_list = pdhs.map { |pdh| conn.quote(pdh) }.join(',')

    collections = conn.exec_query(
      'SELECT DISTINCT portable_data_hash, manifest_text FROM collections '\
      "WHERE portable_data_hash IN (#{pdh_list})"
    )

    collections.rows.each do |pdh, manifest_text|
      file_count, file_size_total = file_stats(manifest_text)

      # Each UPDATE is wrapped in its own explicit BEGIN/COMMIT, so progress
      # is committed row by row rather than in one large transaction.
      conn.exec_query('BEGIN')
      conn.exec_query("UPDATE collections SET file_count=#{file_count}, "\
                      "file_size_total=#{file_size_total} "\
                      "WHERE portable_data_hash=#{conn.quote(pdh)}")
      conn.exec_query('COMMIT')
    end
  end

  def up
    add_column :collections, :file_count, :integer, default: 0, null: false
    # limit: 8 makes this an 8-byte integer: the sum of file sizes in a
    # collection can exceed what a default 4-byte integer column can hold.
    add_column :collections, :file_size_total, :integer, limit: 8, default: 0, null: false

    Container.group_pdhs_for_multiple_transactions('AddFileInfoToCollection') do |pdhs|
      do_batch(pdhs)
    end
  end

  def down
    remove_column :collections, :file_count
    remove_column :collections, :file_size_total
  end

  private

  # Counts the file tokens in +manifest_text+ and sums their sizes.
  #
  # manifest_text - the manifest as a String; nil is treated as empty.
  #
  # Returns a two-element Array: [file_count, file_size_total].
  def file_stats(manifest_text)
    file_count = 0
    file_size_total = 0

    manifest_text.to_s.scan(/\S+/) do |token|
      file_token = FILE_TOKEN.match(token)
      next unless file_token

      filesize = file_token[1].to_i
      filename = file_token[2]

      # Avoid counting empty dir placeholders. Skip only this token (next,
      # not break) so files listed after a placeholder are still counted.
      # NOTE(review): this compares the raw token text; confirm whether a
      # placeholder name can also appear in an escaped form.
      next if filename == '.' && filesize.zero?

      file_count += 1
      file_size_total += filesize
    end

    [file_count, file_size_total]
  end
end