Merge branch 'master' into 14873-api-rails5-upgrade
[arvados.git] / services / api / db / migrate / 20190322174136_add_file_info_to_collection.rb
1 # Copyright (C) The Arvados Authors. All rights reserved.
2 #
3 # SPDX-License-Identifier: AGPL-3.0
4
5 require "arvados/keep"
6 require "group_pdhs"
7
# Adds the file_count and file_size_total columns to the collections table
# and backfills them from each collection's manifest_text, working through
# the distinct portable data hashes in batches.
class AddFileInfoToCollection < ActiveRecord::Migration
  # Recomputes file_count and file_size_total for every collections row whose
  # portable_data_hash appears in +pdhs+.
  #
  # pdhs - enumerable of portable data hash strings. An empty list is a no-op.
  def do_batch(pdhs)
    conn = ActiveRecord::Base.connection
    quoted_pdhs = pdhs.map { |pdh| conn.quote(pdh) }

    # An empty list would render as "IN ()", which is not valid SQL.
    return if quoted_pdhs.empty?

    collections = conn.exec_query(
      "SELECT DISTINCT portable_data_hash, manifest_text FROM collections "\
      "WHERE portable_data_hash IN (#{quoted_pdhs.join(',')}) "
    )

    collections.rows.each do |pdh, manifest_text|
      manifest = Keep::Manifest.new(manifest_text)
      # Each UPDATE is wrapped in its own explicit BEGIN/COMMIT.
      # NOTE(review): presumably this is so progress is committed row by row
      # instead of in one long transaction -- confirm before restructuring.
      conn.exec_query("BEGIN")
      conn.exec_query("UPDATE collections SET file_count=#{manifest.files_count}, "\
                      "file_size_total=#{manifest.files_size} "\
                      "WHERE portable_data_hash=#{conn.quote(pdh)}")
      conn.exec_query("COMMIT")
    end
  end

  # Adds both columns (NOT NULL, default 0) and then backfills them for all
  # existing collections via GroupPdhs.group_pdhs_for_multiple_transactions.
  def up
    add_column :collections, :file_count, :integer, default: 0, null: false
    add_column :collections, :file_size_total, :integer, limit: 8, default: 0, null: false

    conn = ActiveRecord::Base.connection

    # Count in the database rather than loading every distinct pdh into Ruby
    # just to count the rows. The subquery keeps the result identical to
    # counting the rows of a plain SELECT DISTINCT (a NULL group, if any,
    # still counts as one row).
    distinct_pdh_count = conn.select_value(
      "SELECT COUNT(*) FROM "\
      "(SELECT DISTINCT portable_data_hash FROM collections) AS distinct_pdhs"
    ).to_i

    # Generator that queries for all the distinct pdhs greater than last_pdh,
    # at most 1000 at a time, and passes each one to the given block.
    ordered_pdh_query = lambda { |last_pdh, &block|
      # to_s keeps a nil last_pdh rendering as the empty string literal ''
      # (quote(nil) would produce NULL and match nothing).
      pdhs = conn.exec_query(
        "SELECT DISTINCT portable_data_hash FROM collections "\
        "WHERE portable_data_hash > #{conn.quote(last_pdh.to_s)} "\
        "ORDER BY portable_data_hash LIMIT 1000"
      )
      pdhs.rows.each do |row|
        block.call(row[0])
      end
    }

    batch_size_max = 1 << 28 # 256 MiB
    GroupPdhs.group_pdhs_for_multiple_transactions(ordered_pdh_query,
                                                   distinct_pdh_count,
                                                   batch_size_max,
                                                   "AddFileInfoToCollection") do |pdhs|
      do_batch(pdhs)
    end
  end

  # Drops the two columns added by #up.
  def down
    remove_column :collections, :file_count
    remove_column :collections, :file_size_total
  end
end