14484: Adds test for pdh grouping functionality in the container model
[arvados.git] / services / api / db / migrate / 20190322174136_add_file_info_to_collection.rb
1 # Copyright (C) The Arvados Authors. All rights reserved.
2 #
3 # SPDX-License-Identifier: AGPL-3.0
4
5 require "arvados/keep"
6
# Adds the file_count and file_size_total columns to the collections table
# and backfills them from each collection's manifest_text.
class AddFileInfoToCollection < ActiveRecord::Migration
  # Recomputes file_count and file_size_total for every collection whose
  # portable_data_hash appears in +pdhs+.
  #
  # pdhs:: enumerable of portable data hash strings. An empty batch is a
  #        no-op.
  def do_batch(pdhs)
    conn = ActiveRecord::Base.connection

    # Let the adapter quote each value instead of wrapping it in '...' by hand.
    quoted_pdhs = pdhs.map { |pdh| conn.quote(pdh) }
    # An empty list would render as "IN ()", which is a SQL syntax error.
    return if quoted_pdhs.empty?

    collections = conn.exec_query(
      "SELECT DISTINCT portable_data_hash, manifest_text FROM collections "\
      "WHERE portable_data_hash IN (#{quoted_pdhs.join(',')}) "
    )

    collections.rows.each do |pdh, manifest_text|
      manifest = Keep::Manifest.new(manifest_text)
      # NOTE(review): the raw BEGIN/COMMIT statements presumably exist so that
      # each update is committed on its own rather than inside one long
      # migration-wide transaction -- confirm before replacing them with
      # `transaction do`.
      conn.exec_query("BEGIN")
      conn.exec_query("UPDATE collections SET file_count=#{manifest.files_count}, "\
                      "file_size_total=#{manifest.files_size} "\
                      "WHERE portable_data_hash=#{conn.quote(pdh)}")
      conn.exec_query("COMMIT")
    end
  end

  # Creates both columns, then backfills them batch by batch.
  def up
    add_column :collections, :file_count, :integer, default: 0, null: false
    # limit: 8 requests an 8-byte integer; a default 4-byte integer column
    # cannot hold the total size of a collection larger than ~2 GiB.
    add_column :collections, :file_size_total, :integer, limit: 8, default: 0, null: false

    conn = ActiveRecord::Base.connection

    # Count the distinct pdhs inside the database instead of fetching every
    # hash into Ruby just to count the rows. The subquery keeps the result
    # identical to counting the rows of the plain SELECT DISTINCT.
    distinct_pdh_count = conn.select_value(
      "SELECT COUNT(*) FROM "\
      "(SELECT DISTINCT portable_data_hash FROM collections) AS distinct_pdhs"
    ).to_i

    # Generator that queries for all the distinct pdhs greater than last_pdh,
    # at most 1000 per call, and passes each one to the given block.
    ordered_pdh_query = lambda { |last_pdh, &block|
      pdhs = conn.exec_query(
        "SELECT DISTINCT portable_data_hash FROM collections "\
        "WHERE portable_data_hash > #{conn.quote(last_pdh.to_s)} "\
        "ORDER BY portable_data_hash LIMIT 1000"
      )
      pdhs.rows.each do |row|
        block.call(row[0])
      end
    }

    batch_size_max = 1 << 28 # 256 MiB
    # NOTE(review): group_pdhs_for_multiple_transactions is defined on the
    # Container model (not visible here); it presumably yields groups of pdhs
    # bounded by batch_size_max -- confirm against its definition.
    Container.group_pdhs_for_multiple_transactions(ordered_pdh_query,
                                                   distinct_pdh_count,
                                                   batch_size_max,
                                                   "AddFileInfoToCollection") do |pdhs|
      do_batch(pdhs)
    end
  end

  # Drops the two columns added by #up.
  def down
    remove_column :collections, :file_count
    remove_column :collections, :file_size_total
  end
end