X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/c5c82ef67b9dc3cb3619e2bef3a86b9b0f0912e8..c36ec856598f214e340e3335ddd347d131335bf8:/services/api/db/migrate/20190322174136_add_file_info_to_collection.rb diff --git a/services/api/db/migrate/20190322174136_add_file_info_to_collection.rb b/services/api/db/migrate/20190322174136_add_file_info_to_collection.rb old mode 100755 new mode 100644 index 47f5398265..c0cd40d28e --- a/services/api/db/migrate/20190322174136_add_file_info_to_collection.rb +++ b/services/api/db/migrate/20190322174136_add_file_info_to_collection.rb @@ -2,57 +2,16 @@ # # SPDX-License-Identifier: AGPL-3.0 -require "arvados/keep" - -class AddFileInfoToCollection < ActiveRecord::Migration - def do_batch(pdhs) - pdhs_str = '' - pdhs.each do |pdh| - pdhs_str << "'" << pdh << "'" << "," - end - - collections = ActiveRecord::Base.connection.exec_query( - "SELECT DISTINCT portable_data_hash, manifest_text FROM collections "\ - "WHERE portable_data_hash IN (#{pdhs_str[0..-2]}) " - ) - - collections.rows.each do |row| - manifest = Keep::Manifest.new(row[1]) - ActiveRecord::Base.connection.exec_query("BEGIN") - ActiveRecord::Base.connection.exec_query("UPDATE collections SET file_count=#{manifest.files_count}, "\ - "file_size_total=#{manifest.files_size} "\ - "WHERE portable_data_hash='#{row[0]}'") - ActiveRecord::Base.connection.exec_query("COMMIT") - end - end - +class AddFileInfoToCollection < ActiveRecord::Migration[4.2] def up add_column :collections, :file_count, :integer, default: 0, null: false add_column :collections, :file_size_total, :integer, limit: 8, default: 0, null: false - distinct_pdh_count = ActiveRecord::Base.connection.exec_query( - "SELECT DISTINCT portable_data_hash FROM collections" - ).rows.count - - # Generator that queries for all the distince pdhs greater than last_pdh - ordered_pdh_query = lambda { |last_pdh, &block| - pdhs = ActiveRecord::Base.connection.exec_query( - "SELECT DISTINCT portable_data_hash FROM collections "\ - "WHERE portable_data_hash > '#{last_pdh}' "\ - "ORDER BY portable_data_hash LIMIT 1000" - ) - pdhs.rows.each do |row| - block.call(row[0]) - end - } - - batch_size_max = 1 << 28 # 256 MiB - Container.group_pdhs_for_multiple_transactions(ordered_pdh_query, - distinct_pdh_count, - batch_size_max, - "AddFileInfoToCollection") do |pdhs| - do_batch(pdhs) - end + puts "Collections now have two new columns, file_count and file_size_total." + puts "They were initialized with a zero value. If you are upgrading an Arvados" + puts "installation, please run the populate-file-info-columns-in-collections.rb" + puts "script to populate the columns. If this is a new installation, that is not" + puts "necessary." end def down