|current_version_uuid|string|UUID of the collection's current version. On new collections, it'll be equal to the @uuid@ attribute.||
|version|number|Version number, starting at 1 on new collections. This attribute is read-only.||
|preserve_version|boolean|When set to true on a current version, it will be saved on the next versionable update.||
+|file_count|number|The total number of files in the collection. This attribute is read-only.||
+|file_size_total|number|The sum of the file sizes in the collection. This attribute is read-only.||
h3. Conditions of creating a Collection
validate :ensure_storage_classes_contain_non_empty_strings
validate :versioning_metadata_updates, on: :update
validate :past_versions_cannot_be_updated, on: :update
+ after_validation :set_file_count_and_total_size
before_save :set_file_names
around_update :manage_versioning
t.add :version
t.add :current_version_uuid
t.add :preserve_version
+ t.add :file_count
+ t.add :file_size_total
end
after_initialize do
true
end
+ # after_validation hook that keeps file_count and file_size_total derived
+ # from manifest_text. Always returns true.
+ def set_file_count_and_total_size
+   if manifest_text_changed?
+     # A changed manifest is the only thing allowed to move the stats.
+     parsed = Keep::Manifest.new(manifest_text)
+     self.file_size_total = parsed.files_size
+     self.file_count = parsed.files_count
+     return true
+   end
+
+   if file_count_changed? || file_size_total_changed?
+     # The stats were assigned directly while the manifest stayed the same:
+     # put back the values they had before the assignment.
+     self.file_count = file_count_was
+     self.file_size_total = file_size_total_was
+   end
+   true
+ end
+
def manifest_files
return '' if !self.manifest_text
--- /dev/null
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+require "arvados/keep"
+require "group_pdhs"
+
+# Adds the file_count and file_size_total columns to the collections table
+# and backfills them from each collection's manifest_text.
+class AddFileInfoToCollection < ActiveRecord::Migration
+  # Recomputes file_count and file_size_total for every collection whose
+  # portable_data_hash appears in pdhs.
+  #
+  # pdhs - Array of portable data hash Strings. May be empty, in which case
+  #        nothing is queried or updated.
+  def do_batch(pdhs)
+    # An empty list would be rendered as "IN ()", which is not valid SQL.
+    return if pdhs.empty?
+
+    conn = ActiveRecord::Base.connection
+    pdhs_str = pdhs.map { |pdh| conn.quote(pdh) }.join(",")
+
+    collections = conn.exec_query(
+      "SELECT DISTINCT portable_data_hash, manifest_text FROM collections "\
+      "WHERE portable_data_hash IN (#{pdhs_str}) "
+    )
+
+    collections.rows.each do |row|
+      manifest = Keep::Manifest.new(row[1])
+      # Each portable_data_hash is updated inside its own BEGIN/COMMIT pair.
+      conn.exec_query("BEGIN")
+      conn.exec_query("UPDATE collections SET file_count=#{manifest.files_count}, "\
+                      "file_size_total=#{manifest.files_size} "\
+                      "WHERE portable_data_hash=#{conn.quote(row[0])}")
+      conn.exec_query("COMMIT")
+    end
+  end
+
+  # Adds both columns (defaulting to 0) and then fills them in, batch by
+  # batch, for every distinct portable_data_hash in the table.
+  def up
+    add_column :collections, :file_count, :integer, default: 0, null: false
+    add_column :collections, :file_size_total, :integer, limit: 8, default: 0, null: false
+
+    conn = ActiveRecord::Base.connection
+
+    distinct_pdh_count = conn.exec_query(
+      "SELECT DISTINCT portable_data_hash FROM collections"
+    ).rows.count
+
+    # Generator that queries for the distinct pdhs greater than last_pdh,
+    # at most 1000 per call, and passes each one to the given block.
+    ordered_pdh_query = lambda { |last_pdh, &block|
+      pdhs = conn.exec_query(
+        "SELECT DISTINCT portable_data_hash FROM collections "\
+        "WHERE portable_data_hash > #{conn.quote(last_pdh)} "\
+        "ORDER BY portable_data_hash LIMIT 1000"
+      )
+      pdhs.rows.each do |row|
+        block.call(row[0])
+      end
+    }
+
+    batch_size_max = 1 << 28 # 256 MiB
+    GroupPdhs.group_pdhs_for_multiple_transactions(ordered_pdh_query,
+                                                   distinct_pdh_count,
+                                                   batch_size_max,
+                                                   "AddFileInfoToCollection") do |pdhs|
+      do_batch(pdhs)
+    end
+  end
+
+  # Drops the two columns added by #up.
+  def down
+    remove_column :collections, :file_count
+    remove_column :collections, :file_size_total
+  end
+end
storage_classes_confirmed_at timestamp without time zone,
current_version_uuid character varying,
version integer DEFAULT 1 NOT NULL,
- preserve_version boolean DEFAULT false
+ preserve_version boolean DEFAULT false,
+ file_count integer DEFAULT 0 NOT NULL,
+ file_size_total bigint DEFAULT 0 NOT NULL
);
INSERT INTO schema_migrations (version) VALUES ('20190214214814');
+INSERT INTO schema_migrations (version) VALUES ('20190322174136');
+
--- /dev/null
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+module GroupPdhs
+  # NOTE: Migration 20190322174136_add_file_info_to_collection.rb relies on this function.
+  #
+  # Change with caution!
+  #
+  # Correctly groups pdhs to use for batch database updates. Helps avoid
+  # updating too many database rows in a single transaction.
+  #
+  # distinct_ordered_pdhs - callable invoked as call(last_pdh) { |pdh| ... }.
+  #                         It is expected to pass the block pdhs that sort
+  #                         after last_pdh, in ascending order, and to pass
+  #                         none once the supply is exhausted (that is what
+  #                         ends the loop).
+  # distinct_pdh_count    - total number of pdhs; only used in progress logs.
+  # batch_size_max        - upper bound for the summed size hints (the number
+  #                         after "+" in each pdh) of one batch. A single pdh
+  #                         whose hint exceeds the bound gets a batch of its
+  #                         own.
+  # log_prefix            - String prepended to every log line.
+  #
+  # Yields an Array of pdhs for each batch. Nothing is yielded when the
+  # callable never produces a pdh.
+  def self.group_pdhs_for_multiple_transactions(distinct_ordered_pdhs, distinct_pdh_count, batch_size_max, log_prefix)
+    batch_size = 0
+    batch_pdhs = {}
+    last_pdh = '0'
+    done = 0
+    any = true
+
+    while any
+      any = false
+      distinct_ordered_pdhs.call(last_pdh) do |pdh|
+        any = true
+        last_pdh = pdh
+        manifest_size = pdh.split('+')[1].to_i
+        # Flush the pending batch before adding a pdh that would push it
+        # past the limit; an empty pending batch is never flushed here.
+        if batch_size > 0 && batch_size + manifest_size > batch_size_max
+          yield batch_pdhs.keys
+          done += batch_pdhs.size
+          Rails.logger.info(log_prefix + ": #{done}/#{distinct_pdh_count}")
+          batch_pdhs = {}
+          batch_size = 0
+        end
+        batch_pdhs[pdh] = true
+        batch_size += manifest_size
+      end
+    end
+    # The pending batch is empty only when no pdh was ever produced; do not
+    # hand callers an empty list in that case.
+    yield batch_pdhs.keys unless batch_pdhs.empty?
+    Rails.logger.info(log_prefix + ": finished")
+  end
+end
name: owned_by_active
version: 2
+collection_owned_by_active_with_file_stats:
+ uuid: zzzzz-4zz18-fjeod4od92kfj5f
+ current_version_uuid: zzzzz-4zz18-fjeod4od92kfj5f
+ portable_data_hash: fa7aeb5140e2848d39b416daeef4ffc5+45
+ owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+ created_at: 2014-02-03T17:22:54Z
+ modified_by_client_uuid: zzzzz-ozdt8-brczlopd8u8d0jr
+ modified_by_user_uuid: zzzzz-tpzed-d9tiejq69daie8f
+ modified_at: 2014-02-03T17:22:54Z
+ updated_at: 2014-02-03T17:22:54Z
+ manifest_text: ". 37b51d194a7513e45b56f6524f2d51f2+3 0:3:bar\n"
+ file_count: 1
+ file_size_total: 3
+ name: owned_by_active_with_file_stats
+ version: 2
+
collection_owned_by_active_past_version_1:
uuid: zzzzz-4zz18-znfnqtbbv4spast
current_version_uuid: zzzzz-4zz18-bv31uwvy3neko21
assert_equal 'value_1', json_response['properties']['property_1']
end
+ # Each row is [manifest_text, expected file_count, expected file_size_total]
+ # for a collection created through the API with that manifest.
+ [
+   [". d41d8cd98f00b204e9800998ecf8427e 0:34:foo.txt\n", 1, 34],
+   [". d41d8cd98f00b204e9800998ecf8427e 0:34:foo.txt 0:30:foo.txt 0:30:foo1.txt 0:30:foo2.txt 0:30:foo3.txt 0:30:foo4.txt\n", 5, 184],
+   [". d41d8cd98f00b204e9800998ecf8427e 0:0:.\n", 0, 0]
+ ].each do |manifest_text, expected_count, expected_size|
+   test "create collection with valid manifest #{manifest_text} and expect file stats" do
+     authorize_with :active
+     create_params = { collection: { manifest_text: manifest_text } }
+     post :create, create_params
+     assert_response 200
+     assert_equal expected_count, json_response['file_count']
+     assert_equal expected_size, json_response['file_size_total']
+   end
+ end
+
+ # Replacing the manifest of an existing collection must refresh both stats.
+ test "update collection manifest and expect new file stats" do
+   authorize_with :active
+   target = collections(:collection_owned_by_active_with_file_stats)
+   update_params = {
+     id: target.uuid,
+     collection: { manifest_text: ". d41d8cd98f00b204e9800998ecf8427e 0:34:foo.txt\n" }
+   }
+   post :update, update_params
+   assert_response 200
+   assert_equal 1, json_response['file_count']
+   assert_equal 34, json_response['file_size_total']
+ end
+
+ # A stat supplied by the client on create is replaced by the value derived
+ # from the manifest. Each row is [attribute name, value expected back].
+ [
+   ['file_count', 1],
+   ['file_size_total', 34]
+ ].each do |stat_name, expected|
+   test "create collection with #{stat_name} and expect overwrite" do
+     authorize_with :active
+     collection_params = {
+       manifest_text: ". d41d8cd98f00b204e9800998ecf8427e 0:34:foo.txt\n",
+       stat_name.to_sym => 10
+     }
+     post :create, { collection: collection_params }
+     assert_response 200
+     assert_equal expected, json_response[stat_name]
+   end
+ end
+
+ # A stat supplied on update without a manifest change is ignored: the
+ # fixture's stored value comes back. Each row is [attribute name, value
+ # expected back].
+ [
+   ['file_count', 1],
+   ['file_size_total', 3]
+ ].each do |stat_name, expected|
+   test "update collection with #{stat_name} and expect ignore" do
+     authorize_with :active
+     target = collections(:collection_owned_by_active_with_file_stats)
+     post :update, {
+       id: target.uuid,
+       collection: { stat_name.to_sym => 10 }
+     }
+     assert_response 200
+     assert_equal expected, json_response[stat_name]
+   end
+ end
+
+ # When an update carries both a new manifest and a stat, the value derived
+ # from the manifest is the one returned. Each row is [attribute name, value
+ # expected back].
+ [
+   ['file_count', 1],
+   ['file_size_total', 34]
+ ].each do |stat_name, expected|
+   test "update collection with #{stat_name} and manifest and expect manifest values" do
+     authorize_with :active
+     target = collections(:collection_owned_by_active_with_file_stats)
+     collection_params = {
+       manifest_text: ". d41d8cd98f00b204e9800998ecf8427e 0:34:foo.txt\n",
+       stat_name.to_sym => 10
+     }
+     post :update, { id: target.uuid, collection: collection_params }
+     assert_response 200
+     assert_equal expected, json_response[stat_name]
+   end
+ end
+
[
". 0:0:foo.txt",
". d41d8cd98f00b204e9800998ecf8427e foo.txt",
end
end
+ # Each row is [manifest_text, expected file_count, expected file_size_total]
+ # for a collection created directly through the model.
+ [
+   [". d41d8cd98f00b204e9800998ecf8427e 0:34:foo.txt\n", 1, 34],
+   [". d41d8cd98f00b204e9800998ecf8427e 0:34:foo.txt 0:30:foo.txt 0:30:foo1.txt 0:30:foo2.txt 0:30:foo3.txt 0:30:foo4.txt\n", 5, 184],
+   [". d41d8cd98f00b204e9800998ecf8427e 0:0:.\n", 0, 0]
+ ].each do |manifest_text, expected_count, expected_size|
+   test "file stats on create collection with #{manifest_text}" do
+     act_as_system_user do
+       created = Collection.create(manifest_text: manifest_text)
+       assert_equal expected_count, created.file_count
+       assert_equal expected_size, created.file_size_total
+     end
+   end
+ end
+
+ # Walks one record through every way a client could try to set the stats
+ # and checks that only the manifest ever determines them.
+ test "file stats cannot be changed unless through manifest change" do
+   act_as_system_user do
+     # Validates the record, then compares both stats with the expectation.
+     expect_stats = lambda do |record, count, size|
+       assert record.valid?
+       assert_equal count, record.file_count
+       assert_equal size, record.file_size_total
+     end
+
+     # Assigning the stats directly has no lasting effect
+     coll = Collection.create(manifest_text: ". d41d8cd98f00b204e9800998ecf8427e 0:34:foo.txt\n")
+     coll.file_count = 6
+     coll.file_size_total = 30
+     expect_stats.call(coll, 1, 34)
+
+     # Stats passed to create are replaced by the manifest's values
+     coll = Collection.create(manifest_text: ". d41d8cd98f00b204e9800998ecf8427e 0:34:foo.txt\n", file_count: 10, file_size_total: 10)
+     expect_stats.call(coll, 1, 34)
+
+     # A new manifest brings new stats
+     coll.update_attributes(manifest_text: ". d41d8cd98f00b204e9800998ecf8427e 0:34:foo.txt 0:34:foo2.txt\n")
+     expect_stats.call(coll, 2, 68)
+
+     # Manifest and stats together: the manifest wins
+     coll.update_attributes(manifest_text: ". d41d8cd98f00b204e9800998ecf8427e 0:34:foo.txt\n", file_count:10, file_size_total: 10)
+     expect_stats.call(coll, 1, 34)
+
+     # Stats alone: nothing changes
+     coll.update_attributes(file_count: 10, file_size_total: 10)
+     expect_stats.call(coll, 1, 34)
+   end
+ end
+
[
nil,
"",
--- /dev/null
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+require 'test_helper'
+require 'group_pdhs'
+
+# NOTE: Migration 20190322174136_add_file_info_to_collection.rb
+# relies on this test. Change with caution!
+class GroupPdhsTest < ActiveSupport::TestCase
+  # With a limit of 200: x1 and x2 (60 in total) share a batch, x3 (201)
+  # exceeds the limit on its own and is batched alone, and x4 plus x5 reach
+  # exactly 200, which still fits in one batch.
+  test "pdh_grouping_by_manifest_size" do
+    batch_size_max = 200
+    pdhs_in = ['x1+30', 'x2+30', 'x3+201', 'x4+100', 'x5+100']
+    # In-memory stand-in for the pdh source: passes the block every pdh
+    # that sorts after last_pdh.
+    pdh_lambda = lambda { |last_pdh, &block|
+      pdhs = pdhs_in.select{|pdh| pdh > last_pdh}
+      pdhs.each do |p|
+        block.call(p)
+      end
+    }
+    batched_pdhs = []
+    GroupPdhs.group_pdhs_for_multiple_transactions(pdh_lambda, pdhs_in.size, batch_size_max, "") do |pdhs|
+      batched_pdhs << pdhs
+    end
+    expected = [['x1+30', 'x2+30'], ['x3+201'], ['x4+100', 'x5+100']]
+    # Minitest takes the expected value first, so failures read correctly.
+    assert_equal(expected, batched_pdhs)
+  end
+end