1 # Copyright (C) The Arvados Authors. All rights reserved.
3 # SPDX-License-Identifier: AGPL-3.0
6 require 'kind_and_etag'
# Data migration over collections whose manifest_text contains "+<letter>"
# location hints. A collection is treated as "bad" when its stored
# portable_data_hash differs from the "<md5>+<bytesize>" value computed
# over the manifest with those hints stripped (see each_bad_collection).
# NOTE(review): several lines of this file (some `def`/`end` lines among
# them) are not visible in this listing; confirm against the full file.
8 class FixCollectionPortableDataHashWithHintedManifest < ActiveRecord::Migration[4.2]
9 include CurrentApiClient
# Abstract stub base class for the model classes this migration declares
# locally (Collection and Log both inherit from it).
11 class ArvadosModel < ActiveRecord::Base
12 self.abstract_class = true
13 extend HasUuid::ClassMethods
14 include CurrentApiClient
# On create, fill in uuid and owner_uuid only when they are not already set.
# Presumably generate_uuid comes from HasUuid::ClassMethods and
# system_user_uuid from CurrentApiClient -- confirm against those modules.
16 before_create do |record|
17 record.uuid ||= record.class.generate_uuid
18 record.owner_uuid ||= system_user_uuid
# The properties column is serialized and exposed as a Hash.
20 serialize :properties, Hash
# NOTE(review): the `def` line owning the next two lines is not visible in
# this listing; it looks like an override of a class-name method that strips
# the migration's namespace prefix from the inherited result -- confirm.
23 # Clean up the name of the stub model class so we generate correct UUIDs.
24 super.sub("FixCollectionPortableDataHashWithHintedManifest::", "")
# Stub model for collections, local to this migration. No members beyond
# what ArvadosModel provides are visible in this listing.
28 class Collection < ArvadosModel
# Stub model for log rows, local to this migration, with class-level helpers
# that record create/update/destroy events about another record.
31 class Log < ArvadosModel
# Build a snapshot hash for `thing` with two keys, "<age>_etag" and
# "<age>_attributes", taken from thing.etag and thing.attributes.
# `age` defaults to "old"; callers in this class also pass "new".
32 def self.log_for(thing, age="old")
33 { "#{age}_etag" => thing.etag,
34 "#{age}_attributes" => thing.attributes,
# Record a "create" event whose properties are the "new" snapshot of `thing`.
38 def self.log_create(thing)
39 new_log("create", thing, log_for(thing, "new"))
# Record an "update" event. `start_state` is a snapshot taken before the
# change (see log_for); the "new" snapshot of `thing` is merged into it.
42 def self.log_update(thing, start_state)
43 new_log("update", thing, start_state.merge(log_for(thing, "new")))
# Record a "destroy" event whose properties are the "old" snapshot of `thing`.
46 def self.log_destroy(thing)
47 new_log("destroy", thing, log_for(thing, "old"))
# Insert the log row via create!, copying uuid and owner_uuid from `thing`
# into object_uuid and object_owner_uuid.
# NOTE(review): one original line inside this create! call (between the
# event_type and object_uuid arguments) is not visible in this listing.
52 def self.new_log(event_type, thing, properties)
53 create!(event_type: event_type,
55 object_uuid: thing.uuid,
56 object_owner_uuid: thing.owner_uuid,
57 properties: properties)
# Yield [collection, stripped_pdh] for every collection whose stored
# portable_data_hash differs from the hash computed over its manifest with
# location hints removed. Returns early, yielding nothing, when there are
# no collections at all.
# NOTE(review): the lines that initialise `seen_uuids` and begin the `query`
# assignment are not visible in this listing.
61 def each_bad_collection
# Cap the scan at the highest id present when the scan starts; presumably so
# rows created by the caller during iteration are not revisited -- confirm.
62 end_coll = Collection.order("id DESC").first
63 return if end_coll.nil?
# One pass per capital letter: match manifests containing "+<letter>".
# NOTE(review): hint_char is interpolated directly into the SQL string; it
# only ever takes values from "A".."Z" here.
65 ("A".."Z").each do |hint_char|
67 where("id <= ? AND manifest_text LIKE '%+#{hint_char}%'", end_coll.id)
# Skip collections already visited in an earlier letter's pass.
# NOTE(review): seen_uuids is only ever appended to in the visible lines, so
# this NOT IN list grows with every collection visited.
68 unless seen_uuids.empty?
69 query = query.where("uuid NOT IN (?)", seen_uuids)
71 # It's important to make sure that this line doesn't swap. The
72 # worst case scenario is that it finds a batch of collections that
73 # all have maximum size manifests (64MiB). With a batch size of
74 # 50, that's about 3GiB. Figure it will end up being 4GiB after
75 # other ActiveRecord overhead. That's a size we're comfortable with.
76 query.find_each(batch_size: 50) do |coll|
77 seen_uuids << coll.uuid
# Keep each " <32 hex digits>" token plus an optional "+<digits>" suffix and
# drop any further "+..." run of non-whitespace that follows it.
78 stripped_manifest = coll.manifest_text.
79 gsub(/( [0-9a-f]{32}(\+\d+)?)\+\S+/, '\1')
# Expected hash format: "<md5 hex of stripped manifest>+<its byte size>".
80 stripped_pdh = sprintf("%s+%i",
81 Digest::MD5.hexdigest(stripped_manifest),
82 stripped_manifest.bytesize)
# Only collections whose stored hash disagrees are handed to the block.
83 yield [coll, stripped_pdh] if (coll.portable_data_hash != stripped_pdh)
# NOTE(review): the enclosing `def` line is not visible in this listing;
# presumably this is the body of the migration's `up` -- confirm.
# For each bad collection: create a copy that keeps the old (hinted)
# portable_data_hash, then rewrite the original to the stripped hash, logging
# both changes.
89 Collection.reset_column_information
90 Log.reset_column_information
# Attribute names carried over to the copy.
# NOTE(review): the line that assigns this array is not visible here; the
# loop below reads it as `copied_attr_names`.
92 [:owner_uuid, :created_at, :modified_by_client_uuid, :manifest_text,
93 :modified_by_user_uuid, :modified_at, :updated_at, :name,
94 :description, :portable_data_hash, :replication_desired,
95 :replication_confirmed, :replication_confirmed_at, :expires_at]
# Expiry given to copies that have no expires_at of their own.
96 new_expiry = Date.new(2038, 1, 31)
98 each_bad_collection do |coll, stripped_pdh|
99 # Create a copy of the collection including bad portable data hash,
100 # with an expiration. This makes it possible to resolve the bad
101 # portable data hash, but the expiration can hide the Collection
102 # from more user-friendly interfaces like Workbench.
# Snapshot the original before it is modified, for the update log below.
103 start_log = Log.log_for(coll)
104 attributes = Hash[copied_attr_names.map { |key| [key, coll.send(key)] }]
105 attributes[:expires_at] ||= new_expiry
# NOTE(review): the rescue modifier falls back to {} on any StandardError
# raised while duplicating properties, not only on a nil value.
106 attributes[:properties] = (coll.properties.dup rescue {})
# Record which collection the copy came from, unless already recorded.
107 attributes[:properties]["migrated_from"] ||= coll.uuid
108 coll_copy = Collection.create!(attributes)
109 Log.log_create(coll_copy)
# NOTE(review): the return value of update is not checked here, unlike the
# create! call above, which raises on failure.
110 coll.update(portable_data_hash: stripped_pdh)
111 Log.log_update(coll, start_log)
# NOTE(review): the enclosing `def` line is not visible in this listing;
# presumably this is the body of the migration's `down` -- confirm.
# For each bad collection that names a source in properties["migrated_from"]:
# when that source still exists and currently carries the stripped hash, give
# the source this collection's portable_data_hash, then destroy this
# collection, logging both changes.
116 Collection.reset_column_information
117 Log.reset_column_information
118 each_bad_collection do |coll, stripped_pdh|
# All three conditions must hold; src_uuid and src_coll are assigned as a
# side effect of evaluating the condition.
119 if ((src_uuid = coll.properties["migrated_from"]) and
120 (src_coll = Collection.where(uuid: src_uuid).first) and
121 (src_coll.portable_data_hash == stripped_pdh))
122 start_log = Log.log_for(src_coll)
123 src_coll.portable_data_hash = coll.portable_data_hash
# NOTE(review): one original line between the assignment above and the log
# call below is not visible in this listing; presumably it persists src_coll
# -- confirm against the full file.
125 Log.log_update(src_coll, start_log)
# NOTE(review): this raises the base Exception class, which a bare `rescue`
# (StandardError) will not catch.
126 coll.destroy or raise Exception.new("failed to destroy old collection")
127 Log.log_destroy(coll)