2 require 'kind_and_etag'
4 class FixCollectionPortableDataHashWithHintedManifest < ActiveRecord::Migration
5 include CurrentApiClient
# Abstract base class shared by the stub models that this migration defines.
# Using local stubs keeps the migration independent of the application's
# current model classes.
class ArvadosModel < ActiveRecord::Base
  self.abstract_class = true
  extend HasUuid::ClassMethods
  include CurrentApiClient

  # Fill in any identifier the caller did not supply before the row is
  # inserted. Values that are already set are left alone.
  before_create do |record|
    record.uuid ||= record.class.generate_uuid
    record.owner_uuid ||= system_user_uuid
  end

  # Store the properties column as a serialized Hash.
  serialize :properties, Hash

  # NOTE(review): the header of this method was not present in the reviewed
  # text. `to_s` is assumed because the body rewrites the class name string;
  # confirm this is the method UUID generation actually consults.
  def self.to_s
    # Clean up the name of the stub model class so we generate correct UUIDs.
    super.sub("FixCollectionPortableDataHashWithHintedManifest::", "")
  end
end
# Stub Collection model local to this migration. It adds nothing of its own;
# all behavior comes from ArvadosModel.
class Collection < ArvadosModel
end
# Stub Log model local to this migration, with helpers that record the
# create/update/destroy events the migration performs.
class Log < ArvadosModel
  # Build the properties fragment describing one state of `thing`.
  #
  # thing - a record responding to `etag` and `attributes`.
  # age   - key prefix, "old" (default) or "new".
  #
  # Returns a Hash with the keys "<age>_etag" and "<age>_attributes".
  # NOTE(review): nothing in the reviewed text defines `etag`; presumably it
  # comes from the module loaded by 'kind_and_etag' -- confirm it is mixed in.
  def self.log_for(thing, age="old")
    {
      "#{age}_etag" => thing.etag,
      "#{age}_attributes" => thing.attributes,
    }
  end

  # Record that `thing` was created, capturing its state as the "new" state.
  def self.log_create(thing)
    new_log("create", thing, log_for(thing, "new"))
  end

  # Record that `thing` was updated. `start_state` is the Hash returned by
  # `log_for(thing)` before the change; the current state is merged in under
  # the "new" keys.
  def self.log_update(thing, start_state)
    new_log("update", thing, start_state.merge(log_for(thing, "new")))
  end

  # Record that `thing` was destroyed, capturing its state as the "old" state.
  def self.log_destroy(thing)
    new_log("destroy", thing, log_for(thing, "old"))
  end

  # Insert one log row for `thing`; raises if the row cannot be saved.
  # NOTE(review): only the attributes visible in the reviewed text are set
  # here; confirm no further column assignment was lost from this call.
  def self.new_log(event_type, thing, properties)
    create!(event_type: event_type,
            object_uuid: thing.uuid,
            object_owner_uuid: thing.owner_uuid,
            properties: properties)
  end
end
# Yield every collection whose stored portable data hash differs from the
# hash of its manifest with the locator hints stripped.
#
# Yields a two-element array [collection, stripped_pdh], where stripped_pdh is
# "<md5 of stripped manifest>+<byte size of stripped manifest>".
# Returns early (yielding nothing) when there are no collections at all.
def each_bad_collection
  # Remember the newest row up front so that collections created while the
  # caller is working (for example copies made by the caller) are not scanned.
  end_coll = Collection.order("id DESC").first
  return if end_coll.nil?

  # UUIDs already examined; a manifest may carry several hint letters, and
  # each collection must be considered only once.
  seen_uuids = []
  ("A".."Z").each do |hint_char|
    query = Collection.
      where("id <= ? AND manifest_text LIKE '%+#{hint_char}%'", end_coll.id)
    unless seen_uuids.empty?
      query = query.where("uuid NOT IN (?)", seen_uuids)
    end
    # It's important to make sure that this line doesn't swap.  The
    # worst case scenario is that it finds a batch of collections that
    # all have maximum size manifests (64MiB).  With a batch size of
    # 50, that's about 3GiB.  Figure it will end up being 4GiB after
    # other ActiveRecord overhead.  That's a size we're comfortable with.
    query.find_each(batch_size: 50) do |coll|
      seen_uuids << coll.uuid
      # Drop everything after "<32 hex digits>[+size]" in each locator.
      stripped_manifest = coll.manifest_text.
        gsub(/( [0-9a-f]{32}(\+\d+)?)\+\S+/, '\1')
      stripped_pdh = sprintf("%s+%i",
                             Digest::MD5.hexdigest(stripped_manifest),
                             stripped_manifest.bytesize)
      yield [coll, stripped_pdh] if (coll.portable_data_hash != stripped_pdh)
    end
  end
end
# Give every collection found by each_bad_collection a portable data hash
# computed from its hint-stripped manifest, keeping a copy that still carries
# the previous hash. Each change is recorded through Log.
def up
  Collection.reset_column_information
  Log.reset_column_information
  # Attributes carried over verbatim from the original to its copy.
  copied_attr_names =
    [:owner_uuid, :created_at, :modified_by_client_uuid, :manifest_text,
     :modified_by_user_uuid, :modified_at, :updated_at, :name,
     :description, :portable_data_hash, :replication_desired,
     :replication_confirmed, :replication_confirmed_at, :expires_at]
  new_expiry = Date.new(2038, 1, 31)

  each_bad_collection do |coll, stripped_pdh|
    # Create a copy of the collection including bad portable data hash,
    # with an expiration.  This makes it possible to resolve the bad
    # portable data hash, but the expiration can hide the Collection
    # from more user-friendly interfaces like Workbench.
    start_log = Log.log_for(coll)
    attributes = Hash[copied_attr_names.map { |key| [key, coll.send(key)] }]
    # Keep an expiry the original already had; otherwise use the default.
    attributes[:expires_at] ||= new_expiry
    # Best effort: fall back to empty properties if they cannot be duplicated.
    attributes[:properties] = (coll.properties.dup rescue {})
    # Remember which collection the copy came from so `down` can find it.
    attributes[:properties]["migrated_from"] ||= coll.uuid
    coll_copy = Collection.create!(attributes)
    Log.log_create(coll_copy)
    # Raise on failure (like create! above) so that an update which did not
    # happen is never written to the log as if it had.
    coll.update_attributes!(portable_data_hash: stripped_pdh)
    Log.log_update(coll, start_log)
  end
end
# Reverse `up`: for each copy that records a "migrated_from" source whose
# current hash is the hint-stripped one, put the copy's hash back on the
# source and destroy the copy. Each change is recorded through Log.
#
# Raises Exception if a copy cannot be destroyed.
def down
  Collection.reset_column_information
  Log.reset_column_information
  each_bad_collection do |coll, stripped_pdh|
    # Only copies made by `up` carry a "migrated_from" property.
    src_uuid = coll.properties["migrated_from"]
    next unless src_uuid

    src_coll = Collection.where(uuid: src_uuid).first
    # Skip when the source is gone or no longer holds the stripped hash.
    next unless src_coll && src_coll.portable_data_hash == stripped_pdh

    start_log = Log.log_for(src_coll)
    src_coll.portable_data_hash = coll.portable_data_hash
    # Persist the restored hash before logging it; the assignment alone does
    # not write anything to the database.
    src_coll.save!
    Log.log_update(src_coll, start_log)
    coll.destroy or raise Exception.new("failed to destroy old collection")
    Log.log_destroy(coll)
  end
end