Fix 2.4.2 upgrade notes formatting refs #19330
[arvados.git] / services / api / db / migrate / 20150303210106_fix_collection_portable_data_hash_with_hinted_manifest.rb
1 # Copyright (C) The Arvados Authors. All rights reserved.
2 #
3 # SPDX-License-Identifier: AGPL-3.0
4
5 require 'has_uuid'
6 require 'kind_and_etag'
7
# Repairs collections whose portable_data_hash does not match the hash of
# their manifest_text once block-locator hints have been stripped from it.
#
# up:   for every such collection, create a copy that keeps the old
#       portable_data_hash (with an expiration date if it had none), then
#       rewrite the original's portable_data_hash to the stripped value.
#       Both changes are written to the logs table.
# down: for every collection that still has a mismatched hash and that
#       records its origin in properties["migrated_from"], put that hash
#       back on the source collection and destroy the copy.
class FixCollectionPortableDataHashWithHintedManifest < ActiveRecord::Migration[4.2]
  include CurrentApiClient

  # Stub model base class local to this migration.
  # NOTE(review): presumably this exists so the migration does not depend
  # on the application's current model code -- confirm.
  class ArvadosModel < ActiveRecord::Base
    self.abstract_class = true
    extend HasUuid::ClassMethods
    include CurrentApiClient
    include KindAndEtag

    # Fill in uuid and owner_uuid on new records when the caller did not
    # supply them.
    before_create do |record|
      record.uuid ||= record.class.generate_uuid
      record.owner_uuid ||= system_user_uuid
    end
    serialize :properties, Hash

    def self.to_s
      # Clean up the name of the stub model class so we generate correct UUIDs.
      super.sub("FixCollectionPortableDataHashWithHintedManifest::", "")
    end
  end

  # Stub for the collections table.
  class Collection < ArvadosModel
  end

  # Stub for the logs table, plus helpers that record what this migration
  # does to each collection.
  class Log < ArvadosModel
    # Returns a Hash describing the current state of +thing+, with keys
    # "<age>_etag" and "<age>_attributes".  +age+ defaults to "old".
    def self.log_for(thing, age="old")
      {
        "#{age}_etag" => thing.etag,
        "#{age}_attributes" => thing.attributes,
      }
    end

    # Records a "create" event for +thing+.
    def self.log_create(thing)
      new_log("create", thing, log_for(thing, "new"))
    end

    # Records an "update" event for +thing+.  +start_state+ is the Hash
    # returned by log_for before the change was made.
    def self.log_update(thing, start_state)
      new_log("update", thing, start_state.merge(log_for(thing, "new")))
    end

    # Records a "destroy" event for +thing+.
    def self.log_destroy(thing)
      new_log("destroy", thing, log_for(thing, "old"))
    end

    # Inserts one log row; raises if the row cannot be saved.
    def self.new_log(event_type, thing, properties)
      create!(event_type: event_type,
              event_at: Time.now,
              object_uuid: thing.uuid,
              object_owner_uuid: thing.owner_uuid,
              properties: properties)
    end
    # A bare `private` does not apply to `def self.` methods, so the helper
    # has to be hidden explicitly.
    private_class_method :new_log
  end

  # Yields [collection, stripped_pdh] for every collection whose
  # portable_data_hash differs from the hash computed over its manifest
  # with locator hints removed.
  #
  # Only rows with an id no greater than the highest id present when the
  # method starts are examined.
  # NOTE(review): presumably this keeps rows created by the caller's block
  # out of the scan; that relies on new ids being larger -- confirm.
  def each_bad_collection
    end_coll = Collection.order("id DESC").first
    return if end_coll.nil?
    # A manifest can match more than one hint letter; remember what has
    # already been visited so each collection is processed only once.
    seen_uuids = []
    ("A".."Z").each do |hint_char|
      # hint_char comes from the literal range above, never from user
      # input, so interpolating it into the SQL fragment is safe.
      query = Collection.
        where("id <= ? AND manifest_text LIKE '%+#{hint_char}%'", end_coll.id)
      unless seen_uuids.empty?
        query = query.where("uuid NOT IN (?)", seen_uuids)
      end
      # It's important to make sure that this line doesn't swap.  The
      # worst case scenario is that it finds a batch of collections that
      # all have maximum size manifests (64MiB).  With a batch size of
      # 50, that's about 3GiB.  Figure it will end up being 4GiB after
      # other ActiveRecord overhead.  That's a size we're comfortable with.
      query.find_each(batch_size: 50) do |coll|
        seen_uuids << coll.uuid
        # Keep " <32 hex digits>[+<size>]" and drop everything that follows
        # up to the next whitespace.
        stripped_manifest = coll.manifest_text.
          gsub(/( [0-9a-f]{32}(\+\d+)?)\+\S+/, '\1')
        stripped_pdh = sprintf("%s+%i",
                               Digest::MD5.hexdigest(stripped_manifest),
                               stripped_manifest.bytesize)
        yield [coll, stripped_pdh] if coll.portable_data_hash != stripped_pdh
      end
    end
  end

  def up
    Collection.reset_column_information
    Log.reset_column_information
    copied_attr_names =
      [:owner_uuid, :created_at, :modified_by_client_uuid, :manifest_text,
       :modified_by_user_uuid, :modified_at, :updated_at, :name,
       :description, :portable_data_hash, :replication_desired,
       :replication_confirmed, :replication_confirmed_at, :expires_at]
    new_expiry = Date.new(2038, 1, 31)

    each_bad_collection do |coll, stripped_pdh|
      # Create a copy of the collection including bad portable data hash,
      # with an expiration.  This makes it possible to resolve the bad
      # portable data hash, but the expiration can hide the Collection
      # from more user-friendly interfaces like Workbench.
      start_log = Log.log_for(coll)
      attributes = Hash[copied_attr_names.map { |key| [key, coll.send(key)] }]
      attributes[:expires_at] ||= new_expiry
      # Best effort: fall back to empty properties if they cannot be read.
      attributes[:properties] = (coll.properties.dup rescue {})
      attributes[:properties]["migrated_from"] ||= coll.uuid
      coll_copy = Collection.create!(attributes)
      Log.log_create(coll_copy)
      # Use the raising variant: a silently failed save must not be logged
      # as an update, and must abort the migration.
      coll.update!(portable_data_hash: stripped_pdh)
      Log.log_update(coll, start_log)
    end
  end

  def down
    Collection.reset_column_information
    Log.reset_column_information
    each_bad_collection do |coll, stripped_pdh|
      # Only undo collections that record where they were copied from...
      src_uuid = coll.properties["migrated_from"]
      next unless src_uuid
      # ...and whose source still exists with the hash that `up` gave it.
      src_coll = Collection.where(uuid: src_uuid).first
      next unless src_coll && src_coll.portable_data_hash == stripped_pdh

      start_log = Log.log_for(src_coll)
      src_coll.portable_data_hash = coll.portable_data_hash
      src_coll.save!
      Log.log_update(src_coll, start_log)
      # Raise a StandardError (RuntimeError) rather than a bare Exception so
      # ordinary `rescue => e` handlers can see and report the failure.
      coll.destroy or raise "failed to destroy old collection"
      Log.log_destroy(coll)
    end
  end
end