8784: Fix test for latest firefox.
[arvados.git] / services / api / db / migrate / 20150303210106_fix_collection_portable_data_hash_with_hinted_manifest.rb
1 require 'has_uuid'
2 require 'kind_and_etag'
3
# Repairs collections whose stored portable_data_hash does not match the
# hash of their manifest once the extra "+X..." locator suffixes (hints)
# have been stripped from it.
#
# +up+ saves a copy of every affected collection under its old (bad)
# portable_data_hash, then rewrites the original record so it carries the
# hash of the stripped manifest.  +down+ finds those copies through their
# "migrated_from" property, restores the old hash on the original record and
# destroys the copy.  Every create/update/destroy is recorded in the logs
# table.
class FixCollectionPortableDataHashWithHintedManifest < ActiveRecord::Migration
  include CurrentApiClient

  # Minimal stand-in for the application's model base class, so the
  # migration works against stub models defined in this file.
  class ArvadosModel < ActiveRecord::Base
    self.abstract_class = true
    extend HasUuid::ClassMethods
    include CurrentApiClient
    include KindAndEtag
    # Fill in uuid and owner_uuid on new records when the caller left them
    # unset.
    before_create do |record|
      record.uuid ||= record.class.generate_uuid
      record.owner_uuid ||= system_user_uuid
    end
    serialize :properties, Hash

    def self.to_s
      # Clean up the name of the stub model class so we generate correct UUIDs.
      super.sub("FixCollectionPortableDataHashWithHintedManifest::", "")
    end
  end

  # Stub model for the collections table.
  class Collection < ArvadosModel
  end

  # Stub model for the logs table, plus helpers that record what this
  # migration does to each collection.
  class Log < ArvadosModel
    # Returns a properties fragment holding +thing+'s etag and attributes,
    # keyed as "<age>_etag" and "<age>_attributes".
    def self.log_for(thing, age="old")
      { "#{age}_etag" => thing.etag,
        "#{age}_attributes" => thing.attributes,
      }
    end

    # Records the creation of +thing+.
    def self.log_create(thing)
      new_log("create", thing, log_for(thing, "new"))
    end

    # Records an update of +thing+.  +start_state+ is the hash returned by
    # log_for before the change was made; the current state is merged in
    # under the "new_" keys.
    def self.log_update(thing, start_state)
      new_log("update", thing, start_state.merge(log_for(thing, "new")))
    end

    # Records the destruction of +thing+.
    def self.log_destroy(thing)
      new_log("destroy", thing, log_for(thing, "old"))
    end

    # Creates the log row for +thing+; raises if the row cannot be saved.
    def self.new_log(event_type, thing, properties)
      create!(event_type: event_type,
              event_at: Time.now,
              object_uuid: thing.uuid,
              object_owner_uuid: thing.owner_uuid,
              properties: properties)
    end
    # A bare `private` does not affect methods defined with `def self.`, so
    # the helper has to be hidden explicitly.
    private_class_method :new_log
  end

  # Yields [collection, stripped_pdh] for every collection whose stored
  # portable_data_hash differs from the hash of its stripped manifest.
  #
  # Only collections that already existed when the method started
  # (id <= the highest id at that time) are considered, so records created
  # by the caller while iterating are never visited.
  def each_bad_collection
    end_coll = Collection.order("id DESC").first
    return if end_coll.nil?
    seen_uuids = []
    # One query per capital letter: candidates are manifests containing a
    # "+" immediately followed by that letter.  hint_char only ever comes
    # from this fixed range, so interpolating it into the SQL is safe.
    ("A".."Z").each do |hint_char|
      query = Collection.
        where("id <= ? AND manifest_text LIKE '%+#{hint_char}%'", end_coll.id)
      # Skip collections already examined under an earlier letter.
      unless seen_uuids.empty?
        query = query.where("uuid NOT IN (?)", seen_uuids)
      end
      # It's important to make sure that this line doesn't swap.  The
      # worst case scenario is that it finds a batch of collections that
      # all have maximum size manifests (64MiB).  With a batch size of
      # 50, that's about 3GiB.  Figure it will end up being 4GiB after
      # other ActiveRecord overhead.  That's a size we're comfortable with.
      query.find_each(batch_size: 50) do |coll|
        seen_uuids << coll.uuid
        # Keep " <32 hex digits>" and an optional "+<digits>" part of each
        # locator; drop whatever "+..." suffix follows.
        stripped_manifest = coll.manifest_text.
          gsub(/( [0-9a-f]{32}(\+\d+)?)\+\S+/, '\1')
        # Expected hash: MD5 of the stripped manifest, "+", its byte size.
        stripped_pdh = sprintf("%s+%i",
                               Digest::MD5.hexdigest(stripped_manifest),
                               stripped_manifest.bytesize)
        yield [coll, stripped_pdh] if (coll.portable_data_hash != stripped_pdh)
      end
    end
  end

  # Copies each bad collection (keeping its bad hash), then corrects the
  # original's portable_data_hash.  Raises if any record fails to save.
  def up
    Collection.reset_column_information
    Log.reset_column_information
    copied_attr_names =
      [:owner_uuid, :created_at, :modified_by_client_uuid, :manifest_text,
       :modified_by_user_uuid, :modified_at, :updated_at, :name,
       :description, :portable_data_hash, :replication_desired,
       :replication_confirmed, :replication_confirmed_at, :expires_at]
    new_expiry = Date.new(2038, 1, 31)

    each_bad_collection do |coll, stripped_pdh|
      # Create a copy of the collection including bad portable data hash,
      # with an expiration.  This makes it possible to resolve the bad
      # portable data hash, but the expiration can hide the Collection
      # from more user-friendly interfaces like Workbench.
      start_log = Log.log_for(coll)
      attributes = Hash[copied_attr_names.map { |key| [key, coll.send(key)] }]
      attributes[:expires_at] ||= new_expiry
      # Best effort: fall back to empty properties if the stored value
      # cannot be read or duplicated.
      attributes[:properties] = (coll.properties.dup rescue {})
      attributes[:properties]["migrated_from"] ||= coll.uuid
      coll_copy = Collection.create!(attributes)
      Log.log_create(coll_copy)
      # Use save! (as #down does) so a failed write aborts the migration
      # instead of being logged as a successful update.
      coll.portable_data_hash = stripped_pdh
      coll.save!
      Log.log_update(coll, start_log)
    end
  end

  # Reverses #up: for every copy made by #up whose source collection still
  # exists and carries the corrected hash, put the old hash back on the
  # source and destroy the copy.
  def down
    Collection.reset_column_information
    Log.reset_column_information
    each_bad_collection do |coll, stripped_pdh|
      if ((src_uuid = coll.properties["migrated_from"]) and
          (src_coll = Collection.where(uuid: src_uuid).first) and
          (src_coll.portable_data_hash == stripped_pdh))
        start_log = Log.log_for(src_coll)
        src_coll.portable_data_hash = coll.portable_data_hash
        src_coll.save!
        Log.log_update(src_coll, start_log)
        # Raise a RuntimeError rather than a bare Exception so ordinary
        # `rescue => e` handlers can report the failure.
        coll.destroy or raise "failed to destroy old collection"
        Log.log_destroy(coll)
      end
    end
  end
end