# Collection model: subclasses ArvadosModel and mixes in CommonApiTemplate.
# NOTE(review): this listing is line-sampled (the embedded line numbers skip), so closing
# `end`s and some statements of this class are not visible here.
1 class Collection < ArvadosModel
4 include CommonApiTemplate
# set_portable_data_hash is registered to run before validation;
# ensure_hash_matches_manifest_text is registered as a validation.
6 before_validation :set_portable_data_hash
7 validate :ensure_hash_matches_manifest_text
# API template for :user, extending the :common template. portable_data_hash is one of the
# fields added to it; other `t.add` lines, if any, are not visible in this listing.
9 api_accessible :user, extend: :common do |t|
15 t.add :portable_data_hash
# Extends the hash returned by super with entries stating that the "data_size" and "files"
# attributes each require the "manifest_text" column.
# NOTE(review): presumably consulted when a client selects a subset of attributes, so the
# backing column gets loaded — confirm against ArvadosModel. The rest of the hash literal
# and the end of the method are not visible in this listing.
19 def self.attributes_required_columns
20 super.merge({ "data_size" => ["manifest_text"],
21 "files" => ["manifest_text"],
# before_validation callback: makes sure portable_data_hash is populated and normalized.
# - When the hash is nil or empty, or manifest_text changed while the hash did not, the hash
#   is recomputed as "<MD5 hex digest of manifest_text>+<manifest_text.length>".
# - Otherwise, when the hash itself was changed, it is parsed with Locator.parse! and replaced
#   by the parsed locator's to_s form. An ArgumentError raised along the way is recorded as a
#   validation error on :portable_data_hash.
# NOTE(review): lines are missing from this listing (the `begin` that pairs with `rescue`, any
# step between parse! and to_s, and the method's return value) — confirm against the full file.
25 def set_portable_data_hash
26 if (self.portable_data_hash.nil? or (self.portable_data_hash == "") or (manifest_text_changed? and !portable_data_hash_changed?))
# NOTE(review): String#length counts characters, not bytes — confirm this is the intended
# size component for manifests containing multi-byte characters.
27 self.portable_data_hash = "#{Digest::MD5.hexdigest(manifest_text)}+#{manifest_text.length}"
28 elsif portable_data_hash_changed?
30 loc = Locator.parse!(self.portable_data_hash)
32 self.portable_data_hash = loc.to_s
33 rescue ArgumentError => e
# Report the parse failure through the model's errors instead of letting it propagate.
34 errors.add(:portable_data_hash, "#{e}")
# Validation: whenever manifest_text or portable_data_hash has changed, recomputes
# "<MD5 hex digest of manifest_text>+<manifest_text.length>" and adds an error on
# :portable_data_hash if it differs from the stored value.
# NOTE(review): the closing lines and return value of this method are not visible in this listing.
41 def ensure_hash_matches_manifest_text
42 if manifest_text_changed? or portable_data_hash_changed?
# Same formula as the one used in set_portable_data_hash.
43 computed_hash = "#{Digest::MD5.hexdigest(manifest_text)}+#{manifest_text.length}"
44 unless computed_hash == portable_data_hash
# Log both values at debug level to help diagnose the mismatch.
45 logger.debug "(computed) '#{computed_hash}' != '#{portable_data_hash}' (provided)"
46 errors.add(:portable_data_hash, "does not match hash of manifest_text")
# NOTE(review): the enclosing `def` and the bodies of these branches are not visible in this
# listing; only the conditions are. Confirm what each branch yields against the full file.
# First check: is there no confirmed redundancy level, or one lower than `redundancy`?
54 if redundancy_confirmed_as.nil?
56 elsif redundancy_confirmed_as < redundancy
# Second check: is there no confirmation timestamp, or one less than 7 days old?
59 if redundancy_confirmed_at.nil?
61 elsif Time.now - redundancy_confirmed_at < 7.days
# NOTE(review): the two `def` lines enclosing these statements are not visible in this listing;
# presumably they are the readers for the "data_size" and "files" attributes — confirm.
# Re-run the manifest scan when @data_size has not been computed yet or manifest_text changed.
70 inspect_manifest_text if @data_size.nil? or manifest_text_changed?
# Re-run the manifest scan when @files has not been computed yet or manifest_text changed.
75 inspect_manifest_text if @files.nil? or manifest_text_changed?
# Scans manifest_text and derives @data_size and @files from it.
# Each newline-separated line is split on spaces: the first token is the stream name, tokens
# that start with 32 hex digits are treated as data block locators, and tokens of the form
# "<digits>:<digits>:<name>" are treated as file entries.
# NOTE(review): initialisation of @data_size/@files, the bodies of the escape-sequence blocks,
# the per-file accumulation and several closers are not visible in this listing.
79 def inspect_manifest_text
89 manifest_text.split("\n").each do |stream|
90 toks = stream.split(" ")
# Rewrite backslash escapes (an escaped backslash or a three-digit octal code) in the stream
# name; the replacement block's body is not visible here.
92 stream = toks[0].gsub /\\(\\|[0-7]{3})/ do |escape_sequence|
99 toks[1..-1].each do |tok|
100 if (re = tok.match /^[0-9a-f]{32}/)
# Walk the '+'-separated hints that follow the hash part of the locator.
102 tok.split('+')[1..-1].each do |hint|
# The first all-digit hint supplies the block size.
103 if !blocksize and hint.match /^\d+$/
104 blocksize = hint.to_i
# A "GS<digits>" hint also supplies a block size and, in the visible lines, overwrites any
# value taken from an all-digit hint.
106 if (re = hint.match /^GS(\d+)$/)
107 blocksize = re[1].to_i
# A block without a known size turns @data_size into false; once it is false the
# accumulation below is skipped.
110 @data_size = false if !blocksize
111 @data_size += blocksize if @data_size
113 if (re = tok.match /^(\d+):(\d+):(\S+)$/)
# Rewrite backslash escapes in the file name the same way as for the stream name.
114 filename = re[3].gsub /\\(\\|[0-7]{3})/ do |escape_sequence|
120 fn = stream + '/' + filename
# `k` and `v` come from an iteration that is not visible in this listing. The greedy first
# group splits `k` at its last '/', so each @files entry is [directory part, file name, v].
134 re = k.match(/^(.+)\/(.+)/)
135 @files << [re[1], re[2], v]
# Reduces a '+'-separated locator string to its hash part and, if present, its size part.
# A token of 32 or more hex digits counts as the hash part; an all-digit token counts as the
# size part; other tokens are ignored by the visible branches.
# Returns the hash and size parts joined by '+' (a missing size part is dropped by `compact`).
# Raises RuntimeError when there is more than one hash part, more than one size part, or no
# hash part at all.
# NOTE(review): the initialisation of and assignments to hash_part/size_part are not visible
# in this listing.
# NOTE(review): `^`/`$` anchor at line boundaries, so a token containing a newline could match
# on one of its lines — consider `\A`/`\z` if input is untrusted.
139 def self.normalize_uuid uuid
142 uuid.split('+').each do |token|
143 if token.match /^[0-9a-f]{32,}$/
144 raise "uuid #{uuid} has multiple hash parts" if hash_part
146 elsif token.match /^\d+$/
147 raise "uuid #{uuid} has multiple size parts" if size_part
151 raise "uuid #{uuid} has no hash part" if !hash_part
152 [hash_part, size_part].compact.join '+'
# Finds the UUIDs of collections matching a Docker image search.
# search_term: a collection locator, a repository name (optionally "repo:tag"), or the leading
#   part of an image hash.
# search_tag: tag to match; when nil and search_term contains ':', the text after the first
#   ':' is used, and 'latest' is the final fallback.
# readers: users whose read permissions scope the search; defaults to [Thread.current[:user]].
# Returns an array of collection UUIDs. For the link-based searches the array is ordered by
# the keys built at the end of the method.
# NOTE(review): several lines are not visible in this listing, including the receiver of the
# query chain, the guard around the hash fallback and the initialisation of uuid_timestamps.
155 def self.uuids_for_docker_image(search_term, search_tag=nil, readers=nil)
156 readers ||= [Thread.current[:user]]
# Query chain restricted to rows readable by `readers` (checked once with the default table and
# once against "collections"), joined to collections through links.head_uuid, newest links
# first. Presumably this is what gets assigned to `base_search` — confirm.
158 readable_by(*readers).
159 readable_by(*readers, table_name: "collections").
160 joins("JOIN collections ON links.head_uuid = collections.uuid").
161 order("links.created_at DESC")
163 # If the search term is a Collection locator that contains one file
164 # that looks like a Docker image, return it.
165 if loc = Locator.parse(search_term)
167 coll_match = readable_by(*readers).where(portable_data_hash: loc.to_s).limit(1).first
# "Looks like a Docker image" means the second element of the single files entry is
# 64 hex digits followed by ".tar".
168 if coll_match and (coll_match.files.size == 1) and
169 (coll_match.files[0][1] =~ /^[0-9A-Fa-f]{64}\.tar$/)
170 return [coll_match.uuid]
# Split "repo:tag" at the first ':' when no explicit tag was given.
174 if search_tag.nil? and (n = search_term.index(":"))
175 search_tag = search_term[n+1..-1]
176 search_term = search_term[0..n-1]
179 # Find Collections with matching Docker image repository+tag pairs.
180 matches = base_search.
181 where(link_class: "docker_image_repo+tag",
182 name: "#{search_term}:#{search_tag || 'latest'}")
184 # If that didn't work, find Collections with matching Docker image hashes.
# NOTE(review): search_term is passed as a bound parameter, but any '%' or '_' in it is still
# interpreted as a LIKE wildcard — confirm that is acceptable.
186 matches = base_search.
187 where("link_class = ? and links.name LIKE ?",
188 "docker_image_hash", "#{search_term}%")
191 # Generate an order key for each result. We want to order the results
192 # so that anything with an image timestamp is considered more recent than
193 # anything without; then we use the link's created_at as a tiebreaker.
195 matches.find_each do |link|
# NOTE(review): `c` would be nil if no collection has this UUID, making `c.uuid` raise —
# confirm the join above guarantees the collection exists.
196 c = Collection.find_by_uuid(link.head_uuid)
# Both key components are negated so that an ascending sort puts the newest entries first.
# The inline `rescue 0` substitutes 0 when the image timestamp is missing or unparseable.
197 uuid_timestamps[c.uuid] =
198 [(-link.properties["image_timestamp"].to_datetime.to_i rescue 0),
199 -link.created_at.to_i]
201 uuid_timestamps.keys.sort_by { |uuid| uuid_timestamps[uuid] }
# Looks up a collection for a Docker image search: takes the first UUID returned by
# uuids_for_docker_image (same arguments) and loads it with find_by_uuid.
# NOTE(review): the lines between these two statements are not visible in this listing;
# presumably they handle the case where no UUID was found — confirm.
204 def self.for_latest_docker_image(search_term, search_tag=nil, readers=nil)
205 image_uuid = uuids_for_docker_image(search_term, search_tag, readers).first
209 find_by_uuid(image_uuid)