3036: API server tests pass
[arvados.git] / services/api/app/models/collection.rb
index 03e5e4ef44c32948e3f923ad3b849f0d71e28c8e..2bf781013bce167ae1a376740ad100cb1c73abf9 100644
@@ -1,11 +1,50 @@
 class Collection < ArvadosModel
-  include AssignUuid
+  include HasUuid
   include KindAndEtag
   include CommonApiTemplate
 
+  before_validation :set_portable_data_hash
+  validate :ensure_hash_matches_manifest_text
+
   api_accessible :user, extend: :common do |t|
     t.add :data_size
     t.add :files
+    t.add :name
+    t.add :description
+    t.add :properties
+    t.add :portable_data_hash
+  end
+
+  api_accessible :with_data, extend: :user do |t|
+    t.add :manifest_text
+  end
+
+  def set_portable_data_hash
+    if (portable_data_hash.nil? or portable_data_hash == "" or
+        (manifest_text_changed? and !portable_data_hash_changed?))
+      # No hash was supplied, or the manifest changed without a new hash:
+      # compute "<md5 of manifest_text>+<manifest_text length>".
+      self.portable_data_hash = "#{Digest::MD5.hexdigest(manifest_text)}+#{manifest_text.length}"
+    elsif portable_data_hash_changed?
+      # A hash was supplied by the client: check its format and strip any
+      # locator hints before storing it.
+      begin
+        loc = Locator.parse!(self.portable_data_hash)
+        loc.strip_hints!
+        self.portable_data_hash = loc.to_s
+      rescue ArgumentError => e
+        errors.add(:portable_data_hash, e.to_s)
+        return false
+      end
+    end
+    true
+  end
+
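+  # Reject any change where portable_data_hash does not equal the MD5 of
+  # manifest_text followed by "+<manifest_text length>", e.g.
+  # "d41d8cd98f00b204e9800998ecf8427e+0" for an empty manifest.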
+  def ensure_hash_matches_manifest_text
+    if manifest_text_changed? or portable_data_hash_changed?
+      computed_hash = "#{Digest::MD5.hexdigest(manifest_text)}+#{manifest_text.length}"
+      unless computed_hash == portable_data_hash
+        logger.debug "(computed) '#{computed_hash}' != '#{portable_data_hash}' (provided)"
+        errors.add(:portable_data_hash, "does not match hash of manifest_text")
+        return false
+      end
+    end
+    true
   end
 
   def redundancy_status
@@ -24,27 +63,6 @@ class Collection < ArvadosModel
     end
   end
 
-  def assign_uuid
-    if self.manifest_text.nil? and self.uuid.nil?
-      super
-    elsif self.manifest_text and self.uuid
-      self.uuid.gsub! /\+.*/, ''
-      if self.uuid == Digest::MD5.hexdigest(self.manifest_text)
-        self.uuid.gsub! /$/, '+' + self.manifest_text.length.to_s
-        true
-      else
-        errors.add :uuid, 'uuid does not match checksum of manifest_text'
-        false
-      end
-    elsif self.manifest_text
-      errors.add :uuid, 'checksum for manifest_text not supplied in uuid'
-      false
-    else
-      errors.add :manifest_text, 'manifest_text not supplied'
-      false
-    end
-  end
-
   def data_size
     inspect_manifest_text if @data_size.nil? or manifest_text_changed?
     @data_size
@@ -61,8 +79,10 @@ class Collection < ArvadosModel
       @files = []
       return
     end
+
     @data_size = 0
-    @files = []
+    tmp = {}
+
     manifest_text.split("\n").each do |stream|
       toks = stream.split(" ")
 
@@ -94,10 +114,96 @@ class Collection < ArvadosModel
               else $1.to_i(8).chr
               end
             end
-            @files << [stream, filename, re[2].to_i]
+            fn = stream + '/' + filename
+            i = re[2].to_i
+            # Sum segment sizes for file paths that appear more than once.
+            tmp[fn] = (tmp[fn] || 0) + i
           end
         end
       end
     end
+
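+    # Rebuild @files as [stream, filename, size] triples, one per distinct
+    # file path, splitting each accumulated key on its last '/'.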
+    @files = []
+    tmp.each do |k, v|
+      re = k.match(/^(.+)\/(.+)/)
+      @files << [re[1], re[2], v]
+    end
+  end
+
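+  # Reduce a content-addressed uuid to its canonical "<hash>+<size>" form
+  # (just "<hash>" if no size token is present), discarding hint tokens.
+  # Raises if the hash part is missing, or if the hash or size part appears
+  # more than once.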
+  def self.normalize_uuid uuid
+    hash_part = nil
+    size_part = nil
+    uuid.split('+').each do |token|
+      if token.match /^[0-9a-f]{32,}$/
+        raise "uuid #{uuid} has multiple hash parts" if hash_part
+        hash_part = token
+      elsif token.match /^\d+$/
+        raise "uuid #{uuid} has multiple size parts" if size_part
+        size_part = token
+      end
+    end
+    raise "uuid #{uuid} has no hash part" if !hash_part
+    [hash_part, size_part].compact.join '+'
+  end
+
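+  # Return uuids of readable Collections that contain the requested Docker
+  # image, ordered so the most recent image comes first.  search_term may be
+  # a collection locator, a "repository[:tag]" name, or an image hash prefix.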
+  def self.uuids_for_docker_image(search_term, search_tag=nil, readers=nil)
+    readers ||= [Thread.current[:user]]
+    base_search = Link.
+      readable_by(*readers).
+      readable_by(*readers, table_name: "collections").
+      joins("JOIN collections ON links.head_uuid = collections.uuid").
+      order("links.created_at DESC")
+
+    # If the search term is a Collection locator and that collection holds
+    # exactly one file that looks like a saved Docker image, return its uuid.
+    if loc = Locator.parse(search_term)
+      loc.strip_hints!
+      coll_match = readable_by(*readers).where(portable_data_hash: loc.to_s).limit(1).first
+      if coll_match and (coll_match.files.size == 1) and
+          (coll_match.files[0][1] =~ /^[0-9A-Fa-f]{64}\.tar$/)
+        return [coll_match.uuid]
+      end
+    end
+
+    if search_tag.nil? and (n = search_term.index(":"))
+      search_tag = search_term[n+1..-1]
+      search_term = search_term[0..n-1]
+    end
+
+    # Find Collections with matching Docker image repository+tag pairs.
+    matches = base_search.
+      where(link_class: "docker_image_repo+tag",
+            name: "#{search_term}:#{search_tag || 'latest'}")
+
+    # If that didn't work, find Collections with matching Docker image hashes.
+    if matches.empty?
+      matches = base_search.
+        where("link_class = ? and links.name LIKE ?",
+              "docker_image_hash", "#{search_term}%")
+    end
+
+    # Generate an order key for each result.  We want to order the results
+    # so that anything with an image timestamp is considered more recent than
+    # anything without; then we use the link's created_at as a tiebreaker.
+    uuid_timestamps = {}
+    matches.find_each do |link|
+      c = Collection.find_by_uuid(link.head_uuid)
+      uuid_timestamps[c.uuid] =
+        [(-link.properties["image_timestamp"].to_datetime.to_i rescue 0),
+         -link.created_at.to_i]
+    end
+    uuid_timestamps.keys.sort_by { |uuid| uuid_timestamps[uuid] }
+  end
+
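+  # Return the Collection for the most recent matching Docker image, or nil
+  # if there is no readable match.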
+  def self.for_latest_docker_image(search_term, search_tag=nil, readers=nil)
+    image_uuid = uuids_for_docker_image(search_term, search_tag, readers).first
+    if image_uuid.nil?
+      nil
+    else
+      find_by_uuid(image_uuid)
+    end
   end
 end
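
A minimal usage sketch (not part of the commit above; the repository name and
console context are illustrative assumptions) showing how the new helpers are
expected to be called, e.g. from a Rails console with Thread.current[:user]
set to an authorized user:

    # Newest readable collection tagged as "arvados/jobs:latest", or nil.
    coll = Collection.for_latest_docker_image('arvados/jobs')
    puts coll.portable_data_hash if coll

    # All candidate collection uuids, most recent image first.
    Collection.uuids_for_docker_image('arvados/jobs', 'latest')

    # Canonical form of a locator, with hint tokens stripped.
    Collection.normalize_uuid('d41d8cd98f00b204e9800998ecf8427e+0+K@example')
    # => "d41d8cd98f00b204e9800998ecf8427e+0"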