2879: Improve sorting in Collection.uuids_for_docker_image.
[arvados.git] / services / api / app / models / collection.rb
index ef8a909a981b7a281957172ed67868602a2aee38..24e4ccc0ab744e32dcf1027c171251b7ae6455a7 100644 (file)
@@ -1,5 +1,5 @@
 class Collection < ArvadosModel
-  include AssignUuid
+  include HasUuid
   include KindAndEtag
   include CommonApiTemplate
 
@@ -8,6 +8,10 @@ class Collection < ArvadosModel
     t.add :files
   end
 
+  api_accessible :with_data, extend: :user do |t|  # NOTE(review): presumably inherits the :user template's fields -- confirm
+    t.add :manifest_text  # the :with_data template additionally lists manifest_text
+  end
+
   def redundancy_status
     if redundancy_confirmed_as.nil?
       'unconfirmed'
@@ -61,8 +65,19 @@ class Collection < ArvadosModel
       @files = []
       return
     end
+
+    #normalized_manifest = ""
+    #IO.popen(['arv-normalize'], 'w+b') do |io|
+    #  io.write manifest_text
+    #  io.close_write
+    #  while buf = io.read(2**20)
+    #    normalized_manifest += buf
+    #  end
+    #end
+
     @data_size = 0
-    @files = []
+    tmp = {}
+
     manifest_text.split("\n").each do |stream|
       toks = stream.split(" ")
 
@@ -94,11 +109,27 @@ class Collection < ArvadosModel
               else $1.to_i(8).chr
               end
             end
-            @files << [stream, filename, re[2].to_i]
+            fn = stream + '/' + filename
+            i = re[2].to_i
+            if tmp[fn]
+              tmp[fn] += i
+            else
+              tmp[fn] = i
+            end
           end
         end
       end
     end
+
+    @files = []
+    tmp.each do |k, v|
+      re = k.match(/^(.+)\/(.+)/)
+      @files << [re[1], re[2], v]
+    end
+  end
+
+  # Wildcard pattern for collection UUIDs: a fixed run of "_", a literal "+", then "%".
+  # NOTE(review): presumably consumed by a SQL LIKE clause -- confirm at the caller.
+  def self.uuid_like_pattern() "________________________________+%" end
 
   def self.normalize_uuid uuid
@@ -116,4 +147,53 @@ class Collection < ArvadosModel
     raise "uuid #{uuid} has no hash part" if !hash_part
     [hash_part, size_part].compact.join '+'
   end
+
+  # Find UUIDs of readable Collections whose Docker image matches search_term,
+  # newest image first.
+  # search_term: a Collection locator, image repository name, or image hash prefix.
+  # search_tag: repository tag to match; 'latest' when nil.
+  # readers: passed to Link.readable_by; defaults to the current thread's :user.
+  # Returns an Array of Collection UUIDs (empty when nothing matches).
+  def self.uuids_for_docker_image(search_term, search_tag=nil, readers=nil)
+    readers ||= [Thread.current[:user]]
+    visible_links = Link.readable_by(*readers)
+    visible_links = visible_links.readable_by(*readers, table_name: "collections")
+    base_search = visible_links.
+      joins("JOIN collections ON links.head_uuid = collections.uuid").
+      order("links.created_at DESC")
+
+    # Case 1: search_term is a Collection locator with an image hash link.
+    exact = base_search.
+      where(link_class: "docker_image_hash", collections: {uuid: search_term}).
+      first
+    return [exact.head_uuid] if exact
+
+    # Case 2: links recording a matching repository:tag pair.
+    repo_and_tag = "#{search_term}:#{search_tag || 'latest'}"
+    matches = base_search.where(link_class: "docker_image_repo+tag", name: repo_and_tag)
+
+    # Case 3: no such pair, so treat search_term as an image hash prefix.
+    if matches.empty?
+      matches = base_search.
+        where("link_class = ? and name LIKE ?", "docker_image_hash", "#{search_term}%")
+    end
+
+    # Key each UUID by [-image_timestamp, -created_at]; links without a usable
+    # image_timestamp get 0 and therefore sort after every timestamped one.
+    sort_keys = {}
+    matches.find_each do |link|
+      image_key = (-link.properties["image_timestamp"].to_datetime.to_i rescue 0)
+      sort_keys[link.head_uuid] = [image_key, -link.created_at.to_i]
+    end
+    sort_keys.sort_by { |_uuid, key| key }.map(&:first)
+  end
+
+  # Return the Collection holding the most recent Docker image that matches
+  # search_term / search_tag (same arguments as uuids_for_docker_image), or
+  # nil when nothing matches.  uuids_for_docker_image sorts ascending on
+  # negated timestamps, so the newest image is the FIRST uuid, not the last.
+  def self.for_latest_docker_image(search_term, search_tag=nil, readers=nil)
+    image_uuid = uuids_for_docker_image(search_term, search_tag, readers).first
+    image_uuid.nil? ? nil : find_by_uuid(image_uuid)
+  end
 end