Merge branch 'master' into origin-2883-job-log-viewer
[arvados.git] / services / api / app / models / collection.rb
index 99510449fe31193253c62ff057c883df4aa8e7ea..64a6bb05304b84877bb4f6d55231c3e561bc8ee6 100644 (file)
@@ -1,5 +1,5 @@
 class Collection < ArvadosModel
-  include AssignUuid
+  include HasUuid
   include KindAndEtag
   include CommonApiTemplate
 
@@ -8,6 +8,10 @@ class Collection < ArvadosModel
     t.add :files
   end
 
+  api_accessible :with_data, extend: :user do |t|
+    t.add :manifest_text
+  end
+
   def redundancy_status
     if redundancy_confirmed_as.nil?
       'unconfirmed'
@@ -28,17 +32,19 @@ class Collection < ArvadosModel
     if self.manifest_text.nil? and self.uuid.nil?
       super
     elsif self.manifest_text and self.uuid
-      if self.uuid.gsub(/\+[^,]+/,'') == Digest::MD5.hexdigest(self.manifest_text)
+      self.uuid.gsub! /\+.*/, ''
+      if self.uuid == Digest::MD5.hexdigest(self.manifest_text)
+        self.uuid.gsub! /$/, '+' + self.manifest_text.length.to_s
         true
       else
-        errors.add :uuid, 'uuid does not match checksum of manifest_text'
+        errors.add :uuid, 'does not match checksum of manifest_text'
         false
       end
     elsif self.manifest_text
-      errors.add :uuid, 'checksum for manifest_text not supplied in uuid'
+      errors.add :uuid, 'not supplied (must match checksum of manifest_text)'
       false
     else
-      errors.add :manifest_text, 'manifest_text not supplied'
+      errors.add :manifest_text, 'not supplied'
       false
     end
   end
@@ -59,10 +65,29 @@ class Collection < ArvadosModel
       @files = []
       return
     end
+
+    #normalized_manifest = ""
+    #IO.popen(['arv-normalize'], 'w+b') do |io|
+    #  io.write manifest_text
+    #  io.close_write
+    #  while buf = io.read(2**20)
+    #    normalized_manifest += buf
+    #  end
+    #end
+
     @data_size = 0
-    @files = []
+    tmp = {}
+
     manifest_text.split("\n").each do |stream|
       toks = stream.split(" ")
+
+      stream = toks[0].gsub /\\(\\|[0-7]{3})/ do |escape_sequence|
+        case $1
+        when '\\' '\\'
+        else $1.to_i(8).chr
+        end
+      end
+
       toks[1..-1].each do |tok|
         if (re = tok.match /^[0-9a-f]{32}/)
           blocksize = nil
@@ -84,10 +109,87 @@ class Collection < ArvadosModel
               else $1.to_i(8).chr
               end
             end
-            @files << [toks[0], filename, re[2].to_i]
+            fn = stream + '/' + filename
+            i = re[2].to_i
+            if tmp[fn]
+              tmp[fn] += i
+            else
+              tmp[fn] = i
+            end
           end
         end
       end
     end
+
+    @files = []
+    tmp.each do |k, v|
+      re = k.match(/^(.+)\/(.+)/)
+      @files << [re[1], re[2], v]
+    end
+  end
+
+  def self.uuid_like_pattern
+    "________________________________+%"
+  end
+
+  def self.normalize_uuid uuid
+    hash_part = nil
+    size_part = nil
+    uuid.split('+').each do |token|
+      if token.match /^[0-9a-f]{32,}$/
+        raise "uuid #{uuid} has multiple hash parts" if hash_part
+        hash_part = token
+      elsif token.match /^\d+$/
+        raise "uuid #{uuid} has multiple size parts" if size_part
+        size_part = token
+      end
+    end
+    raise "uuid #{uuid} has no hash part" if !hash_part
+    [hash_part, size_part].compact.join '+'
+  end
+
+  def self.for_latest_docker_image(search_term, search_tag=nil, readers=nil)
+    readers ||= [Thread.current[:user]]
+    base_search = Link.
+      readable_by(*readers).
+      readable_by(*readers, table_name: "collections").
+      joins("JOIN collections ON links.head_uuid = collections.uuid").
+      order("links.created_at DESC")
+
+    # If the search term is a Collection locator with an associated
+    # Docker image hash link, return that Collection.
+    coll_matches = base_search.
+      where(link_class: "docker_image_hash", collections: {uuid: search_term})
+    if match = coll_matches.first
+      return find_by_uuid(match.head_uuid)
+    end
+
+    # Find Collections with matching Docker image repository+tag pairs.
+    matches = base_search.
+      where(link_class: "docker_image_repo+tag",
+            name: "#{search_term}:#{search_tag || 'latest'}")
+
+    # If that didn't work, find Collections with matching Docker image hashes.
+    if matches.empty?
+      matches = base_search.
+        where("link_class = ? and name LIKE ?",
+              "docker_image_hash", "#{search_term}%")
+    end
+
+    # Select the image that was created most recently.  Note that the
+    # SQL search order and fallback timestamp values are chosen so
+    # that if image timestamps are missing, we use the image with the
+    # newest link.
+    latest_image_link = nil
+    latest_image_timestamp = "1900-01-01T00:00:00Z"
+    matches.find_each do |link|
+      link_timestamp = link.properties.fetch("image_timestamp",
+                                             "1900-01-01T00:00:01Z")
+      if link_timestamp > latest_image_timestamp
+        latest_image_link = link
+        latest_image_timestamp = link_timestamp
+      end
+    end
+    latest_image_link.nil? ? nil : find_by_uuid(latest_image_link.head_uuid)
   end
 end