Put some code into the Collection model to do normalization, but I don't think that is...
[arvados.git] / services / api / app / models / collection.rb
index 76dd60fb0a36c2a5dc9c14c55673eff57e55c61a..efe41cb4bbf3f03dbce4229bdbdeacb1cfd3a163 100644 (file)
@@ -1,9 +1,9 @@
-class Collection < OrvosModel
+class Collection < ArvadosModel
   include AssignUuid
   include KindAndEtag
   include CommonApiTemplate
 
-  api_accessible :superuser, :extend => :common do |t|
+  api_accessible :user, extend: :common do |t|
     t.add :data_size
     t.add :files
   end
@@ -28,7 +28,9 @@ class Collection < OrvosModel
     if self.manifest_text.nil? and self.uuid.nil?
       super
     elsif self.manifest_text and self.uuid
-      if self.uuid.gsub(/\+[^,]+/,'') == Digest::MD5.hexdigest(self.manifest_text)
+      self.uuid.gsub! /\+.*/, ''
+      if self.uuid == Digest::MD5.hexdigest(self.manifest_text)
+        self.uuid.gsub! /$/, '+' + self.manifest_text.length.to_s
         true
       else
         errors.add :uuid, 'uuid does not match checksum of manifest_text'
@@ -59,10 +61,28 @@ class Collection < OrvosModel
       @files = []
       return
     end
+
+    normalized_manifest = ""
+    IO.popen(['arv-normalize'], 'w+b') do |io|
+      io.write manifest_text
+      io.close_write
+      while buf = io.read(2**20)
+        normalized_manifest += buf
+      end
+    end
+
     @data_size = 0
     @files = []
-    manifest_text.split("\n").each do |stream|
+    normalized_manifest.split("\n").each do |stream|
       toks = stream.split(" ")
+
+      stream = toks[0].gsub /\\(\\|[0-7]{3})/ do |escape_sequence| # unescape the stream name token
+        case $1
+        when '\\' then '\\'  # "\\" in the manifest decodes to one literal backslash
+        else $1.to_i(8).chr  # "\ooo" (three octal digits) decodes to that byte
+        end
+      end
+
       toks[1..-1].each do |tok|
         if (re = tok.match /^[0-9a-f]{32}/)
           blocksize = nil
@@ -78,7 +98,17 @@ class Collection < OrvosModel
           @data_size += blocksize if @data_size
         else
           if (re = tok.match /^(\d+):(\d+):(\S+)$/)
-            @files << [toks[0], re[3], re[2].to_i]
+            filename = re[3].gsub /\\(\\|[0-7]{3})/ do |escape_sequence| # unescape the file name
+              case $1
+              when '\\' then '\\'  # "\\" in the manifest decodes to one literal backslash
+              else $1.to_i(8).chr  # "\ooo" (three octal digits) decodes to that byte
+              end
+            end
+            if @files.length > 0 and @files[-1][0] == stream and @files[-1][1] == filename
+              @files[-1][2] += re[2].to_i  # same stream and file as the previous entry: add this segment's size
+            else
+              @files << [stream, filename, re[2].to_i]  # new entry: [stream name, file name, size]
+            end
           end
         end
       end