Put some code into collection model to do normalization but I don't think that is...
[arvados.git] / services / api / app / models / collection.rb
index 99510449fe31193253c62ff057c883df4aa8e7ea..efe41cb4bbf3f03dbce4229bdbdeacb1cfd3a163 100644 (file)
@@ -28,7 +28,9 @@ class Collection < ArvadosModel
     if self.manifest_text.nil? and self.uuid.nil?
       super
     elsif self.manifest_text and self.uuid
-      if self.uuid.gsub(/\+[^,]+/,'') == Digest::MD5.hexdigest(self.manifest_text)
+      self.uuid.gsub! /\+.*/, ''
+      if self.uuid == Digest::MD5.hexdigest(self.manifest_text)
+        self.uuid.gsub! /$/, '+' + self.manifest_text.length.to_s
         true
       else
         errors.add :uuid, 'uuid does not match checksum of manifest_text'
@@ -59,10 +61,28 @@ class Collection < ArvadosModel
       @files = []
       return
     end
+
+    normalized_manifest = ""
+    IO.popen(['arv-normalize'], 'w+b') do |io|
+      io.write manifest_text
+      io.close_write
+      while buf = io.read(2**20)
+        normalized_manifest += buf
+      end
+    end
+
     @data_size = 0
     @files = []
-    manifest_text.split("\n").each do |stream|
+    normalized_manifest.split("\n").each do |stream|
       toks = stream.split(" ")
+
+      stream = toks[0].gsub /\\(\\|[0-7]{3})/ do |escape_sequence|  # decode backslash escapes in the stream-name token
+        case $1
+        when '\\' then '\\'  # escaped backslash -> one backslash; `then` is required, else the two literals concatenate into the pattern
+        else $1.to_i(8).chr  # otherwise $1 is three octal digits naming a single character code
+        end
+      end
+
       toks[1..-1].each do |tok|
         if (re = tok.match /^[0-9a-f]{32}/)
           blocksize = nil
@@ -84,7 +104,11 @@ class Collection < ArvadosModel
               else $1.to_i(8).chr
               end
             end
-            @files << [toks[0], filename, re[2].to_i]
+            if !@files.empty? and @files[-1][0] == stream and @files[-1][1] == filename  # Array has no `>`; check for a previous entry explicitly
+              @files[-1][2] += re[2].to_i  # same stream and filename as the previous entry: add to its running total
+            else
+              @files << [stream, filename, re[2].to_i]  # first occurrence: start a new [stream, filename, total] entry
+            end
           end
         end
       end