Put some code into collection model to do normalization but I don't think that is...

[arvados.git] / services / api / app / models / collection.rb
diff --git a/services/api/app/models/collection.rb b/services/api/app/models/collection.rb

index 76dd60fb0a36c2a5dc9c14c55673eff57e55c61a..efe41cb4bbf3f03dbce4229bdbdeacb1cfd3a163 100644 (file)
--- a/services/api/app/models/collection.rb
+++ b/services/api/app/models/collection.rb
@@ -1,9 +1,9 @@
-class Collection < OrvosModel
+class Collection < ArvadosModel
    include AssignUuid
    include KindAndEtag
    include CommonApiTemplate
  
-  api_accessible :superuser, :extend => :common do |t|
+  api_accessible :user, extend: :common do |t|
      t.add :data_size
      t.add :files
    end
@@ -28,7 +28,9 @@ class Collection < OrvosModel
      if self.manifest_text.nil? and self.uuid.nil?
        super
      elsif self.manifest_text and self.uuid
-      if self.uuid.gsub(/\+[^,]+/,'') == Digest::MD5.hexdigest(self.manifest_text)
+      self.uuid.gsub! /\+.*/, ''
+      if self.uuid == Digest::MD5.hexdigest(self.manifest_text)
+        self.uuid.gsub! /$/, '+' + self.manifest_text.length.to_s
          true
        else
          errors.add :uuid, 'uuid does not match checksum of manifest_text'
@@ -59,10 +61,28 @@ class Collection < OrvosModel
        @files = []
        return
      end
+
+    normalized_manifest = ""
+    IO.popen(['arv-normalize'], 'w+b') do |io|
+      io.write manifest_text
+      io.close_write
+      while buf = io.read(2**20)
+        normalized_manifest += buf
+      end
+    end
+
      @data_size = 0
      @files = []
-    manifest_text.split("\n").each do |stream|
+    normalized_manifest.split("\n").each do |stream|
        toks = stream.split(" ")
+
+      stream = toks[0].gsub /\\(\\|[0-7]{3})/ do |escape_sequence| # decode backslash escapes in the stream name
+        case $1
+        when '\\' then '\\' # escaped backslash -> literal backslash; 'then' keeps the two literals from concatenating
+        else $1.to_i(8).chr # three octal digits -> the character with that code
+        end
+      end
+
        toks[1..-1].each do |tok|
          if (re = tok.match /^[0-9a-f]{32}/)
            blocksize = nil
@@ -78,7 +98,17 @@ class Collection < OrvosModel
            @data_size += blocksize if @data_size
          else
            if (re = tok.match /^(\d+):(\d+):(\S+)$/)
-            @files << [toks[0], re[3], re[2].to_i]
+            filename = re[3].gsub /\\(\\|[0-7]{3})/ do |escape_sequence| # decode backslash escapes in the file name
+              case $1
+              when '\\' then '\\' # escaped backslash -> literal backslash; 'then' keeps the two literals from concatenating
+              else $1.to_i(8).chr # three octal digits -> the character with that code
+              end
+            end
+            if !@files.empty? and @files[-1][0] == stream and @files[-1][1] == filename # Array has no '>'; test emptiness instead
+              @files[-1][2] += re[2].to_i # same stream and file as the previous entry: add this segment's length to it
+            else
+              @files << [stream, filename, re[2].to_i] # new entry: [stream name, file name, segment length]
+            end
            end
          end
        end