X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/d6cc9d6e055974384be446f6da8218ff7da048ae..ffa11e9dc42740652da1d8382b89bc7eaaac5e2e:/services/api/app/models/collection.rb diff --git a/services/api/app/models/collection.rb b/services/api/app/models/collection.rb index df84b09dfb..efe41cb4bb 100644 --- a/services/api/app/models/collection.rb +++ b/services/api/app/models/collection.rb @@ -28,7 +28,9 @@ class Collection < ArvadosModel if self.manifest_text.nil? and self.uuid.nil? super elsif self.manifest_text and self.uuid - if self.uuid.gsub(/\+[^,]+/,'') == Digest::MD5.hexdigest(self.manifest_text) + self.uuid.gsub! /\+.*/, '' + if self.uuid == Digest::MD5.hexdigest(self.manifest_text) + self.uuid.gsub! /$/, '+' + self.manifest_text.length.to_s true else errors.add :uuid, 'uuid does not match checksum of manifest_text' @@ -59,10 +61,28 @@ class Collection < ArvadosModel @files = [] return end + + normalized_manifest = "" + IO.popen(['arv-normalize'], 'w+b') do |io| + io.write manifest_text + io.close_write + while buf = io.read(2**20) + normalized_manifest += buf + end + end + @data_size = 0 @files = [] - manifest_text.split("\n").each do |stream| + normalized_manifest.split("\n").each do |stream| toks = stream.split(" ") + + stream = toks[0].gsub /\\(\\|[0-7]{3})/ do |escape_sequence| + case $1 + when '\\' '\\' + else $1.to_i(8).chr + end + end + toks[1..-1].each do |tok| if (re = tok.match /^[0-9a-f]{32}/) blocksize = nil @@ -78,7 +98,17 @@ class Collection < ArvadosModel @data_size += blocksize if @data_size else if (re = tok.match /^(\d+):(\d+):(\S+)$/) - @files << [toks[0], re[3], re[2].to_i] + filename = re[3].gsub /\\(\\|[0-7]{3})/ do |escape_sequence| + case $1 + when '\\' '\\' + else $1.to_i(8).chr + end + end + if @files > 0 and @files[-1][0] == stream and @files[-1][1] == filename + @files[-1][2] += re[2].to_i + else + @files << [stream, filename, re[2].to_i] + end end end end