14484: Updates the migration to use the SDK for manifest parsing
author: Eric Biagiotti <ebiagiotti@veritasgenetics.com>
Wed, 27 Mar 2019 17:27:06 +0000 (13:27 -0400)
committer: Eric Biagiotti <ebiagiotti@veritasgenetics.com>
Wed, 27 Mar 2019 17:27:06 +0000 (13:27 -0400)
Arvados-DCO-1.1-Signed-off-by: Eric Biagiotti <ebiagiotti@veritasgenetics.com>

services/api/db/migrate/20190322174136_add_file_info_to_collection.rb

index e12e508be7ea10fd061f17007ec95a72aacdfe5b..a1a436ce54ebd2d223bb31a69e13a19d68514c50 100755 (executable)
@@ -2,6 +2,8 @@
 #
 # SPDX-License-Identifier: AGPL-3.0
 
+require "arvados/keep"
+
 class AddFileInfoToCollection < ActiveRecord::Migration
   def do_batch(pdhs)
     pdhs_str = ''
@@ -15,23 +17,10 @@ class AddFileInfoToCollection < ActiveRecord::Migration
     )
 
     collections.rows.each do |row|
-      file_count = 0
-      file_size_total = 0
-      row[1].scan(/\S+/) do |token|
-        is_file = token.match(/^[[:digit:]]+:[[:digit:]]+:/)
-        if is_file
-          _, filesize, filename = token.split(':', 3)
-
-          # Avoid counting empty dir placeholders
-          break if filename == '.' && filesize.zero?
-
-          file_count += 1
-          file_size_total += filesize.to_i
-        end
-      end
+      manifest = Keep::Manifest.new(row[1])
       ActiveRecord::Base.connection.exec_query('BEGIN')
-      ActiveRecord::Base.connection.exec_query("UPDATE collections SET file_count=#{file_count}, "\
-                                               "file_size_total=#{file_size_total} "\
+      ActiveRecord::Base.connection.exec_query("UPDATE collections SET file_count=#{manifest.files_count}, "\
+                                               "file_size_total=#{manifest.files_size} "\
                                                "WHERE portable_data_hash='#{row[0]}'")
       ActiveRecord::Base.connection.exec_query('COMMIT')
     end