6859: use --as-stream and improved arg parsing.
author radhika <radhika@curoverse.com>
Thu, 13 Aug 2015 21:41:49 +0000 (17:41 -0400)
committer radhika <radhika@curoverse.com>
Thu, 13 Aug 2015 21:41:49 +0000 (17:41 -0400)
services/api/script/salvage_collection.rb

index bac033e214bd58a4ad7455b431aabbbfe0c6bef1..60becf456c247b47d11b1a17d9fb65da9108a031 100755 (executable)
@@ -17,31 +17,22 @@ opts = Trollop::options do
   banner "Usage: salvage_collection.rb " +
     "{uuid} {reason}"
   banner ''
-  opt :uuid, <<-eos
-uuid of the collection to be salvaged.
-  eos
-  opt :reason, <<-eos
-Reason for salvaging.
-  eos
+  opt :uuid, "uuid of the collection to be salvaged.", type: :string, required: true
+  opt :reason, "Reason for salvaging.", type: :string, required: false
 end
 
-if ARGV.count < 1
-  Trollop::die "required uuid argument is missing"
-end
-
-uuid, reason = ARGV
-
 require File.dirname(__FILE__) + '/../config/environment'
 require 'arvados/keep'
 include ApplicationHelper
 require 'tempfile'
+require 'shellwords'
 
 def salvage_collection uuid, reason
   act_as_system_user do
     src_collection = Collection.find_by_uuid uuid
     if !src_collection
       $stderr.puts "No collection found for #{uuid}. Returning."
-      return
+      exit 1
     end
 
     begin
@@ -53,7 +44,7 @@ def salvage_collection uuid, reason
         line.split(' ').each do |word|
           if match = Keep::Locator::LOCATOR_REGEXP.match(word)
             word = word.split('+')[0..1].join('+')  # get rid of any hints
-            locators << word
+            locators << word if !word.start_with?('00000000000000000000000000000000')
           end
         end
       end
@@ -63,20 +54,11 @@ def salvage_collection uuid, reason
       temp_file = Tempfile.new('temp')
       temp_file.write(src_manifest)
       temp_file.close
-
-      created = %x(arv-put --use-filename invalid_manifest_text.txt #{temp_file.path})
-
+      new_manifest = %x(arv-put --as-stream --use-filename invalid_manifest_text.txt #{Shellwords::shellescape(temp_file.path)})
       temp_file.unlink
 
-      created.rstrip!
-      match = created.match HasUuid::UUID_REGEX
-      raise "uuid not found" if !match
-
-      # update this new collection manifest to reference all locators from the original manifest
-      new_collection = Collection.find_by_uuid created
+      new_collection = Collection.new
 
-      new_manifest = new_collection['manifest_text']
-      new_manifest = new_manifest.gsub(/\+A[^+]*/, '')
       total_size = 0
       locators.each do |locator|
         total_size += locator.split('+')[1].to_i
@@ -87,27 +69,28 @@ def salvage_collection uuid, reason
       new_collection.manifest_text = new_manifest
       new_collection.portable_data_hash = Digest::MD5.hexdigest(new_manifest)
 
-      new_collection.save!
+      created = new_collection.save!
+      raise "New collection creation failed." if !created
 
       $stderr.puts "Salvaged manifest and data for #{uuid} are in #{new_collection.uuid}."
       puts "Created new collection #{created}"
     rescue => error
       $stderr.puts "Error creating collection for #{uuid}: #{error}"
-      return
+      exit 1
     end
 
     begin
       # update src_collection collection name, pdh, and manifest_text
       src_collection.name = (src_collection.name || '') + ' (' + (reason || '') + '; salvaged data at ' + created + ')'
-      src_collection.manifest_text = ''
       src_collection.portable_data_hash = 'd41d8cd98f00b204e9800998ecf8427e+0'
       src_collection.save!
       $stderr.puts "Collection #{uuid} emptied and renamed to #{src_collection.name.inspect}."
     rescue => error
       $stderr.puts "Error salvaging collection #{uuid}: #{error}"
+      exit 1
     end
   end
 end
 
 # Salvage the collection with the given uuid
-salvage_collection uuid, reason
+salvage_collection opts.uuid, opts.reason