Finished code to assemble a new manifest from the supplied files and collections.
author Peter Amstutz <peter.amstutz@curoverse.com>
Fri, 28 Feb 2014 21:36:44 +0000 (16:36 -0500)
committer Peter Amstutz <peter.amstutz@curoverse.com>
Fri, 28 Feb 2014 21:36:44 +0000 (16:36 -0500)
New collection is added to API server and to Keep.
Added --extract option to arv-normalize to produce a manifest for a single file in a collection.

apps/workbench/app/controllers/actions_controller.rb
apps/workbench/app/models/collection.rb
apps/workbench/app/views/collections/_show_files.html.erb
sdk/python/bin/arv-normalize

index 94bfd1e8e8101b44092df7698e883074edfe991a..6f5ed608e91bd4f2c523641d9f913d7b35c46f54 100644 (file)
@@ -1,20 +1,77 @@
 class ActionsController < ApplicationController
   def combine_selected_files_into_collection
     lst = []
+    files = []
     params["selection"].each do |s|
       m = CollectionsHelper.match(s)
-      if m
+      if m[1] and m[2]
         lst.append(m[1] + m[2])
+        files.append(m)
       end
     end
 
     collections = Collection.where(uuid: lst)
-    
-    collections.each do |c| 
-      puts c.manifest_text
+
+    chash = {}
+    collections.each do |c|
+      c.reload()
+      chash[c.uuid] = c
+    end
+
+    combined = ""
+    files.each do |m|
+      mt = chash[m[1]+m[2]].manifest_text
+      if m[4]
+        IO.popen(['arv-normalize', '--extract', m[4][1..-1]], 'w+b') do |io|
+          io.write mt
+          io.close_write
+          while buf = io.read(2**20)
+            combined += buf
+          end
+        end
+      else
+        combined += chash[m[1]+m[2]].manifest_text
+      end
     end
 
-    '/'
+    normalized = ''
+    IO.popen(['arv-normalize'], 'w+b') do |io|
+      io.write combined
+      io.close_write
+      while buf = io.read(2**20)
+        normalized += buf
+      end
+    end
+
+    require 'digest/md5'
+
+    d = Digest::MD5.new()
+    d << normalized
+    newuuid = "#{d.hexdigest}+#{normalized.length}"
+
+    env = Hash[ENV].
+      merge({
+              'ARVADOS_API_HOST' =>
+              $arvados_api_client.arvados_v1_base.
+              sub(/\/arvados\/v1/, '').
+              sub(/^https?:\/\//, ''),
+              'ARVADOS_API_TOKEN' => Thread.current[:arvados_api_token],
+              'ARVADOS_API_HOST_INSECURE' =>
+              Rails.configuration.arvados_insecure_https ? 'true' : 'false'
+            })
+
+    IO.popen([env, 'arv-put', '--raw'], 'w+b') do |io|
+      io.write normalized
+      io.close_write
+      while buf = io.read(2**20)
+
+      end
+    end
+
+    newc = Collection.new({:uuid => newuuid, :manifest_text => normalized})
+    newc.save!
+
+    '/collections/' + newc.uuid
   end
 
   def post
index e22a5f72f4a3a062c0fc9b5cc7fa8bb287ba7fe8..fd6b4e4efc4cf23b6d6d2f0fe18f6d0c9a027dd7 100644 (file)
@@ -1,4 +1,5 @@
 class Collection < ArvadosBase
+
   def total_bytes
     if files
       tot = 0
index a8535ab2595b75b7b33c7c3a5b3868b491cbdf18..6600a40844cdd26bb81135810d885b05ed2d3de0 100644 (file)
@@ -19,8 +19,7 @@
     <% file_path = "#{file[0]}/#{file[1]}" %>
     <tr>
       <td>
-        <% puts "#{file[0]} #{file[1]} #{file_path}" %>
-        <%= check_box_tag 'uuids[]', @object.uuid+file_path, false, {
+        <%= check_box_tag 'uuids[]', @object.uuid+'/'+file_path, false, {
               :class => 'persistent-selection', 
               :friendly_type => "File",
               :friendly_name => "#{@object.uuid}/#{file_path}",
index b1a6ca7b42970f100b4badba45f6755aeb4094d8..755b56507289bbf1d5601ed3e9f238523a0dae1e 100755 (executable)
@@ -13,6 +13,8 @@ logger = logging.getLogger(os.path.basename(sys.argv[0]))
 parser = argparse.ArgumentParser(
     description='Read manifest on standard input and put normalized manifest on standard output.')
 
+parser.add_argument('--extract', type=str, help="The file to extract from the input manifest")
+
 args = parser.parse_args()
 
 import arvados
@@ -21,4 +23,17 @@ r = sys.stdin.read()
     
 cr = arvados.CollectionReader(r)
 
-print cr.manifest_text()
+if args.extract:
+    i = args.extract.rfind('/')
+    if i == -1:
+        stream = '.'
+        fn = args.extract
+    else:
+        stream = args.extract[:i]
+        fn = args.extract[(i+1):]
+    for s in cr.all_streams():
+        if s.name() == stream:
+            if fn in s.files():
+                sys.stdout.write(s.files()[fn].as_manifest())
+else:
+    sys.stdout.write(cr.manifest_text())