3720: Refactor manifest parsing from API server to Ruby SDK.
[arvados.git] / services / api / app / controllers / arvados / v1 / collections_controller.rb
index a0c64aa6e6adc4ad30dc4094681a45d3ce597ecb..45331a36e6285de6aff3a857f7100a4cb5d8ca1d 100644 (file)
+require "arvados/keep"
+
 class Arvados::V1::CollectionsController < ApplicationController
   def create
-    # Collections are owned by system_user. Creating a collection has
-    # two effects: The collection is added if it doesn't already
-    # exist, and a "permission" Link is added (if one doesn't already
-    # exist) giving the current user (or specified owner_uuid)
-    # permission to read it.
-    owner_uuid = resource_attrs.delete(:owner_uuid) || current_user.uuid
-    unless current_user.can? write: owner_uuid
-      logger.warn "User #{current_user.andand.uuid} tried to set collection owner_uuid to #{owner_uuid}"
-      raise ArvadosModel::PermissionDeniedError
-    end
-
-    # Check permissions on the collection manifest.
-    # If any signature cannot be verified, return 403 Permission denied.
-    api_token = current_api_client_authorization.andand.api_token
-    signing_opts = {
-      key: Rails.configuration.blob_signing_key,
-      api_token: api_token,
-      ttl: Rails.configuration.blob_signing_ttl,
-    }
-    resource_attrs[:manifest_text].lines.each do |entry|
-      entry.split[1..-1].each do |tok|
-        if /^[[:digit:]]+:[[:digit:]]+:/.match tok
-          # This is a filename token, not a blob locator. Note that we
-          # keep checking tokens after this, even though manifest
-          # format dictates that all subsequent tokens will also be
-          # filenames. Safety first!
-        elsif Blob.verify_signature tok, signing_opts
-          # OK.
-        elsif Locator.parse(tok).andand.signature
-          # Signature provided, but verify_signature did not like it.
-          logger.warn "Invalid signature on locator #{tok}"
-          raise ArvadosModel::PermissionDeniedError
-        elsif Rails.configuration.permit_create_collection_with_unsigned_manifest
-          # No signature provided, but we are running in insecure mode.
-          logger.debug "Missing signature on locator #{tok} ignored"
-        elsif Blob.new(tok).empty?
-          # No signature provided -- but no data to protect, either.
-        else
-          logger.warn "Missing signature on locator #{tok}"
-          raise ArvadosModel::PermissionDeniedError
-        end
-      end
+    if resource_attrs[:uuid] and (loc = Keep::Locator.parse(resource_attrs[:uuid]))
+      resource_attrs[:portable_data_hash] = loc.to_s
+      resource_attrs.delete :uuid
     end
+    super
+  end
 
-    # Remove any permission signatures from the manifest.
-    resource_attrs[:manifest_text]
-      .gsub!(/ [[:xdigit:]]{32}(\+[[:digit:]]+)?(\+\S+)/) { |word|
-      word.strip!
-      loc = Locator.parse(word)
-      if loc
-        " " + loc.without_signature.to_s
-      else
-        " " + word
-      end
-    }
-
-    # Save the collection with the stripped manifest.
-    act_as_system_user do
-      @object = model_class.new resource_attrs.reject { |k,v| k == :owner_uuid }
-      begin
-        @object.save!
-      rescue ActiveRecord::RecordNotUnique
-        logger.debug resource_attrs.inspect
-        if @object.manifest_text and @object.uuid
-          @existing_object = model_class.
-            where('uuid=? and manifest_text=?',
-                  @object.uuid,
-                  @object.manifest_text).
-            first
-          @object = @existing_object || @object
-        end
-      end
-      if @object
-        link_attrs = {
-          owner_uuid: owner_uuid,
-          link_class: 'permission',
-          name: 'can_read',
-          head_uuid: @object.uuid,
-          tail_uuid: owner_uuid
+  def find_object_by_uuid
+    if loc = Keep::Locator.parse(params[:id])
+      loc.strip_hints!
+      if c = Collection.readable_by(*@read_users).where({ portable_data_hash: loc.to_s }).limit(1).first
+        @object = {
+          uuid: c.portable_data_hash,
+          portable_data_hash: c.portable_data_hash,
+          manifest_text: c.manifest_text,
         }
-        ActiveRecord::Base.transaction do
-          if Link.where(link_attrs).empty?
-            Link.create! link_attrs
-          end
-        end
       end
+    else
+      super
     end
-    show
+    true
   end
 
   def show
-    if current_api_client_authorization
-      signing_opts = {
-        key: Rails.configuration.blob_signing_key,
-        api_token: current_api_client_authorization.api_token,
-        ttl: Rails.configuration.blob_signing_ttl,
-      }
-      @object[:manifest_text]
-        .gsub!(/ [[:xdigit:]]{32}(\+[[:digit:]]+)?(\+\S+)/) { |word|
-        word.strip!
-        loc = Locator.parse(word)
-        if loc
-          " " + Blob.sign_locator(word, signing_opts)
-        else
-          " " + word
-        end
-      }
+    sign_manifests(@object[:manifest_text])
+    if @object.is_a? Collection
+      render json: @object.as_api_response
+    else
+      render json: @object
     end
-    render json: @object.as_api_response(:with_data)
   end
 
-  def collection_uuid(uuid)
-    m = /([a-f0-9]{32}(\+[0-9]+)?)(\+.*)?/.match(uuid)
-    if m
-      m[1]
-    else
-      nil
-    end
+  def index
+    sign_manifests(*@objects.map { |c| c[:manifest_text] })
+    super
   end
 
   def script_param_edges(visited, sp)
@@ -132,108 +51,70 @@ class Arvados::V1::CollectionsController < ApplicationController
       end
     when String
       return if sp.empty?
-      m = collection_uuid(sp)
-      if m
-        generate_provenance_edges(visited, m)
+      if loc = Keep::Locator.parse(sp)
+        search_edges(visited, loc.to_s, :search_up)
       end
     end
   end
 
-  def generate_provenance_edges(visited, uuid)
-    m = collection_uuid(uuid)
-    uuid = m if m
+  def search_edges(visited, uuid, direction)
+    if uuid.nil? or uuid.empty? or visited[uuid]
+      return
+    end
 
-    if not uuid or uuid.empty? or visited[uuid]
-      return ""
+    if loc = Keep::Locator.parse(uuid)
+      loc.strip_hints!
+      return if visited[loc.to_s]
     end
 
     logger.debug "visiting #{uuid}"
 
-    if m  
-      # uuid is a collection
-      Collection.readable_by(current_user).where(uuid: uuid).each do |c|
-        visited[uuid] = c.as_api_response
-        visited[uuid][:files] = []
-        c.files.each do |f|
-          visited[uuid][:files] << f
-        end
+    if loc
+      # uuid is a portable_data_hash
+      if c = Collection.readable_by(*@read_users).where(portable_data_hash: loc.to_s).limit(1).first
+        visited[loc.to_s] = {
+          portable_data_hash: c.portable_data_hash,
+        }
       end
 
-      Job.readable_by(current_user).where(output: uuid).each do |job|
-        generate_provenance_edges(visited, job.uuid)
-      end
+      if direction == :search_up
+        # Search upstream for jobs where this locator is the output of some job
+        Job.readable_by(*@read_users).where(output: loc.to_s).each do |job|
+          search_edges(visited, job.uuid, :search_up)
+        end
 
-      Job.readable_by(current_user).where(log: uuid).each do |job|
-        generate_provenance_edges(visited, job.uuid)
-      end
-      
-    else
-      # uuid is something else
-      rsc = ArvadosModel::resource_class_for_uuid uuid
-      if rsc == Job
-        Job.readable_by(current_user).where(uuid: uuid).each do |job|
-          visited[uuid] = job.as_api_response
-          script_param_edges(visited, job.script_parameters)
+        Job.readable_by(*@read_users).where(log: loc.to_s).each do |job|
+          search_edges(visited, job.uuid, :search_up)
         end
-      elsif rsc != nil
-        rsc.where(uuid: uuid).each do |r|
-          visited[uuid] = r.as_api_response
+      elsif direction == :search_down
+        if loc.to_s == "d41d8cd98f00b204e9800998ecf8427e+0"
+          # Special case, don't follow the empty collection.
+          return
         end
-      end
-    end
-
-    Link.readable_by(current_user).
-      where(head_uuid: uuid, link_class: "provenance").
-      each do |link|
-      visited[link.uuid] = link.as_api_response
-      generate_provenance_edges(visited, link.tail_uuid)
-    end
-
-    #puts "finished #{uuid}"
-  end
-
-  def provenance
-    visited = {}
-    generate_provenance_edges(visited, @object[:uuid])
-    render json: visited
-  end
-
-  def generate_used_by_edges(visited, uuid)
-    m = collection_uuid(uuid)
-    uuid = m if m
-
-    if not uuid or uuid.empty? or visited[uuid]
-      return ""
-    end
-
-    logger.debug "visiting #{uuid}"
 
-    if m  
-      # uuid is a collection
-      Collection.readable_by(current_user).where(uuid: uuid).each do |c|
-        visited[uuid] = c.as_api_response
-        visited[uuid][:files] = []
-        c.files.each do |f|
-          visited[uuid][:files] << f
+        # Search downstream for jobs where this locator is in script_parameters
+        Job.readable_by(*@read_users).where(["jobs.script_parameters like ?", "%#{loc.to_s}%"]).each do |job|
+          search_edges(visited, job.uuid, :search_down)
         end
       end
-
-      if uuid == "d41d8cd98f00b204e9800998ecf8427e+0"
-        # special case for empty collection
-        return
-      end
-
-      Job.readable_by(current_user).where(["jobs.script_parameters like ?", "%#{uuid}%"]).each do |job|
-        generate_used_by_edges(visited, job.uuid)
-      end
-      
     else
-      # uuid is something else
+      # uuid is a regular Arvados UUID
       rsc = ArvadosModel::resource_class_for_uuid uuid
       if rsc == Job
-        Job.readable_by(current_user).where(uuid: uuid).each do |job|
+        Job.readable_by(*@read_users).where(uuid: uuid).each do |job|
           visited[uuid] = job.as_api_response
-          generate_used_by_edges(visited, job.output)
+          if direction == :search_up
+            # Follow upstream collections referenced in the script parameters
+            script_param_edges(visited, job.script_parameters)
+          elsif direction == :search_down
+            # Follow downstream job output
+            search_edges(visited, job.output, direction)
+          end
+        end
+      elsif rsc == Collection
+        if c = Collection.readable_by(*@read_users).where(uuid: uuid).limit(1).first
+          search_edges(visited, c.portable_data_hash, direction)
+          visited[c.portable_data_hash] = c.as_api_response
         end
       elsif rsc != nil
         rsc.where(uuid: uuid).each do |r|
@@ -242,38 +123,59 @@ class Arvados::V1::CollectionsController < ApplicationController
       end
     end
 
-    Link.readable_by(current_user).
-      where(tail_uuid: uuid, link_class: "provenance").
-      each do |link|
-      visited[link.uuid] = link.as_api_response
-      generate_used_by_edges(visited, link.head_uuid)
+    if direction == :search_up
+      # Search for provenance links pointing to the current uuid
+      Link.readable_by(*@read_users).
+        where(head_uuid: uuid, link_class: "provenance").
+        each do |link|
+        visited[link.uuid] = link.as_api_response
+        search_edges(visited, link.tail_uuid, direction)
+      end
+    elsif direction == :search_down
+      # Search for provenance links emanating from the current uuid
+      Link.readable_by(*@read_users).
+        where(tail_uuid: uuid, link_class: "provenance").
+        each do |link|
+        visited[link.uuid] = link.as_api_response
+        search_edges(visited, link.head_uuid, direction)
+      end
     end
+  end
 
-    #puts "finished #{uuid}"
+  def provenance
+    visited = {}
+    search_edges(visited, @object[:uuid] || @object[:portable_data_hash], :search_up)
+    render json: visited
   end
 
   def used_by
     visited = {}
-    generate_used_by_edges(visited, @object[:uuid])
+    search_edges(visited, @object[:uuid] || @object[:portable_data_hash], :search_down)
     render json: visited
   end
 
   protected
-  def find_object_by_uuid
+
+  def apply_filters
+    if action_name == 'index'
+      # Omit manifest_text from index results unless expressly selected.
+      @select ||= model_class.api_accessible_attributes(:user).
+        map { |attr_spec| attr_spec.first.to_s } - ["manifest_text"]
+    end
     super
-    if !@object and !params[:uuid].match(/^[0-9a-f]+\+\d+$/)
-      # Normalize the given uuid and search again.
-      hash_part = params[:uuid].match(/^([0-9a-f]*)/)[1]
-      collection = Collection.where('uuid like ?', hash_part + '+%').first
-      if collection
-        # We know the collection exists, and what its real uuid is in
-        # the database. Now, throw out @objects and repeat the usual
-        # lookup procedure. (Returning the collection at this point
-        # would bypass permission checks.)
-        @objects = nil
-        @where = { uuid: collection.uuid }
-        find_objects_for_index
-        @object = @objects.first
+  end
+
+  def sign_manifests(*manifests)
+    if current_api_client_authorization
+      signing_opts = {
+        key: Rails.configuration.blob_signing_key,
+        api_token: current_api_client_authorization.api_token,
+        ttl: Rails.configuration.blob_signing_ttl,
+      }
+      manifests.each do |text|
+        Collection.munge_manifest_locators(text) do |loc|
+          Blob.sign_locator(loc.to_s, signing_opts)
+        end
       end
     end
   end