Merge branch '19830-pysdk-util-docs'
[arvados.git] / services / api / app / controllers / arvados / v1 / collections_controller.rb
index 6b2bd64be93d1211100fd44d5767b16ac5631ea5..ad1771a87eac936c697ae6e0ef5ba1985d599fcc 100644 (file)
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+require "arvados/keep"
+require "trashable"
+
 class Arvados::V1::CollectionsController < ApplicationController
-  def create
-    if !resource_attrs[:manifest_text]
-      return send_error("'manifest_text' attribute must be specified",
-                        status: :unprocessable_entity)
-    end
+  include DbCurrentTime
+  include TrashableController
 
-    # Check permissions on the collection manifest.
-    # If any signature cannot be verified, return 403 Permission denied.
-    api_token = current_api_client_authorization.andand.api_token
-    signing_opts = {
-      key: Rails.configuration.blob_signing_key,
-      api_token: api_token,
-      ttl: Rails.configuration.blob_signing_ttl,
-    }
-    resource_attrs[:manifest_text].lines.each do |entry|
-      entry.split[1..-1].each do |tok|
-        if /^[[:digit:]]+:[[:digit:]]+:/.match tok
-          # This is a filename token, not a blob locator. Note that we
-          # keep checking tokens after this, even though manifest
-          # format dictates that all subsequent tokens will also be
-          # filenames. Safety first!
-        elsif Blob.verify_signature tok, signing_opts
-          # OK.
-        elsif Locator.parse(tok).andand.signature
-          # Signature provided, but verify_signature did not like it.
-          logger.warn "Invalid signature on locator #{tok}"
-          raise ArvadosModel::PermissionDeniedError
-        elsif Rails.configuration.permit_create_collection_with_unsigned_manifest
-          # No signature provided, but we are running in insecure mode.
-          logger.debug "Missing signature on locator #{tok} ignored"
-        elsif Blob.new(tok).empty?
-          # No signature provided -- but no data to protect, either.
-        else
-          logger.warn "Missing signature on locator #{tok}"
-          raise ArvadosModel::PermissionDeniedError
-        end
-      end
+  def self._index_requires_parameters # Index parameters: whatever super declares, plus two optional boolean flags.
+    (super rescue {}).
+      merge({ # the `super rescue {}` receiver substitutes an empty hash if the call to super raises
+        include_trash: {
+          type: 'boolean', required: false, default: false, description: "Include collections whose is_trashed attribute is true.",
+        },
+        include_old_versions: {
+          type: 'boolean', required: false, default: false, description: "Include past collection versions.",
+        },
+      })
+  end
+
+  def self._show_requires_parameters # Show parameters: whatever super declares, plus two optional boolean flags.
+    (super rescue {}).
+      merge({ # the `super rescue {}` receiver substitutes an empty hash if the call to super raises
+        include_trash: {
+          type: 'boolean', required: false, default: false, description: "Show collection even if its is_trashed attribute is true.",
+        },
+        include_old_versions: {
+          type: 'boolean', required: false, default: true, description: "Include past collection versions.", # note: declared default is true here, false for index
+        },
+      })
+  end
+
+  def create # Adjusts resource_attrs in place, then hands off to the superclass create.
+    if resource_attrs[:uuid] and (loc = Keep::Locator.parse(resource_attrs[:uuid])) # a uuid that parses as a Keep locator is moved to portable_data_hash
+      resource_attrs[:portable_data_hash] = loc.to_s
+      resource_attrs.delete :uuid
     end
+    resource_attrs.delete :version # a supplied version attribute is dropped before super runs
+    resource_attrs.delete :current_version_uuid # likewise for current_version_uuid
+    super
+  end
 
-    # Remove any permission signatures from the manifest.
-    munge_manifest_locators(resource_attrs[:manifest_text]) do |loc|
-      loc.without_signature.to_s
+  def update # Defaults preserve_version, then hands off to the superclass update.
+    # preserve_version is disabled unless explicitly requested: a missing or falsy value is replaced by an explicit false.
+    if !resource_attrs[:preserve_version]
+      resource_attrs[:preserve_version] = false
     end
+    super
+  end
 
+  def find_objects_for_index # Sets @objects to the readable collections (honoring the two flags below), then calls super.
+    opts = {
+      include_trash: params[:include_trash] || ['destroy', 'trash', 'untrash'].include?(action_name), # true for these three actions even without the parameter
+      include_old_versions: params[:include_old_versions] || false,
+    }
+    @objects = Collection.readable_by(*@read_users, opts) if !opts.empty? # opts always has two keys here, so this guard never skips the assignment
     super
   end
 
-  def find_object_by_uuid
-    if loc = Locator.parse(params[:id])
+  def find_object_by_uuid(with_lock: false) # If params[:id] parses as a Keep locator, builds a hash @object from a matching collection; otherwise defers to super.
+    if loc = Keep::Locator.parse(params[:id])
       loc.strip_hints!
-      if c = Collection.readable_by(*@read_users).where({ portable_data_hash: loc.to_s }).limit(1).first
+
+      opts = {
+        include_trash: params[:include_trash],
+        include_old_versions: params[:include_old_versions],
+      }
+
+      # It matters which Collection object we pick because blob
+      # signatures depend on the value of trash_at.
+      #
+      # From postgres doc: "By default, null values sort as if larger
+      # than any non-null value; that is, NULLS FIRST is the default
+      # for DESC order, and NULLS LAST otherwise."
+      #
+      # "trash_at desc" sorts null first, then latest to earliest, so
+      # it will select the Collection object with the longest
+      # available lifetime.
+
+      select_attrs = (@select || ["manifest_text"]) | ["portable_data_hash", "trash_at"] # portable_data_hash and trash_at are always fetched; manifest_text only by default or when selected
+      model = Collection
+      if with_lock && Rails.configuration.API.LockBeforeUpdate # locking requires both the caller's request and the LockBeforeUpdate setting
+        model = model.lock
+      end
+      if c = model.
+               readable_by(*@read_users, opts).
+               where({ portable_data_hash: loc.to_s }).
+               order("trash_at desc").
+               select(select_attrs.join(", ")).
+               limit(1).
+               first
         @object = {
+          uuid: c.portable_data_hash, # the uuid key carries the portable data hash, not a collection uuid
           portable_data_hash: c.portable_data_hash,
-          manifest_text: c.manifest_text,
-          files: c.files,
-          data_size: c.data_size
+          trash_at: c.trash_at,
         }
+        if select_attrs.index("manifest_text") # manifest_text is included only when it was part of the select list
+          @object[:manifest_text] = c.manifest_text
+        end
       end
     else
-      super
+      super(with_lock: with_lock)
     end
-    true
   end
 
   def show
-    sign_manifests(@object[:manifest_text])
-    super
+    if @object.is_a? Collection # Collection records go through the superclass show
+      # Omit unsigned_manifest_text unless @select has already been set.
+      @select ||= model_class.selectable_attributes - ["unsigned_manifest_text"]
+      super
+    else
+      send_json @object # any other @object (e.g. the hash built by find_object_by_uuid for a locator id) is sent as-is
+    end
   end
 
-  def index
-    sign_manifests(*@objects.map { |c| c[:manifest_text] })
-    super
-  end
 
-  def script_param_edges(visited, sp)
+  def find_collections(visited, sp, ignore_columns=[], &b) # Walks sp recursively; yields (portable_data_hash, nil) or (nil, collection_uuid) for strings that contain one.
     case sp
+    when ArvadosModel
+      sp.class.columns.each do |c|
+        find_collections(visited, sp[c.name.to_sym], &b) if !ignore_columns.include?(c.name) # ignore_columns filters this model's columns only; it is not forwarded to the recursive call
+      end
     when Hash
       sp.each do |k, v|
-        script_param_edges(visited, v)
+        find_collections(visited, v, &b)
       end
     when Array
       sp.each do |v|
-        script_param_edges(visited, v)
+        find_collections(visited, v, &b)
       end
     when String
-      return if sp.empty?
-      if loc = Locator.parse(sp)
-        search_edges(visited, loc.to_s, :search_up)
+      if m = /[a-f0-9]{32}\+\d+/.match(sp) # unanchored: matches a hash+size anywhere in the string, first occurrence only
+        yield m[0], nil
+      elsif m = Collection.uuid_regex.match(sp) # tried only when no hash+size pattern was found
+        yield nil, m[0]
       end
     end
   end
@@ -96,31 +145,50 @@ class Arvados::V1::CollectionsController < ApplicationController
       return
     end
 
-    if loc = Locator.parse(uuid)
+    if loc = Keep::Locator.parse(uuid)
       loc.strip_hints!
       return if visited[loc.to_s]
     end
 
-    logger.debug "visiting #{uuid}"
-
     if loc
       # uuid is a portable_data_hash
-      if c = Collection.readable_by(*@read_users).where(portable_data_hash: loc.to_s).limit(1).first
-        visited[loc.to_s] = {
-          portable_data_hash: c.portable_data_hash,
-          files: c.files,
-          data_size: c.data_size
-        }
+      collections = Collection.readable_by(*@read_users).where(portable_data_hash: loc.to_s)
+      c = collections.limit(2).all
+      if c.size == 1
+        visited[loc.to_s] = c[0]
+      elsif c.size > 1
+        name = collections.limit(1).where("name <> ''").first
+        if name
+          visited[loc.to_s] = {
+            portable_data_hash: c[0].portable_data_hash,
+            name: "#{name.name} + #{collections.count-1} more"
+          }
+        else
+          visited[loc.to_s] = {
+            portable_data_hash: c[0].portable_data_hash,
+            name: loc.to_s
+          }
+        end
       end
 
       if direction == :search_up
         # Search upstream for jobs where this locator is the output of some job
-        Job.readable_by(*@read_users).where(output: loc.to_s).each do |job|
-          search_edges(visited, job.uuid, :search_up)
+        if !Rails.configuration.API.DisabledAPIs["jobs.list"]
+          Job.readable_by(*@read_users).where(output: loc.to_s).each do |job|
+            search_edges(visited, job.uuid, :search_up)
+          end
+
+          Job.readable_by(*@read_users).where(log: loc.to_s).each do |job|
+            search_edges(visited, job.uuid, :search_up)
+          end
+        end
+
+        Container.readable_by(*@read_users).where(output: loc.to_s).pluck(:uuid).each do |c_uuid|
+          search_edges(visited, c_uuid, :search_up)
         end
 
-        Job.readable_by(*@read_users).where(log: loc.to_s).each do |job|
-          search_edges(visited, job.uuid, :search_up)
+        Container.readable_by(*@read_users).where(log: loc.to_s).pluck(:uuid).each do |c_uuid|
+          search_edges(visited, c_uuid, :search_up)
         end
       elsif direction == :search_down
         if loc.to_s == "d41d8cd98f00b204e9800998ecf8427e+0"
@@ -129,8 +197,20 @@ class Arvados::V1::CollectionsController < ApplicationController
         end
 
         # Search downstream for jobs where this locator is in script_parameters
-        Job.readable_by(*@read_users).where(["jobs.script_parameters like ?", "%#{loc.to_s}%"]).each do |job|
-          search_edges(visited, job.uuid, :search_down)
+        if !Rails.configuration.API.DisabledAPIs["jobs.list"]
+          Job.readable_by(*@read_users).where(["jobs.script_parameters like ?", "%#{loc.to_s}%"]).each do |job|
+            search_edges(visited, job.uuid, :search_down)
+          end
+
+          Job.readable_by(*@read_users).where(["jobs.docker_image_locator = ?", "#{loc.to_s}"]).each do |job|
+            search_edges(visited, job.uuid, :search_down)
+          end
+        end
+
+        Container.readable_by(*@read_users).where([Container.full_text_trgm + " like ?", "%#{loc.to_s}%"]).select("output, log, uuid").each do |c|
+          if c.output != loc.to_s && c.log != loc.to_s
+            search_edges(visited, c.uuid, :search_down)
+          end
         end
       end
     else
@@ -141,16 +221,71 @@ class Arvados::V1::CollectionsController < ApplicationController
           visited[uuid] = job.as_api_response
           if direction == :search_up
             # Follow upstream collections referenced in the script parameters
-            script_param_edges(visited, job.script_parameters)
+            find_collections(visited, job) do |hash, col_uuid|
+              search_edges(visited, hash, :search_up) if hash
+              search_edges(visited, col_uuid, :search_up) if col_uuid
+            end
           elsif direction == :search_down
             # Follow downstream job output
             search_edges(visited, job.output, direction)
           end
         end
+      elsif rsc == Container
+        c = Container.readable_by(*@read_users).where(uuid: uuid).limit(1).first
+        if c
+          visited[uuid] = c.as_api_response
+          if direction == :search_up
+            # Follow upstream collections referenced in the script parameters
+            find_collections(visited, c, ignore_columns=["log", "output"]) do |hash, col_uuid|
+              search_edges(visited, hash, :search_up) if hash
+              search_edges(visited, col_uuid, :search_up) if col_uuid
+            end
+          elsif direction == :search_down
+            # Follow downstream job output
+            search_edges(visited, c.output, :search_down)
+          end
+        end
+      elsif rsc == ContainerRequest
+        c = ContainerRequest.readable_by(*@read_users).where(uuid: uuid).limit(1).first
+        if c
+          visited[uuid] = c.as_api_response
+          if direction == :search_up
+            # Follow upstream collections
+            find_collections(visited, c, ignore_columns=["log_uuid", "output_uuid"]) do |hash, col_uuid|
+              search_edges(visited, hash, :search_up) if hash
+              search_edges(visited, col_uuid, :search_up) if col_uuid
+            end
+          elsif direction == :search_down
+            # Follow downstream job output
+            search_edges(visited, c.output_uuid, :search_down)
+          end
+        end
       elsif rsc == Collection
-        if c = Collection.readable_by(*@read_users).where(uuid: uuid).limit(1).first
-          search_edges(visited, c.portable_data_hash, direction)
-          visited[c.portable_data_hash] = c.as_api_response
+        c = Collection.readable_by(*@read_users).where(uuid: uuid).limit(1).first
+        if c
+          if direction == :search_up
+            visited[c.uuid] = c.as_api_response
+
+            if !Rails.configuration.API.DisabledAPIs["jobs.list"]
+              Job.readable_by(*@read_users).where(output: c.portable_data_hash).each do |job|
+                search_edges(visited, job.uuid, :search_up)
+              end
+
+              Job.readable_by(*@read_users).where(log: c.portable_data_hash).each do |job|
+                search_edges(visited, job.uuid, :search_up)
+              end
+            end
+
+            ContainerRequest.readable_by(*@read_users).where(output_uuid: uuid).pluck(:uuid).each do |cr_uuid|
+              search_edges(visited, cr_uuid, :search_up)
+            end
+
+            ContainerRequest.readable_by(*@read_users).where(log_uuid: uuid).pluck(:uuid).each do |cr_uuid|
+              search_edges(visited, cr_uuid, :search_up)
+            end
+          elsif direction == :search_down
+            search_edges(visited, c.portable_data_hash, :search_down)
+          end
         end
       elsif rsc != nil
         rsc.where(uuid: uuid).each do |r|
@@ -180,72 +315,31 @@ class Arvados::V1::CollectionsController < ApplicationController
 
   def provenance
     visited = {}
-    search_edges(visited, @object[:uuid] || @object[:portable_data_hash], :search_up)
-    render json: visited
+    if @object[:uuid] # start the upstream search from the uuid when present, otherwise from the portable data hash
+      search_edges(visited, @object[:uuid], :search_up)
+    else
+      search_edges(visited, @object[:portable_data_hash], :search_up)
+    end
+    send_json visited # the response is the visited hash that search_edges populated
   end
 
   def used_by
     visited = {}
-    search_edges(visited, @object[:uuid] || @object[:portable_data_hash], :search_down)
-    render json: visited
-  end
-
-  def self.munge_manifest_locators(manifest)
-    # Given a manifest text and a block, yield each locator,
-    # and replace it with whatever the block returns.
-    manifest.andand.gsub!(/ [[:xdigit:]]{32}(\+[[:digit:]]+)?(\+\S+)/) do |word|
-      if loc = Locator.parse(word.strip)
-        " " + yield(loc)
-      else
-        " " + word
-      end
+    if @object[:uuid] # start the downstream search from the uuid when present, otherwise from the portable data hash
+      search_edges(visited, @object[:uuid], :search_down)
+    else
+      search_edges(visited, @object[:portable_data_hash], :search_down)
     end
+    send_json visited # the response is the visited hash that search_edges populated
   end
 
   protected
 
-  def find_objects_for_index
-    # Omit manifest_text from index results unless expressly selected.
-    @select ||= model_class.api_accessible_attributes(:user).
-      map { |attr_spec| attr_spec.first.to_s } - ["manifest_text"]
+  def load_select_param *args # Calls super first, then applies a default select list for the index action only.
     super
-  end
-
-  def find_object_by_uuid
-    super
-    if !@object and !params[:uuid].match(/^[0-9a-f]+\+\d+$/)
-      # Normalize the given uuid and search again.
-      hash_part = params[:uuid].match(/^([0-9a-f]*)/)[1]
-      collection = Collection.where('uuid like ?', hash_part + '+%').first
-      if collection
-        # We know the collection exists, and what its real uuid is in
-        # the database. Now, throw out @objects and repeat the usual
-        # lookup procedure. (Returning the collection at this point
-        # would bypass permission checks.)
-        @objects = nil
-        @where = { uuid: collection.uuid }
-        find_objects_for_index
-        @object = @objects.first
-      end
-    end
-  end
-
-  def munge_manifest_locators(manifest, &block)
-    self.class.munge_manifest_locators(manifest, &block)
-  end
-
-  def sign_manifests(*manifests)
-    if current_api_client_authorization
-      signing_opts = {
-        key: Rails.configuration.blob_signing_key,
-        api_token: current_api_client_authorization.api_token,
-        ttl: Rails.configuration.blob_signing_ttl,
-      }
-      manifests.each do |text|
-        munge_manifest_locators(text) do |loc|
-          Blob.sign_locator(loc.to_s, signing_opts)
-        end
-      end
+    if action_name == 'index'
+      # Omit manifest_text and unsigned_manifest_text from index results unless expressly selected.
+      @select ||= model_class.selectable_attributes - ["manifest_text", "unsigned_manifest_text"] # ||= leaves an already-set @select untouched
     end
   end
 end