+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+require "arvados/keep"
+require "trashable"
+
class Arvados::V1::CollectionsController < ApplicationController
+ include DbCurrentTime
+ include TrashableController
+
+ def self._index_requires_parameters
+ (super rescue {}).
+ merge({
+ include_trash: {
+ type: 'boolean', required: false, default: false, description: "Include collections whose is_trashed attribute is true.",
+ },
+ include_old_versions: {
+ type: 'boolean', required: false, default: false, description: "Include past collection versions.",
+ },
+ })
+ end
+
+ def self._show_requires_parameters
+ (super rescue {}).
+ merge({
+ include_trash: {
+ type: 'boolean', required: false, default: false, description: "Show collection even if its is_trashed attribute is true.",
+ },
+ include_old_versions: {
+ type: 'boolean', required: false, default: true, description: "Include past collection versions.",
+ },
+ })
+ end
+
def create
- # Collections are owned by system_user. Creating a collection has
- # two effects: The collection is added if it doesn't already
- # exist, and a "permission" Link is added (if one doesn't already
- # exist) giving the current user (or specified owner_uuid)
- # permission to read it.
- owner_uuid = resource_attrs.delete(:owner_uuid) || current_user.uuid
- unless current_user.can? write: owner_uuid
- logger.warn "User #{current_user.andand.uuid} tried to set collection owner_uuid to #{owner_uuid}"
- raise ArvadosModel::PermissionDeniedError
+ if resource_attrs[:uuid] and (loc = Keep::Locator.parse(resource_attrs[:uuid])) # a uuid that parses as a Keep locator is treated as a portable data hash
+ resource_attrs[:portable_data_hash] = loc.to_s # store the locator string as portable_data_hash...
+ resource_attrs.delete :uuid # ...and remove it from the uuid attribute
end
+ resource_attrs.delete :version # version and current_version_uuid sent by the client are discarded before delegating to the inherited create
+ resource_attrs.delete :current_version_uuid
+ super
+ end
- # Check permissions on the collection manifest.
- # If any signature cannot be verified, return 403 Permission denied.
- api_token = current_api_client_authorization.andand.api_token
- signing_opts = {
- key: Rails.configuration.blob_signing_key,
- api_token: api_token,
- ttl: Rails.configuration.blob_signing_ttl,
- }
- resource_attrs[:manifest_text].lines.each do |entry|
- entry.split[1..-1].each do |tok|
- if /^[[:digit:]]+:[[:digit:]]+:/.match tok
- # This is a filename token, not a blob locator. Note that we
- # keep checking tokens after this, even though manifest
- # format dictates that all subsequent tokens will also be
- # filenames. Safety first!
- elsif Blob.verify_signature tok, signing_opts
- # OK.
- elsif Locator.parse(tok).andand.signature
- # Signature provided, but verify_signature did not like it.
- logger.warn "Invalid signature on locator #{tok}"
- raise ArvadosModel::PermissionDeniedError
- elsif Rails.configuration.permit_create_collection_with_unsigned_manifest
- # No signature provided, but we are running in insecure mode.
- logger.debug "Missing signature on locator #{tok} ignored"
- elsif Blob.new(tok).empty?
- # No signature provided -- but no data to protect, either.
- else
- logger.warn "Missing signature on locator #{tok}"
- raise ArvadosModel::PermissionDeniedError
- end
- end
+ def update
+ # preserve_version should be disabled unless explicitly asked otherwise.
+ if !resource_attrs[:preserve_version]
+ resource_attrs[:preserve_version] = false
end
+ super
+ end
- # Remove any permission signatures from the manifest.
- resource_attrs[:manifest_text]
- .gsub!(/ [[:xdigit:]]{32}(\+[[:digit:]]+)?(\+\S+)/) { |word|
- word.strip!
- loc = Locator.parse(word)
- if loc
- " " + loc.without_signature.to_s
- else
- " " + word
- end
+ def find_objects_for_index
+ opts = {
+ include_trash: params[:include_trash] || ['destroy', 'trash', 'untrash'].include?(action_name),
+ include_old_versions: params[:include_old_versions] || false,
}
+ @objects = Collection.readable_by(*@read_users, opts) if !opts.empty?
+ super
+ end
- # Save the collection with the stripped manifest.
- act_as_system_user do
- @object = model_class.new resource_attrs.reject { |k,v| k == :owner_uuid }
- begin
- @object.save!
- rescue ActiveRecord::RecordNotUnique
- logger.debug resource_attrs.inspect
- if @object.manifest_text and @object.uuid
- @existing_object = model_class.
- where('uuid=? and manifest_text=?',
- @object.uuid,
- @object.manifest_text).
- first
- @object = @existing_object || @object
- end
+ def find_object_by_uuid(with_lock: false) # a params[:id] that parses as a Keep locator is looked up by portable data hash; anything else goes to the inherited lookup
+ if loc = Keep::Locator.parse(params[:id])
+ loc.strip_hints!
+
+ opts = {
+ include_trash: params[:include_trash],
+ include_old_versions: params[:include_old_versions],
+ }
+
+ # It matters which Collection object we pick because blob
+ # signatures depend on the value of trash_at.
+ #
+ # From postgres doc: "By default, null values sort as if larger
+ # than any non-null value; that is, NULLS FIRST is the default
+ # for DESC order, and NULLS LAST otherwise."
+ #
+ # "trash_at desc" sorts null first, then latest to earliest, so
+ # it will select the Collection object with the longest
+ # available lifetime.
+
+ select_attrs = (@select || ["manifest_text"]) | ["portable_data_hash", "trash_at"] # requested columns (manifest_text when @select is unset) plus the two columns always needed below
+ model = Collection
+ if with_lock && Rails.configuration.API.LockBeforeUpdate # lock only when the caller asks for it and the LockBeforeUpdate setting is on
+ model = model.lock
end
- if @object
- link_attrs = {
- owner_uuid: owner_uuid,
- link_class: 'permission',
- name: 'can_read',
- head_uuid: @object.uuid,
- tail_uuid: owner_uuid
+ if c = model.
+ readable_by(*@read_users, opts).
+ where({ portable_data_hash: loc.to_s }).
+ order("trash_at desc").
+ select(select_attrs.join(", ")).
+ limit(1).
+ first
+ @object = { # a plain Hash rather than a Collection record
+ uuid: c.portable_data_hash, # the uuid key carries the portable data hash, not the record's uuid
+ portable_data_hash: c.portable_data_hash,
+ trash_at: c.trash_at,
}
- ActiveRecord::Base.transaction do
- if Link.where(link_attrs).empty?
- Link.create! link_attrs
- end
+ if select_attrs.index("manifest_text") # manifest_text is copied only when it was among the selected columns
+ @object[:manifest_text] = c.manifest_text
end
end
+ else
+ super(with_lock: with_lock)
end
- show
end
def show
- if current_api_client_authorization
- signing_opts = {
- key: Rails.configuration.blob_signing_key,
- api_token: current_api_client_authorization.api_token,
- ttl: Rails.configuration.blob_signing_ttl,
- }
- @object[:manifest_text]
- .gsub!(/ [[:xdigit:]]{32}(\+[[:digit:]]+)?(\+\S+)/) { |word|
- word.strip!
- loc = Locator.parse(word)
- if loc
- " " + Blob.sign_locator(word, signing_opts)
- else
- " " + word
- end
- }
- end
- render json: @object.as_api_response(:with_data)
- end
-
- def collection_uuid(uuid)
- m = /([a-f0-9]{32}(\+[0-9]+)?)(\+.*)?/.match(uuid)
- if m
- m[1]
+ if @object.is_a? Collection # Collection records go through the inherited show; any other @object is sent as JSON unchanged
+ # Omit unsigned_manifest_text
+ @select ||= model_class.selectable_attributes - ["unsigned_manifest_text"] # ||= leaves an already-set @select untouched
+ super
else
- nil
+ send_json @object # e.g. the Hash that find_object_by_uuid builds for portable-data-hash lookups
end
end
- def script_param_edges(visited, sp)
+
+ def find_collections(visited, sp, ignore_columns=[], &b) # walks sp recursively and yields (portable_data_hash, nil) or (nil, collection_uuid) for each String that matches; visited is accepted but not read here
case sp
+ when ArvadosModel
+ sp.class.columns.each do |c| # visit every column value of the record except those named in ignore_columns
+ find_collections(visited, sp[c.name.to_sym], &b) if !ignore_columns.include?(c.name) # ignore_columns is not forwarded, so it filters only this record's own columns
+ end
when Hash
sp.each do |k, v|
- script_param_edges(visited, v)
+ find_collections(visited, v, &b) # only hash values are searched, not keys
end
when Array
sp.each do |v|
- script_param_edges(visited, v)
+ find_collections(visited, v, &b)
end
when String
- return if sp.empty?
- m = collection_uuid(sp)
- if m
- generate_provenance_edges(visited, m)
+ if m = /[a-f0-9]{32}\+\d+/.match(sp) # checked first: 32 lowercase hex chars, "+", digits; only the first match in the string is reported
+ yield m[0], nil
+ elsif m = Collection.uuid_regex.match(sp) # otherwise look for a collection UUID
+ yield nil, m[0]
+ end
end
end
- def generate_provenance_edges(visited, uuid)
- m = collection_uuid(uuid)
- uuid = m if m
-
- if not uuid or uuid.empty? or visited[uuid]
- return ""
+ def search_edges(visited, uuid, direction)
+ if uuid.nil? or uuid.empty? or visited[uuid]
+ return
end
- logger.debug "visiting #{uuid}"
+ if loc = Keep::Locator.parse(uuid)
+ loc.strip_hints!
+ return if visited[loc.to_s]
+ end
- if m
- # uuid is a collection
- Collection.readable_by(current_user).where(uuid: uuid).each do |c|
- visited[uuid] = c.as_api_response
- visited[uuid][:files] = []
- c.files.each do |f|
- visited[uuid][:files] << f
+ if loc
+ # uuid is a portable_data_hash
+ collections = Collection.readable_by(*@read_users).where(portable_data_hash: loc.to_s)
+ c = collections.limit(2).all
+ if c.size == 1
+ visited[loc.to_s] = c[0]
+ elsif c.size > 1
+ name = collections.limit(1).where("name <> ''").first
+ if name
+ visited[loc.to_s] = {
+ portable_data_hash: c[0].portable_data_hash,
+ name: "#{name.name} + #{collections.count-1} more"
+ }
+ else
+ visited[loc.to_s] = {
+ portable_data_hash: c[0].portable_data_hash,
+ name: loc.to_s
+ }
end
end
- Job.readable_by(current_user).where(output: uuid).each do |job|
- generate_provenance_edges(visited, job.uuid)
- end
-
- Job.readable_by(current_user).where(log: uuid).each do |job|
- generate_provenance_edges(visited, job.uuid)
- end
+ if direction == :search_up
+ # Search upstream for jobs where this locator is the output of some job
+ if !Rails.configuration.API.DisabledAPIs["jobs.list"]
+ Job.readable_by(*@read_users).where(output: loc.to_s).each do |job|
+ search_edges(visited, job.uuid, :search_up)
+ end
- else
- # uuid is something else
- rsc = ArvadosModel::resource_class_for_uuid uuid
- if rsc == Job
- Job.readable_by(current_user).where(uuid: uuid).each do |job|
- visited[uuid] = job.as_api_response
- script_param_edges(visited, job.script_parameters)
- end
- elsif rsc != nil
- rsc.where(uuid: uuid).each do |r|
- visited[uuid] = r.as_api_response
+ Job.readable_by(*@read_users).where(log: loc.to_s).each do |job|
+ search_edges(visited, job.uuid, :search_up)
+ end
end
- end
- end
-
- Link.readable_by(current_user).
- where(head_uuid: uuid, link_class: "provenance").
- each do |link|
- visited[link.uuid] = link.as_api_response
- generate_provenance_edges(visited, link.tail_uuid)
- end
-
- #puts "finished #{uuid}"
- end
-
- def provenance
- visited = {}
- generate_provenance_edges(visited, @object[:uuid])
- render json: visited
- end
- def generate_used_by_edges(visited, uuid)
- m = collection_uuid(uuid)
- uuid = m if m
+ Container.readable_by(*@read_users).where(output: loc.to_s).pluck(:uuid).each do |c_uuid|
+ search_edges(visited, c_uuid, :search_up)
+ end
- if not uuid or uuid.empty? or visited[uuid]
- return ""
- end
+ Container.readable_by(*@read_users).where(log: loc.to_s).pluck(:uuid).each do |c_uuid|
+ search_edges(visited, c_uuid, :search_up)
+ end
+ elsif direction == :search_down
+ if loc.to_s == "d41d8cd98f00b204e9800998ecf8427e+0"
+ # Special case, don't follow the empty collection.
+ return
+ end
- logger.debug "visiting #{uuid}"
+ # Search downstream for jobs where this locator is in script_parameters
+ if !Rails.configuration.API.DisabledAPIs["jobs.list"]
+ Job.readable_by(*@read_users).where(["jobs.script_parameters like ?", "%#{loc.to_s}%"]).each do |job|
+ search_edges(visited, job.uuid, :search_down)
+ end
- if m
- # uuid is a collection
- Collection.readable_by(current_user).where(uuid: uuid).each do |c|
- visited[uuid] = c.as_api_response
- visited[uuid][:files] = []
- c.files.each do |f|
- visited[uuid][:files] << f
+ Job.readable_by(*@read_users).where(["jobs.docker_image_locator = ?", "#{loc.to_s}"]).each do |job|
+ search_edges(visited, job.uuid, :search_down)
+ end
end
- end
-
- if uuid == "d41d8cd98f00b204e9800998ecf8427e+0"
- # special case for empty collection
- return
- end
- Job.readable_by(current_user).where(["jobs.script_parameters like ?", "%#{uuid}%"]).each do |job|
- generate_used_by_edges(visited, job.uuid)
+ Container.readable_by(*@read_users).where([Container.full_text_trgm + " like ?", "%#{loc.to_s}%"]).select("output, log, uuid").each do |c|
+ if c.output != loc.to_s && c.log != loc.to_s
+ search_edges(visited, c.uuid, :search_down)
+ end
+ end
end
-
else
- # uuid is something else
+ # uuid is a regular Arvados UUID
rsc = ArvadosModel::resource_class_for_uuid uuid
if rsc == Job
- Job.readable_by(current_user).where(uuid: uuid).each do |job|
+ Job.readable_by(*@read_users).where(uuid: uuid).each do |job|
visited[uuid] = job.as_api_response
- generate_used_by_edges(visited, job.output)
+ if direction == :search_up
+ # Follow upstream collections referenced in the script parameters
+ find_collections(visited, job) do |hash, col_uuid|
+ search_edges(visited, hash, :search_up) if hash
+ search_edges(visited, col_uuid, :search_up) if col_uuid
+ end
+ elsif direction == :search_down
+ # Follow downstream job output
+ search_edges(visited, job.output, direction)
+ end
+ end
+ elsif rsc == Container
+ c = Container.readable_by(*@read_users).where(uuid: uuid).limit(1).first
+ if c
+ visited[uuid] = c.as_api_response
+ if direction == :search_up
+ # Follow upstream collections referenced in the script parameters
+ find_collections(visited, c, ignore_columns=["log", "output"]) do |hash, col_uuid|
+ search_edges(visited, hash, :search_up) if hash
+ search_edges(visited, col_uuid, :search_up) if col_uuid
+ end
+ elsif direction == :search_down
+ # Follow downstream job output
+ search_edges(visited, c.output, :search_down)
+ end
+ end
+ elsif rsc == ContainerRequest
+ c = ContainerRequest.readable_by(*@read_users).where(uuid: uuid).limit(1).first
+ if c
+ visited[uuid] = c.as_api_response
+ if direction == :search_up
+ # Follow upstream collections
+ find_collections(visited, c, ignore_columns=["log_uuid", "output_uuid"]) do |hash, col_uuid|
+ search_edges(visited, hash, :search_up) if hash
+ search_edges(visited, col_uuid, :search_up) if col_uuid
+ end
+ elsif direction == :search_down
+ # Follow downstream job output
+ search_edges(visited, c.output_uuid, :search_down)
+ end
+ end
+ elsif rsc == Collection
+ c = Collection.readable_by(*@read_users).where(uuid: uuid).limit(1).first
+ if c
+ if direction == :search_up
+ visited[c.uuid] = c.as_api_response
+
+ if !Rails.configuration.API.DisabledAPIs["jobs.list"]
+ Job.readable_by(*@read_users).where(output: c.portable_data_hash).each do |job|
+ search_edges(visited, job.uuid, :search_up)
+ end
+
+ Job.readable_by(*@read_users).where(log: c.portable_data_hash).each do |job|
+ search_edges(visited, job.uuid, :search_up)
+ end
+ end
+
+ ContainerRequest.readable_by(*@read_users).where(output_uuid: uuid).pluck(:uuid).each do |cr_uuid|
+ search_edges(visited, cr_uuid, :search_up)
+ end
+
+ ContainerRequest.readable_by(*@read_users).where(log_uuid: uuid).pluck(:uuid).each do |cr_uuid|
+ search_edges(visited, cr_uuid, :search_up)
+ end
+ elsif direction == :search_down
+ search_edges(visited, c.portable_data_hash, :search_down)
+ end
end
elsif rsc != nil
rsc.where(uuid: uuid).each do |r|
end
end
- Link.readable_by(current_user).
- where(tail_uuid: uuid, link_class: "provenance").
- each do |link|
- visited[link.uuid] = link.as_api_response
- generate_used_by_edges(visited, link.head_uuid)
+ if direction == :search_up
+ # Search for provenance links pointing to the current uuid
+ Link.readable_by(*@read_users).
+ where(head_uuid: uuid, link_class: "provenance").
+ each do |link|
+ visited[link.uuid] = link.as_api_response
+ search_edges(visited, link.tail_uuid, direction)
+ end
+ elsif direction == :search_down
+ # Search for provenance links emanating from the current uuid
+ Link.readable_by(current_user).
+ where(tail_uuid: uuid, link_class: "provenance").
+ each do |link|
+ visited[link.uuid] = link.as_api_response
+ search_edges(visited, link.head_uuid, direction)
+ end
end
+ end
- #puts "finished #{uuid}"
+ def provenance
+ visited = {}
+ if @object[:uuid]
+ search_edges(visited, @object[:uuid], :search_up)
+ else
+ search_edges(visited, @object[:portable_data_hash], :search_up)
+ end
+ send_json visited
end
def used_by
visited = {}
- generate_used_by_edges(visited, @object[:uuid])
- render json: visited
+ if @object[:uuid]
+ search_edges(visited, @object[:uuid], :search_down)
+ else
+ search_edges(visited, @object[:portable_data_hash], :search_down)
+ end
+ send_json visited
end
protected
- def find_object_by_uuid
+
+ def load_select_param *args # runs the inherited select handling, then applies a default column list for the index action only
super
- if !@object and !params[:uuid].match(/^[0-9a-f]+\+\d+$/)
- # Normalize the given uuid and search again.
- hash_part = params[:uuid].match(/^([0-9a-f]*)/)[1]
- collection = Collection.where('uuid like ?', hash_part + '+%').first
- if collection
- # We know the collection exists, and what its real uuid is in
- # the database. Now, throw out @objects and repeat the usual
- # lookup procedure. (Returning the collection at this point
- # would bypass permission checks.)
- @objects = nil
- @where = { uuid: collection.uuid }
- find_objects_for_index
- @object = @objects.first
- end
+ if action_name == 'index'
+ # Omit manifest_text and unsigned_manifest_text from index results unless expressly selected.
+ @select ||= model_class.selectable_attributes - ["manifest_text", "unsigned_manifest_text"] # ||= leaves a @select already set by super untouched
end
end
end