// Collection is an arvados#collection resource.
type Collection struct {
UUID string `json:"uuid,omitempty"`
- ExpiresAt *time.Time `json:"expires_at,omitempty"`
+ TrashAt *time.Time `json:"trash_at,omitempty"`
ManifestText string `json:"manifest_text,omitempty"`
+ UnsignedManifestText string `json:"unsigned_manifest_text,omitempty"`
CreatedAt *time.Time `json:"created_at,omitempty"`
ModifiedAt *time.Time `json:"modified_at,omitempty"`
PortableDataHash string `json:"portable_data_hash,omitempty"`
ReplicationConfirmed *int `json:"replication_confirmed,omitempty"`
ReplicationConfirmedAt *time.Time `json:"replication_confirmed_at,omitempty"`
ReplicationDesired *int `json:"replication_desired,omitempty"`
+ DeleteAt *time.Time `json:"delete_at,omitempty"`
+ IsTrashed bool `json:"is_trashed,omitempty"`
}
// SizedDigests returns the hash+size part of each data block
// referenced by the collection.
func (c *Collection) SizedDigests() ([]SizedDigest, error) {
- if c.ManifestText == "" && c.PortableDataHash != "d41d8cd98f00b204e9800998ecf8427e+0" {
+ manifestText := c.ManifestText
+ if manifestText == "" {
+ manifestText = c.UnsignedManifestText
+ }
+ if manifestText == "" && c.PortableDataHash != "d41d8cd98f00b204e9800998ecf8427e+0" {
// TODO: Check more subtle forms of corruption, too
return nil, fmt.Errorf("manifest is missing")
}
var sds []SizedDigest
- scanner := bufio.NewScanner(strings.NewReader(c.ManifestText))
- scanner.Buffer(make([]byte, 1048576), len(c.ManifestText))
+ scanner := bufio.NewScanner(strings.NewReader(manifestText))
+ scanner.Buffer(make([]byte, 1048576), len(manifestText))
for scanner.Scan() {
line := scanner.Text()
tokens := strings.Split(line, " ")
require "arvados/keep"
class Arvados::V1::CollectionsController < ApplicationController
+ include DbCurrentTime
+
def self.limit_index_columns_read
["manifest_text"]
end
super
end
+ def find_objects_for_index
+ if params[:include_trash] || ['destroy', 'trash'].include?(action_name)
+ @objects = Collection.unscoped.readable_by(*@read_users)
+ end
+ super
+ end
+
def find_object_by_uuid
if loc = Keep::Locator.parse(params[:id])
loc.strip_hints!
manifest_text: c.signed_manifest_text,
}
end
+ true
else
super
end
- true
end
def show
if @object.is_a? Collection
+ # Omit unsigned_manifest_text
+ @select ||= model_class.selectable_attributes - ["unsigned_manifest_text"]
super
else
send_json @object
end
end
+ def destroy
+ if !@object.is_trashed
+ @object.update_attributes!(trash_at: db_current_time)
+ end
+ earliest_delete = (@object.trash_at +
+ Rails.configuration.blob_signature_ttl.seconds)
+ if @object.delete_at > earliest_delete
+ @object.update_attributes!(delete_at: earliest_delete)
+ end
+ show
+ end
+
+ def trash
+ if !@object.is_trashed
+ @object.update_attributes!(trash_at: db_current_time)
+ end
+ show
+ end
+
def find_collections(visited, sp, &b)
case sp
when ArvadosModel
visited[uuid] = job.as_api_response
if direction == :search_up
# Follow upstream collections referenced in the script parameters
- find_collections(visited, job) do |hash, uuid|
+ find_collections(visited, job) do |hash, col_uuid|
search_edges(visited, hash, :search_up) if hash
- search_edges(visited, uuid, :search_up) if uuid
+ search_edges(visited, col_uuid, :search_up) if col_uuid
end
elsif direction == :search_down
# Follow downstream job output
def load_limit_offset_order_params *args
super
if action_name == 'index'
- # Omit manifest_text from index results unless expressly selected.
- @select ||= model_class.selectable_attributes - ["manifest_text"]
+ # Omit manifest_text and unsigned_manifest_text from index results unless expressly selected.
+ @select ||= model_class.selectable_attributes - ["manifest_text", "unsigned_manifest_text"]
end
end
end
require 'arvados/keep'
+ require 'sweep_trashed_collections'
class Collection < ArvadosModel
extend DbCurrentTime
serialize :properties, Hash
+ before_validation :set_validation_timestamp
before_validation :default_empty_manifest
before_validation :check_encoding
before_validation :check_manifest_validity
before_validation :check_signatures
before_validation :strip_signatures_and_update_replication_confirmed
+ before_validation :ensure_trash_at_not_in_past
+ before_validation :sync_trash_state
+ before_validation :default_trash_interval
validate :ensure_pdh_matches_manifest_text
+ validate :validate_trash_and_delete_timing
before_save :set_file_names
- before_save :expires_at_not_in_past
- # Query only undeleted collections by default.
- default_scope where("expires_at IS NULL or expires_at > statement_timestamp()")
+ # Query only untrashed collections by default.
+ default_scope where("is_trashed = false")
api_accessible :user, extend: :common do |t|
t.add :name
t.add :properties
t.add :portable_data_hash
t.add :signed_manifest_text, as: :manifest_text
+ t.add :manifest_text, as: :unsigned_manifest_text
t.add :replication_desired
t.add :replication_confirmed
t.add :replication_confirmed_at
- t.add :expires_at
+ t.add :delete_at
+ t.add :trash_at
+ t.add :is_trashed
+ end
+
+ after_initialize do
+ @signatures_checked = false
+ @computed_pdh_for_manifest_text = false
end
def self.attributes_required_columns
# API response, and never let clients select the
# manifest_text column.
#
- # We need expires_at to determine the correct
- # timestamp in signed_manifest_text.
- 'manifest_text' => ['manifest_text', 'expires_at'],
+ # We need trash_at and is_trashed to determine the
+ # correct timestamp in signed_manifest_text.
+ 'manifest_text' => ['manifest_text', 'trash_at', 'is_trashed'],
+ 'unsigned_manifest_text' => ['manifest_text'],
)
end
# subsequent passes without checking any signatures. This is
# important because the signatures have probably been stripped off
# by the time we get to a second validation pass!
- return true if @signatures_checked and @signatures_checked == computed_pdh
+ if @signatures_checked && @signatures_checked == computed_pdh
+ return true
+ end
if self.manifest_text_changed?
# Check permissions on the collection manifest.
api_token = current_api_client_authorization.andand.api_token
signing_opts = {
api_token: api_token,
- now: db_current_time.to_i,
+ now: @validation_timestamp.to_i,
}
self.manifest_text.each_line do |entry|
entry.split.each do |tok|
utf8 = manifest_text
utf8.force_encoding Encoding::UTF_8
if utf8.valid_encoding? and utf8 == manifest_text.encode(Encoding::UTF_8)
- manifest_text = utf8
+ self.manifest_text = utf8
return true
end
rescue
end
def signed_manifest_text
- if has_attribute? :manifest_text
+ if !has_attribute? :manifest_text
+ return nil
+ elsif is_trashed
+ return manifest_text
+ else
token = current_api_client_authorization.andand.api_token
exp = [db_current_time.to_i + Rails.configuration.blob_signature_ttl,
- expires_at].compact.map(&:to_i).min
- @signed_manifest_text = self.class.sign_manifest manifest_text, token, exp
+ trash_at].compact.map(&:to_i).min
+ self.class.sign_manifest manifest_text, token, exp
end
end
hash_part = nil
size_part = nil
uuid.split('+').each do |token|
- if token.match /^[0-9a-f]{32,}$/
+ if token.match(/^[0-9a-f]{32,}$/)
raise "uuid #{uuid} has multiple hash parts" if hash_part
hash_part = token
- elsif token.match /^\d+$/
+ elsif token.match(/^\d+$/)
raise "uuid #{uuid} has multiple size parts" if size_part
size_part = token
end
super - ["manifest_text"]
end
+ def self.where *args
+ SweepTrashedCollections.sweep_if_stale
+ super
+ end
+
protected
def portable_manifest_text
self.class.munge_manifest_locators(manifest_text) do |match|
super
end
- # If expires_at is being changed to a time in the past, change it to
+ # Use a single timestamp for all validations, even though each
+ # validation runs at a different time.
+ def set_validation_timestamp
+ @validation_timestamp = db_current_time
+ end
+
+ # If trash_at is being changed to a time in the past, change it to
# now. This allows clients to say "expires {client-current-time}"
# without failing due to clock skew, while avoiding odd log entries
# like "expiry date changed to {1 year ago}".
- def expires_at_not_in_past
- if expires_at_changed? and expires_at
- self.expires_at = [db_current_time, expires_at].max
+ def ensure_trash_at_not_in_past
+ if trash_at_changed? && trash_at
+ self.trash_at = [@validation_timestamp, trash_at].max
end
end
+
+ # Caller can move into/out of trash by setting/clearing is_trashed
+ # -- however, if the caller also changes trash_at, then any changes
+ # to is_trashed are ignored.
+ def sync_trash_state
+ if is_trashed_changed? && !trash_at_changed?
+ if is_trashed
+ self.trash_at = @validation_timestamp
+ else
+ self.trash_at = nil
+ self.delete_at = nil
+ end
+ end
+ self.is_trashed = trash_at && trash_at <= @validation_timestamp || false
+ true
+ end
+
+ # If trash_at is updated without touching delete_at, automatically
+ # update delete_at to a sensible value.
+ def default_trash_interval
+ if trash_at_changed? && !delete_at_changed?
+ if trash_at.nil?
+ self.delete_at = nil
+ else
+ self.delete_at = trash_at + Rails.configuration.default_trash_lifetime.seconds
+ end
+ end
+ end
+
+ def validate_trash_and_delete_timing
+ if trash_at.nil? != delete_at.nil?
+ errors.add :delete_at, "must be set if trash_at is set, and must be nil otherwise"
+ end
+
+ earliest_delete = ([@validation_timestamp, trash_at_was].compact.min +
+ Rails.configuration.blob_signature_ttl.seconds)
+ if delete_at && delete_at < earliest_delete
+ errors.add :delete_at, "#{delete_at} is too soon: earliest allowed is #{earliest_delete}"
+ end
+
+ if delete_at && delete_at < trash_at
+ errors.add :delete_at, "must not be earlier than trash_at"
+ end
+
+ true
+ end
end
require 'test_helper'
class Arvados::V1::CollectionsControllerTest < ActionController::TestCase
+ include DbCurrentTime
+ PERM_TOKEN_RE = /\+A[[:xdigit:]]+@[[:xdigit:]]{8}\b/
+
def permit_unsigned_manifests isok=true
# Set security model for the life of a test.
Rails.configuration.permit_create_collection_with_unsigned_manifest = isok
def assert_signed_manifest manifest_text, label=''
assert_not_nil manifest_text, "#{label} manifest_text was nil"
manifest_text.scan(/ [[:xdigit:]]{32}\S*/) do |tok|
- assert_match(/\+A[[:xdigit:]]+@[[:xdigit:]]{8}\b/, tok,
+ assert_match(PERM_TOKEN_RE, tok,
"Locator in #{label} manifest_text was not signed")
end
end
+ def assert_unsigned_manifest resp, label=''
+ txt = resp['unsigned_manifest_text']
+ assert_not_nil(txt, "#{label} unsigned_manifest_text was nil")
+ locs = 0
+ txt.scan(/ [[:xdigit:]]{32}\S*/) do |tok|
+ locs += 1
+ refute_match(PERM_TOKEN_RE, tok,
+ "Locator in #{label} unsigned_manifest_text was signed: #{tok}")
+ end
+ return locs
+ end
+
test "should get index" do
authorize_with :active
get :index
"basic Collections index included manifest_text")
end
- test "collections.get returns signed locators" do
+ test "collections.get returns signed locators, and no unsigned_manifest_text" do
permit_unsigned_manifests
authorize_with :active
get :show, {id: collections(:foo_file).uuid}
assert_response :success
assert_signed_manifest json_response['manifest_text'], 'foo_file'
+ refute_includes json_response, 'unsigned_manifest_text'
end
test "index with manifest_text selected returns signed locators" do
assert(assigns(:objects).andand.any?,
"no Collections returned for index with columns selected")
json_response["items"].each do |coll|
- assert_equal(columns, columns & coll.keys,
+ assert_equal(coll.keys - ['kind'], columns,
"Collections index did not respect selected columns")
assert_signed_manifest coll['manifest_text'], coll['uuid']
end
end
+ test "index with unsigned_manifest_text selected returns only unsigned locators" do
+ authorize_with :active
+ get :index, select: ['unsigned_manifest_text']
+ assert_response :success
+ assert_operator json_response["items"].count, :>, 0
+ locs = 0
+ json_response["items"].each do |coll|
+ assert_equal(coll.keys - ['kind'], ['unsigned_manifest_text'],
+ "Collections index did not respect selected columns")
+ locs += assert_unsigned_manifest coll, coll['uuid']
+ end
+ assert_operator locs, :>, 0, "no locators found in any manifests"
+ end
+
test 'index without select returns everything except manifest' do
authorize_with :active
get :index
[2**8, :success],
[2**18, 422],
].each do |description_size, expected_response|
- test "create collection with description size #{description_size}
+ # Descriptions are not part of search indexes. Skip until
+ # full-text search is implemented, at which point replace with a
+ # search in description.
+ skip "create collection with description size #{description_size}
and expect response #{expected_response}" do
- skip "(Descriptions are not part of search indexes. Skip until full-text search
- is implemented, at which point replace with a search in description.)"
-
authorize_with :active
description = 'here is a collection with a very large description'
assert_response 200
end
end
+
+ test 'get trashed collection with include_trash' do
+ uuid = 'zzzzz-4zz18-mto52zx1s7sn3ih' # expired_collection
+ authorize_with :active
+ get :show, {
+ id: uuid,
+ include_trash: true,
+ }
+ assert_response 200
+ end
+
+ test 'get trashed collection without include_trash' do
+ uuid = 'zzzzz-4zz18-mto52zx1s7sn3ih' # expired_collection
+ authorize_with :active
+ get :show, {
+ id: uuid,
+ }
+ assert_response 404
+ end
+
+ test 'trash collection using http DELETE verb' do
+ uuid = collections(:collection_owned_by_active).uuid
+ authorize_with :active
+ delete :destroy, {
+ id: uuid,
+ }
+ assert_response 200
+ c = Collection.unscoped.find_by_uuid(uuid)
+ assert_operator c.trash_at, :<, db_current_time
+ assert_equal c.delete_at, c.trash_at + Rails.configuration.blob_signature_ttl
+ end
+
+ test 'delete long-trashed collection immediately using http DELETE verb' do
+ uuid = 'zzzzz-4zz18-mto52zx1s7sn3ih' # expired_collection
+ authorize_with :active
+ delete :destroy, {
+ id: uuid,
+ }
+ assert_response 200
+ c = Collection.unscoped.find_by_uuid(uuid)
+ assert_operator c.trash_at, :<, db_current_time
+ assert_operator c.delete_at, :<, db_current_time
+ end
+
+ ['zzzzz-4zz18-mto52zx1s7sn3ih', # expired_collection
+ :empty_collection_name_in_active_user_home_project,
+ ].each do |fixture|
+ test "trash collection #{fixture} via trash action with grace period" do
+ if fixture.is_a? String
+ uuid = fixture
+ else
+ uuid = collections(fixture).uuid
+ end
+ authorize_with :active
+ time_before_trashing = db_current_time
+ post :trash, {
+ id: uuid,
+ }
+ assert_response 200
+ c = Collection.unscoped.find_by_uuid(uuid)
+ assert_operator c.trash_at, :<, db_current_time
+ assert_operator c.delete_at, :>=, time_before_trashing + Rails.configuration.default_trash_lifetime
+ end
+ end
end
progress = func(_, _ int) {}
}
- expectCount, err := countCollections(c, arvados.ResourceListParams{})
+ expectCount, err := countCollections(c, arvados.ResourceListParams{
+ IncludeTrash: true,
+ })
if err != nil {
return err
}
limit = 1<<31 - 1
}
params := arvados.ResourceListParams{
- Limit: &limit,
- Order: "modified_at, uuid",
- Select: []string{"uuid", "unsigned_manifest_text", "modified_at", "portable_data_hash", "replication_desired"},
+ Limit: &limit,
+ Order: "modified_at, uuid",
- Select: []string{"uuid", "manifest_text", "modified_at", "portable_data_hash", "replication_desired"},
++ Select: []string{"uuid", "unsigned_manifest_text", "modified_at", "portable_data_hash", "replication_desired"},
+ IncludeTrash: true,
}
var last arvados.Collection
var filterTime time.Time
}
progress(callCount, expectCount)
- if checkCount, err := countCollections(c, arvados.ResourceListParams{Filters: []arvados.Filter{{
- Attr: "modified_at",
- Operator: "<=",
- Operand: filterTime}}}); err != nil {
+ if checkCount, err := countCollections(c, arvados.ResourceListParams{
+ Filters: []arvados.Filter{{
+ Attr: "modified_at",
+ Operator: "<=",
+ Operand: filterTime}},
+ IncludeTrash: true,
+ }); err != nil {
return err
} else if callCount < checkCount {
return fmt.Errorf("Retrieved %d collections with modtime <= T=%q, but server now reports there are %d collections with modtime <= T", callCount, filterTime, checkCount)