Merge branch 'master' into 9998-unsigned_manifest
authorTom Clegg <tom@curoverse.com>
Thu, 12 Jan 2017 15:35:46 +0000 (10:35 -0500)
committerTom Clegg <tom@curoverse.com>
Thu, 12 Jan 2017 15:35:46 +0000 (10:35 -0500)
Conflicts:
services/api/app/models/collection.rb
services/keep-balance/collection.go

1  2 
sdk/go/arvados/collection.go
services/api/app/controllers/arvados/v1/collections_controller.rb
services/api/app/models/collection.rb
services/api/test/functional/arvados/v1/collections_controller_test.rb
services/keep-balance/collection.go

index 295943b38d5f32d7f0a433103bb06e0863a0ab34,9274238d2806c61a857414999a3b284ab7d2182b..157ce1678873af7f709c16752e590fb79b2ea822
@@@ -12,31 -12,28 +12,33 @@@ import 
  // Collection is an arvados#collection resource.
  type Collection struct {
        UUID                   string     `json:"uuid,omitempty"`
-       ExpiresAt              *time.Time `json:"expires_at,omitempty"`
+       TrashAt                *time.Time `json:"trash_at,omitempty"`
        ManifestText           string     `json:"manifest_text,omitempty"`
 +      UnsignedManifestText   string     `json:"unsigned_manifest_text,omitempty"`
        CreatedAt              *time.Time `json:"created_at,omitempty"`
        ModifiedAt             *time.Time `json:"modified_at,omitempty"`
        PortableDataHash       string     `json:"portable_data_hash,omitempty"`
        ReplicationConfirmed   *int       `json:"replication_confirmed,omitempty"`
        ReplicationConfirmedAt *time.Time `json:"replication_confirmed_at,omitempty"`
        ReplicationDesired     *int       `json:"replication_desired,omitempty"`
+       DeleteAt               *time.Time `json:"delete_at,omitempty"`
+       IsTrashed              bool       `json:"is_trashed,omitempty"`
  }
  
  // SizedDigests returns the hash+size part of each data block
  // referenced by the collection.
  func (c *Collection) SizedDigests() ([]SizedDigest, error) {
 -      if c.ManifestText == "" && c.PortableDataHash != "d41d8cd98f00b204e9800998ecf8427e+0" {
 +      manifestText := c.ManifestText
 +      if manifestText == "" {
 +              manifestText = c.UnsignedManifestText
 +      }
 +      if manifestText == "" && c.PortableDataHash != "d41d8cd98f00b204e9800998ecf8427e+0" {
                // TODO: Check more subtle forms of corruption, too
                return nil, fmt.Errorf("manifest is missing")
        }
        var sds []SizedDigest
 -      scanner := bufio.NewScanner(strings.NewReader(c.ManifestText))
 -      scanner.Buffer(make([]byte, 1048576), len(c.ManifestText))
 +      scanner := bufio.NewScanner(strings.NewReader(manifestText))
 +      scanner.Buffer(make([]byte, 1048576), len(manifestText))
        for scanner.Scan() {
                line := scanner.Text()
                tokens := strings.Split(line, " ")
index 81accd8c5681ed06fbd10c6594719c941000bb5e,6c3589736e91d8762d04e1202a80c588c4257387..2beb1e714d7c3fd1af64731c9023b989caa9ed14
@@@ -1,6 -1,8 +1,8 @@@
  require "arvados/keep"
  
  class Arvados::V1::CollectionsController < ApplicationController
+   include DbCurrentTime
    def self.limit_index_columns_read
      ["manifest_text"]
    end
      super
    end
  
+   def find_objects_for_index
+     if params[:include_trash] || ['destroy', 'trash'].include?(action_name)
+       @objects = Collection.unscoped.readable_by(*@read_users)
+     end
+     super
+   end
    def find_object_by_uuid
      if loc = Keep::Locator.parse(params[:id])
        loc.strip_hints!
            manifest_text: c.signed_manifest_text,
          }
        end
+       true
      else
        super
      end
-     true
    end
  
    def show
      if @object.is_a? Collection
 +      # Omit unsigned_manifest_text
 +      @select ||= model_class.selectable_attributes - ["unsigned_manifest_text"]
        super
      else
        send_json @object
      end
    end
  
+   def destroy
+     if !@object.is_trashed
+       @object.update_attributes!(trash_at: db_current_time)
+     end
+     earliest_delete = (@object.trash_at +
+                        Rails.configuration.blob_signature_ttl.seconds)
+     if @object.delete_at > earliest_delete
+       @object.update_attributes!(delete_at: earliest_delete)
+     end
+     show
+   end
+   def trash
+     if !@object.is_trashed
+       @object.update_attributes!(trash_at: db_current_time)
+     end
+     show
+   end
    def find_collections(visited, sp, &b)
      case sp
      when ArvadosModel
            visited[uuid] = job.as_api_response
            if direction == :search_up
              # Follow upstream collections referenced in the script parameters
-             find_collections(visited, job) do |hash, uuid|
+             find_collections(visited, job) do |hash, col_uuid|
                search_edges(visited, hash, :search_up) if hash
-               search_edges(visited, uuid, :search_up) if uuid
+               search_edges(visited, col_uuid, :search_up) if col_uuid
              end
            elsif direction == :search_down
              # Follow downstream job output
    def load_limit_offset_order_params *args
      super
      if action_name == 'index'
 -      # Omit manifest_text from index results unless expressly selected.
 -      @select ||= model_class.selectable_attributes - ["manifest_text"]
 +      # Omit manifest_text and unsigned_manifest_text from index results unless expressly selected.
 +      @select ||= model_class.selectable_attributes - ["manifest_text", "unsigned_manifest_text"]
      end
    end
  end
index 879f0293ee32a43a56a478801281feee634c101f,4f774d6f9cc7029759a474938da991cbc83c2f46..f212e3358a4c8729d46f5edd09a2f2226a9b6a1b
@@@ -1,4 -1,5 +1,5 @@@
  require 'arvados/keep'
+ require 'sweep_trashed_collections'
  
  class Collection < ArvadosModel
    extend DbCurrentTime
@@@ -8,17 -9,21 +9,21 @@@
  
    serialize :properties, Hash
  
+   before_validation :set_validation_timestamp
    before_validation :default_empty_manifest
    before_validation :check_encoding
    before_validation :check_manifest_validity
    before_validation :check_signatures
    before_validation :strip_signatures_and_update_replication_confirmed
+   before_validation :ensure_trash_at_not_in_past
+   before_validation :sync_trash_state
+   before_validation :default_trash_interval
    validate :ensure_pdh_matches_manifest_text
+   validate :validate_trash_and_delete_timing
    before_save :set_file_names
-   before_save :expires_at_not_in_past
  
-   # Query only undeleted collections by default.
-   default_scope where("expires_at IS NULL or expires_at > statement_timestamp()")
+   # Query only untrashed collections by default.
+   default_scope where("is_trashed = false")
  
    api_accessible :user, extend: :common do |t|
      t.add :name
      t.add :properties
      t.add :portable_data_hash
      t.add :signed_manifest_text, as: :manifest_text
 +    t.add :manifest_text, as: :unsigned_manifest_text
      t.add :replication_desired
      t.add :replication_confirmed
      t.add :replication_confirmed_at
-     t.add :expires_at
+     t.add :delete_at
+     t.add :trash_at
+     t.add :is_trashed
+   end
+   after_initialize do
+     @signatures_checked = false
+     @computed_pdh_for_manifest_text = false
    end
  
    def self.attributes_required_columns
                  # API response, and never let clients select the
                  # manifest_text column.
                  #
-                 # We need expires_at to determine the correct
-                 # timestamp in signed_manifest_text.
-                 'manifest_text' => ['manifest_text', 'expires_at'],
+                 # We need trash_at and is_trashed to determine the
+                 # correct timestamp in signed_manifest_text.
+                 'manifest_text' => ['manifest_text', 'trash_at', 'is_trashed'],
 +                'unsigned_manifest_text' => ['manifest_text'],
                  )
    end
  
@@@ -63,7 -73,9 +75,9 @@@
      # subsequent passes without checking any signatures. This is
      # important because the signatures have probably been stripped off
      # by the time we get to a second validation pass!
-     return true if @signatures_checked and @signatures_checked == computed_pdh
+     if @signatures_checked && @signatures_checked == computed_pdh
+       return true
+     end
  
      if self.manifest_text_changed?
        # Check permissions on the collection manifest.
@@@ -72,7 -84,7 +86,7 @@@
        api_token = current_api_client_authorization.andand.api_token
        signing_opts = {
          api_token: api_token,
-         now: db_current_time.to_i,
+         now: @validation_timestamp.to_i,
        }
        self.manifest_text.each_line do |entry|
          entry.split.each do |tok|
          utf8 = manifest_text
          utf8.force_encoding Encoding::UTF_8
          if utf8.valid_encoding? and utf8 == manifest_text.encode(Encoding::UTF_8)
-           manifest_text = utf8
+           self.manifest_text = utf8
            return true
          end
        rescue
    end
  
    def signed_manifest_text
-     if has_attribute? :manifest_text
+     if !has_attribute? :manifest_text
+       return nil
+     elsif is_trashed
+       return manifest_text
+     else
        token = current_api_client_authorization.andand.api_token
        exp = [db_current_time.to_i + Rails.configuration.blob_signature_ttl,
-              expires_at].compact.map(&:to_i).min
-       @signed_manifest_text = self.class.sign_manifest manifest_text, token, exp
+              trash_at].compact.map(&:to_i).min
+       self.class.sign_manifest manifest_text, token, exp
      end
    end
  
      hash_part = nil
      size_part = nil
      uuid.split('+').each do |token|
-       if token.match /^[0-9a-f]{32,}$/
+       if token.match(/^[0-9a-f]{32,}$/)
          raise "uuid #{uuid} has multiple hash parts" if hash_part
          hash_part = token
-       elsif token.match /^\d+$/
+       elsif token.match(/^\d+$/)
          raise "uuid #{uuid} has multiple size parts" if size_part
          size_part = token
        end
      super - ["manifest_text"]
    end
  
+   def self.where *args
+     SweepTrashedCollections.sweep_if_stale
+     super
+   end
    protected
    def portable_manifest_text
      self.class.munge_manifest_locators(manifest_text) do |match|
      super
    end
  
-   # If expires_at is being changed to a time in the past, change it to
+   # Use a single timestamp for all validations, even though each
+   # validation runs at a different time.
+   def set_validation_timestamp
+     @validation_timestamp = db_current_time
+   end
+   # If trash_at is being changed to a time in the past, change it to
    # now. This allows clients to say "expires {client-current-time}"
    # without failing due to clock skew, while avoiding odd log entries
    # like "expiry date changed to {1 year ago}".
-   def expires_at_not_in_past
-     if expires_at_changed? and expires_at
-       self.expires_at = [db_current_time, expires_at].max
+   def ensure_trash_at_not_in_past
+     if trash_at_changed? && trash_at
+       self.trash_at = [@validation_timestamp, trash_at].max
      end
    end
+   # Caller can move into/out of trash by setting/clearing is_trashed
+   # -- however, if the caller also changes trash_at, then any changes
+   # to is_trashed are ignored.
+   def sync_trash_state
+     if is_trashed_changed? && !trash_at_changed?
+       if is_trashed
+         self.trash_at = @validation_timestamp
+       else
+         self.trash_at = nil
+         self.delete_at = nil
+       end
+     end
+     self.is_trashed = trash_at && trash_at <= @validation_timestamp || false
+     true
+   end
+   # If trash_at is updated without touching delete_at, automatically
+   # update delete_at to a sensible value.
+   def default_trash_interval
+     if trash_at_changed? && !delete_at_changed?
+       if trash_at.nil?
+         self.delete_at = nil
+       else
+         self.delete_at = trash_at + Rails.configuration.default_trash_lifetime.seconds
+       end
+     end
+   end
+   def validate_trash_and_delete_timing
+     if trash_at.nil? != delete_at.nil?
+       errors.add :delete_at, "must be set if trash_at is set, and must be nil otherwise"
+     end
+     earliest_delete = ([@validation_timestamp, trash_at_was].compact.min +
+                        Rails.configuration.blob_signature_ttl.seconds)
+     if delete_at && delete_at < earliest_delete
+       errors.add :delete_at, "#{delete_at} is too soon: earliest allowed is #{earliest_delete}"
+     end
+     if delete_at && delete_at < trash_at
+       errors.add :delete_at, "must not be earlier than trash_at"
+     end
+     true
+   end
  end
index c285f8db20e825a74bff16022ba98f5058fc116c,eeeedcee0183d2d7a2d0767c5c2ea5143d7e3fe2..4b87ebd41f75b27ce7d6f4b63c488e7595ad630e
@@@ -1,9 -1,8 +1,10 @@@
  require 'test_helper'
  
  class Arvados::V1::CollectionsControllerTest < ActionController::TestCase
+   include DbCurrentTime
  
 +  PERM_TOKEN_RE = /\+A[[:xdigit:]]+@[[:xdigit:]]{8}\b/
 +
    def permit_unsigned_manifests isok=true
      # Set security model for the life of a test.
      Rails.configuration.permit_create_collection_with_unsigned_manifest = isok
    def assert_signed_manifest manifest_text, label=''
      assert_not_nil manifest_text, "#{label} manifest_text was nil"
      manifest_text.scan(/ [[:xdigit:]]{32}\S*/) do |tok|
 -      assert_match(/\+A[[:xdigit:]]+@[[:xdigit:]]{8}\b/, tok,
 +      assert_match(PERM_TOKEN_RE, tok,
                     "Locator in #{label} manifest_text was not signed")
      end
    end
  
 +  def assert_unsigned_manifest resp, label=''
 +    txt = resp['unsigned_manifest_text']
 +    assert_not_nil(txt, "#{label} unsigned_manifest_text was nil")
 +    locs = 0
 +    txt.scan(/ [[:xdigit:]]{32}\S*/) do |tok|
 +      locs += 1
 +      refute_match(PERM_TOKEN_RE, tok,
 +                   "Locator in #{label} unsigned_manifest_text was signed: #{tok}")
 +    end
 +    return locs
 +  end
 +
    test "should get index" do
      authorize_with :active
      get :index
             "basic Collections index included manifest_text")
    end
  
 -  test "collections.get returns signed locators" do
 +  test "collections.get returns signed locators, and no unsigned_manifest_text" do
      permit_unsigned_manifests
      authorize_with :active
      get :show, {id: collections(:foo_file).uuid}
      assert_response :success
      assert_signed_manifest json_response['manifest_text'], 'foo_file'
 +    refute_includes json_response, 'unsigned_manifest_text'
    end
  
    test "index with manifest_text selected returns signed locators" do
      assert(assigns(:objects).andand.any?,
             "no Collections returned for index with columns selected")
      json_response["items"].each do |coll|
 -      assert_equal(columns, columns & coll.keys,
 +      assert_equal(coll.keys - ['kind'], columns,
                     "Collections index did not respect selected columns")
        assert_signed_manifest coll['manifest_text'], coll['uuid']
      end
    end
  
 +  test "index with unsigned_manifest_text selected returns only unsigned locators" do
 +    authorize_with :active
 +    get :index, select: ['unsigned_manifest_text']
 +    assert_response :success
 +    assert_operator json_response["items"].count, :>, 0
 +    locs = 0
 +    json_response["items"].each do |coll|
 +      assert_equal(coll.keys - ['kind'], ['unsigned_manifest_text'],
 +                   "Collections index did not respect selected columns")
 +      locs += assert_unsigned_manifest coll, coll['uuid']
 +    end
 +    assert_operator locs, :>, 0, "no locators found in any manifests"
 +  end
 +
    test 'index without select returns everything except manifest' do
      authorize_with :active
      get :index
@@@ -823,11 -795,11 +824,11 @@@ EO
      [2**8, :success],
      [2**18, 422],
    ].each do |description_size, expected_response|
-     test "create collection with description size #{description_size}
+     # Descriptions are not part of search indexes. Skip until
+     # full-text search is implemented, at which point replace with a
+     # search in description.
+     skip "create collection with description size #{description_size}
            and expect response #{expected_response}" do
-       skip "(Descriptions are not part of search indexes. Skip until full-text search
-             is implemented, at which point replace with a search in description.)"
        authorize_with :active
  
        description = 'here is a collection with a very large description'
        assert_response 200
      end
    end
+   test 'get trashed collection with include_trash' do
+     uuid = 'zzzzz-4zz18-mto52zx1s7sn3ih' # expired_collection
+     authorize_with :active
+     get :show, {
+       id: uuid,
+       include_trash: true,
+     }
+     assert_response 200
+   end
+   test 'get trashed collection without include_trash' do
+     uuid = 'zzzzz-4zz18-mto52zx1s7sn3ih' # expired_collection
+     authorize_with :active
+     get :show, {
+       id: uuid,
+     }
+     assert_response 404
+   end
+   test 'trash collection using http DELETE verb' do
+     uuid = collections(:collection_owned_by_active).uuid
+     authorize_with :active
+     delete :destroy, {
+       id: uuid,
+     }
+     assert_response 200
+     c = Collection.unscoped.find_by_uuid(uuid)
+     assert_operator c.trash_at, :<, db_current_time
+     assert_equal c.delete_at, c.trash_at + Rails.configuration.blob_signature_ttl
+   end
+   test 'delete long-trashed collection immediately using http DELETE verb' do
+     uuid = 'zzzzz-4zz18-mto52zx1s7sn3ih' # expired_collection
+     authorize_with :active
+     delete :destroy, {
+       id: uuid,
+     }
+     assert_response 200
+     c = Collection.unscoped.find_by_uuid(uuid)
+     assert_operator c.trash_at, :<, db_current_time
+     assert_operator c.delete_at, :<, db_current_time
+   end
+   ['zzzzz-4zz18-mto52zx1s7sn3ih', # expired_collection
+    :empty_collection_name_in_active_user_home_project,
+   ].each do |fixture|
+     test "trash collection #{fixture} via trash action with grace period" do
+       if fixture.is_a? String
+         uuid = fixture
+       else
+         uuid = collections(fixture).uuid
+       end
+       authorize_with :active
+       time_before_trashing = db_current_time
+       post :trash, {
+         id: uuid,
+       }
+       assert_response 200
+       c = Collection.unscoped.find_by_uuid(uuid)
+       assert_operator c.trash_at, :<, db_current_time
+       assert_operator c.delete_at, :>=, time_before_trashing + Rails.configuration.default_trash_lifetime
+     end
+   end
  end
index 147d6165e31ae13db903a5861384c291c14d28a7,f12522fb6cd15350d4baf971498b06fb5c992c9d..b3a631e979a77a78f6c7a8b60fd012e77b360368
@@@ -30,7 -30,9 +30,9 @@@ func EachCollection(c *arvados.Client, 
                progress = func(_, _ int) {}
        }
  
-       expectCount, err := countCollections(c, arvados.ResourceListParams{})
+       expectCount, err := countCollections(c, arvados.ResourceListParams{
+               IncludeTrash: true,
+       })
        if err != nil {
                return err
        }
                limit = 1<<31 - 1
        }
        params := arvados.ResourceListParams{
-               Limit:  &limit,
-               Order:  "modified_at, uuid",
-               Select: []string{"uuid", "unsigned_manifest_text", "modified_at", "portable_data_hash", "replication_desired"},
+               Limit:        &limit,
+               Order:        "modified_at, uuid",
 -              Select:       []string{"uuid", "manifest_text", "modified_at", "portable_data_hash", "replication_desired"},
++              Select:       []string{"uuid", "unsigned_manifest_text", "modified_at", "portable_data_hash", "replication_desired"},
+               IncludeTrash: true,
        }
        var last arvados.Collection
        var filterTime time.Time
        }
        progress(callCount, expectCount)
  
-       if checkCount, err := countCollections(c, arvados.ResourceListParams{Filters: []arvados.Filter{{
-               Attr:     "modified_at",
-               Operator: "<=",
-               Operand:  filterTime}}}); err != nil {
+       if checkCount, err := countCollections(c, arvados.ResourceListParams{
+               Filters: []arvados.Filter{{
+                       Attr:     "modified_at",
+                       Operator: "<=",
+                       Operand:  filterTime}},
+               IncludeTrash: true,
+       }); err != nil {
                return err
        } else if callCount < checkCount {
                return fmt.Errorf("Retrieved %d collections with modtime <= T=%q, but server now reports there are %d collections with modtime <= T", callCount, filterTime, checkCount)