Server-side components of Arvados contained in the apps/ and services/
-directories, including the API Server, Workbench, and Crunch, are licenced
+directories, including the API Server, Workbench, and Crunch, are licensed
under the GNU Affero General Public License version 3 (see agpl-3.0.txt)
The Arvados client Software Development Kits contained in the sdk/ directory,
sharing_popup
end
+ def update  # Update action: when only name and/or description are submitted, blank manifest_text before delegating to super.
+ @updates ||= params[@object.resource_param_name.to_sym]  # keep an existing @updates, else take the submitted attributes
+ if @updates && (@updates.keys - ["name", "description"]).empty?  # no submitted key other than name/description
+ # exclude manifest_text since only name or description is being updated
+ @object.manifest_text = nil  # NOTE(review): presumably a nil manifest_text is left out of the request super sends -- confirm
+ end
+ super
+ end
+
protected
def find_usable_token(token_list)
assert_equal 'collection created with properties', assigns(:object).name
assert_equal 'value_1', assigns(:object).properties[:property_1]
end
+
+ test "update description and check manifest_text is not lost" do  # Sends a description-only update and checks the manifest is unchanged.
+ collection = api_fixture("collections")["multilevel_collection_1"]
+ post :update, {
+ id: collection["uuid"],
+ collection: {
+ description: 'test description update'  # the only attribute submitted
+ },
+ format: :json
+ }, session_for(:active)
+ assert_response :success
+ assert_not_nil assigns(:object)
+ assert_equal 'test description update', assigns(:object).description
+ assert_equal collection['manifest_text'], assigns(:object).manifest_text  # manifest must still equal the fixture's
+ end
end
table(table table-bordered table-condensed).
|_. Attribute|_. Type|_. Description|_. Example|
-|locator|string|||
-|portable_data_hash|string|||
|name|string|||
-|redundancy|number|||
-|redundancy_confirmed_by_client_uuid|string|API client||
-|redundancy_confirmed_at|datetime|||
-|redundancy_confirmed_as|number|||
+|description|text|||
+|portable_data_hash|string|||
|manifest_text|text|||
+|replication_desired|number|Minimum storage replication level desired for each data block referenced by this collection. A value of @null@ signifies that the site default replication level (typically 2) is desired.|@2@|
+|replication_confirmed|number|Replication level most recently confirmed by the storage system. This field is null when a collection is first created, and is reset to null when the manifest_text changes in a way that introduces a new data block. An integer value indicates the replication level of the _least replicated_ data block in the collection.|@2@, @null@|
+|replication_confirmed_at|datetime|When replication_confirmed was confirmed. If replication_confirmed is null, this field is also null.||
print >>stderr, error
sys.exit(1)
- # Apply default replication, if none specified. TODO (#3410): Use
- # default replication given by discovery document.
- if args.replication <= 0:
- args.replication = 2
+ # write_copies diverges from args.replication here.
+ # args.replication is how many copies we will instruct Arvados to
+ # maintain (by passing it in collections().create()) after all
+ # data is written -- and if None was given, we'll use None there.
+ # Meanwhile, write_copies is how many copies of each data block we
+ # write to Keep, which has to be a number.
+ #
+ # If we simply changed args.replication from None to a default
+ # here, we'd end up erroneously passing the default replication
+ # level (instead of None) to collections().create().
+ write_copies = (args.replication or
+ api_client._rootDesc.get('defaultCollectionReplication', 2))
if args.progress:
reporter = progress_writer(human_progress)
writer = ArvPutCollectionWriter(
resume_cache, reporter, bytes_expected,
num_retries=args.retries,
- replication=args.replication)
+ replication=write_copies)
else:
writer = ArvPutCollectionWriter.from_cache(
resume_cache, reporter, bytes_expected,
num_retries=args.retries,
- replication=args.replication)
+ replication=write_copies)
# Install our signal handler for each code in CAUGHT_SIGNALS, and save
# the originals.
manifest_text = CollectionReader(manifest_text).manifest_text(normalize=True)
replication_attr = 'replication_desired'
if api_client._schema.schemas['Collection']['properties'].get(replication_attr, None) is None:
- # API calls it 'redundancy' until #3410.
+ # API called it 'redundancy' before #3410.
replication_attr = 'redundancy'
# Register the resulting collection in Arvados.
collection = api_client.collections().create(
def test_put_collection_with_default_redundancy(self):
collection = self.run_and_find_collection("")
- self.assertEqual(2, collection['replication_desired'])
+ self.assertEqual(None, collection['replication_desired'])  # run with "" as the extra argument: replication_desired is expected to stay None, not 2
def test_put_collection_with_unnamed_project_link(self):
link = self.run_and_find_collection(
title: "Arvados API",
description: "The API to interact with Arvados.",
documentationLink: "http://doc.arvados.org/api/index.html",
+ defaultCollectionReplication: Rails.configuration.default_collection_replication,
protocol: "rest",
baseUrl: root_url + "arvados/v1/",
basePath: "/arvados/v1/",
before_validation :check_signatures
before_validation :strip_manifest_text
before_validation :set_portable_data_hash
+ before_validation :maybe_clear_replication_confirmed
validate :ensure_hash_matches_manifest_text
before_save :set_file_names
t.add :portable_data_hash
t.add :signed_manifest_text, as: :manifest_text
t.add :replication_desired
+ t.add :replication_confirmed
+ t.add :replication_confirmed_at
end
def self.attributes_required_columns
# API response, and never let clients select the
# manifest_text column.
'manifest_text' => ['manifest_text'],
-
- # This is a shim until the database column gets
- # renamed to replication_desired in #3410.
- 'replication_desired' => ['redundancy'],
)
end
end
end
- def replication_desired
- # Shim until database columns get fixed up in #3410.
- redundancy or 2
- end
-
- def redundancy_status
- if redundancy_confirmed_as.nil?
- 'unconfirmed'
- elsif redundancy_confirmed_as < redundancy
- 'degraded'
- else
- if redundancy_confirmed_at.nil?
- 'unconfirmed'
- elsif Time.now - redundancy_confirmed_at < 7.days
- 'OK'
- else
- 'stale'
- end
- end
- end
-
def signed_manifest_text
if has_attribute? :manifest_text
token = current_api_client_authorization.andand.api_token
def self.munge_manifest_locators! manifest
# Given a manifest text and a block, yield each locator,
# and replace it with whatever the block returns.
- manifest.andand.gsub!(/ [[:xdigit:]]{32}(\+[[:digit:]]+)?(\+\S+)/) do |word|
+ manifest.andand.gsub!(/ [[:xdigit:]]{32}(\+\S+)?/) do |word|
if loc = Keep::Locator.parse(word.strip)
" " + yield(loc)
else
end
end
+ def self.each_manifest_locator manifest  # Read-only scan: yields each token of manifest that Keep::Locator.parse accepts.
+ # Given a manifest text and a block, yield each locator.
+ manifest.andand.scan(/ ([[:xdigit:]]{32}(\+\S+)?)/) do |word, _|  # word: 32 hex digits plus any "+..." suffix; the inner group is ignored
+ if loc = Keep::Locator.parse(word)  # tokens for which parse returns nil/false are skipped
+ yield loc
+ end
+ end
+ end
+
def self.normalize_uuid uuid
hash_part = nil
size_part = nil
def portable_manifest_text
portable_manifest = self[:manifest_text].dup
self.class.munge_manifest_locators!(portable_manifest) do |loc|
- loc.hash + '+' + loc.size.to_s
+ if loc.size  # emit "hash+size" only when the locator has a size; otherwise emit the bare hash
+ loc.hash + '+' + loc.size.to_s
+ else
+ loc.hash
+ end
end
portable_manifest
end
'+' +
portable_manifest.bytesize.to_s)
end
+
+ def maybe_clear_replication_confirmed  # Sets replication_confirmed and replication_confirmed_at to nil when a changed manifest references a new block hash.
+ if manifest_text_changed?  # nothing to do unless manifest_text is being modified
+ # If the new manifest_text contains locators whose hashes
+ # weren't in the old manifest_text, storage replication is no
+ # longer confirmed.
+ in_old_manifest = {}  # block hash => true, for every locator in the previous manifest_text
+ self.class.each_manifest_locator(manifest_text_was) do |loc|
+ in_old_manifest[loc.hash] = true
+ end
+ self.class.each_manifest_locator(manifest_text) do |loc|
+ if not in_old_manifest[loc.hash]  # compared by hash only; other parts of the locator are not considered
+ self.replication_confirmed_at = nil
+ self.replication_confirmed = nil
+ break  # one previously unseen hash is enough; stop scanning
+ end
+ end
+ end
+ end
+
+ def ensure_permission_to_save  # Raises PermissionDeniedError on a disallowed replication_confirmed* change; otherwise defers to super.
+ if (not current_user.andand.is_admin and  # applies to non-admin users (and when there is no current user)
+ (replication_confirmed_at_changed? or replication_confirmed_changed?) and  # at least one of the two attributes is being modified
+ not (replication_confirmed_at.nil? and replication_confirmed.nil?))  # permitted only if both end up nil
+ raise ArvadosModel::PermissionDeniedError.new("replication_confirmed and replication_confirmed_at attributes cannot be changed, except by setting both to nil")
+ end
+ super
+ end
end
# Permit insecure (OpenSSL::SSL::VERIFY_NONE) connections to the Single Sign
# On (sso) server. Should only be enabled during development when the SSO
# server is using a self-signed cert.
- sso_insecure: false
\ No newline at end of file
+ sso_insecure: false
+
+ # Default replication level for collections. This is used when a
+ # collection's replication_desired attribute is nil.
+ default_collection_replication: 2
--- /dev/null
+class RenameReplicationAttributes < ActiveRecord::Migration  # Renames collections.redundancy* columns to replication_* and drops redundancy_confirmed_by_client_uuid.
+ RENAME = [[:redundancy, :replication_desired],  # [old column name, new column name] pairs
+ [:redundancy_confirmed_as, :replication_confirmed],
+ [:redundancy_confirmed_at, :replication_confirmed_at]]
+
+ def up  # Apply the renames, drop redundancy_confirmed_by_client_uuid, then recreate the two search indexes.
+ RENAME.each do |oldname, newname|
+ rename_column :collections, oldname, newname
+ end
+ remove_column :collections, :redundancy_confirmed_by_client_uuid
+ Collection.reset_column_information  # NOTE(review): presumably so Collection.full_text_tsvector below reflects the new columns -- confirm
+
+ # Removing that column dropped some search indexes. Let's put them back.
+ add_index :collections, ["owner_uuid", "modified_by_client_uuid", "modified_by_user_uuid", "portable_data_hash", "uuid", "name", "file_names"], name: 'collections_search_index'
+ execute "CREATE INDEX collections_full_text_search_idx ON collections USING gin(#{Collection.full_text_tsvector});"
+ end
+
+ def down  # Undo up: re-add the dropped column, restore the old names, and rebuild both indexes.
+ remove_index :collections, name: 'collections_search_index'
+ add_column :collections, :redundancy_confirmed_by_client_uuid, :string
+ RENAME.reverse.each do |oldname, newname|  # undo the renames in reverse order
+ rename_column :collections, newname, oldname
+ end
+ remove_index :collections, :name => 'collections_full_text_search_idx'
+ Collection.reset_column_information  # refresh column info again now that the old names are back
+
+ execute "CREATE INDEX collections_full_text_search_idx ON collections USING gin(#{Collection.full_text_tsvector});"
+ add_index :collections, ["owner_uuid", "modified_by_client_uuid", "modified_by_user_uuid", "portable_data_hash", "uuid", "name", "file_names", "redundancy_confirmed_by_client_uuid"], name: 'collections_search_index'
+ end
+end
modified_by_user_uuid character varying(255),
modified_at timestamp without time zone,
portable_data_hash character varying(255),
- redundancy integer,
- redundancy_confirmed_by_client_uuid character varying(255),
- redundancy_confirmed_at timestamp without time zone,
- redundancy_confirmed_as integer,
+ replication_desired integer,
+ replication_confirmed_at timestamp without time zone,
+ replication_confirmed integer,
updated_at timestamp without time zone NOT NULL,
uuid character varying(255),
manifest_text text,
-- Name: collections_full_text_search_idx; Type: INDEX; Schema: public; Owner: -; Tablespace:
--
-CREATE INDEX collections_full_text_search_idx ON collections USING gin (to_tsvector('english'::regconfig, (((((((((((((((((((COALESCE(owner_uuid, ''::character varying))::text || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(portable_data_hash, ''::character varying))::text) || ' '::text) || (COALESCE(redundancy_confirmed_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || (COALESCE(description, ''::character varying))::text) || ' '::text) || COALESCE(properties, ''::text)) || ' '::text) || (COALESCE(file_names, ''::character varying))::text)));
+CREATE INDEX collections_full_text_search_idx ON collections USING gin (to_tsvector('english'::regconfig, (((((((((((((((((COALESCE(owner_uuid, ''::character varying))::text || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(portable_data_hash, ''::character varying))::text) || ' '::text) || (COALESCE(uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || (COALESCE(description, ''::character varying))::text) || ' '::text) || COALESCE(properties, ''::text)) || ' '::text) || (COALESCE(file_names, ''::character varying))::text)));
--
-- Name: collections_search_index; Type: INDEX; Schema: public; Owner: -; Tablespace:
--
-CREATE INDEX collections_search_index ON collections USING btree (owner_uuid, modified_by_client_uuid, modified_by_user_uuid, portable_data_hash, redundancy_confirmed_by_client_uuid, uuid, name, file_names);
+CREATE INDEX collections_search_index ON collections USING btree (owner_uuid, modified_by_client_uuid, modified_by_user_uuid, portable_data_hash, uuid, name, file_names);
--
INSERT INTO schema_migrations (version) VALUES ('20150203180223');
-INSERT INTO schema_migrations (version) VALUES ('20150206210804');
\ No newline at end of file
+INSERT INTO schema_migrations (version) VALUES ('20150206210804');
+
+INSERT INTO schema_migrations (version) VALUES ('20150206230342');
\ No newline at end of file
name: collection_with_some_unique_words
description: The quick_brown_fox jumps over the lazy_dog
+replication_undesired_unconfirmed:
+ owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+ created_at: 2015-02-07 00:19:28.596506247 Z
+ modified_by_user_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+ modified_at: 2015-02-07 00:19:28.596338465 Z
+ portable_data_hash: fa7aeb5140e2848d39b416daeef4ffc5+45
+ replication_desired: ~
+ replication_confirmed_at: ~
+ replication_confirmed: ~
+ updated_at: 2015-02-07 00:19:28.596236608 Z
+ uuid: zzzzz-4zz18-wjxq7uzx2m9jj4a
+ manifest_text: ". 37b51d194a7513e45b56f6524f2d51f2+3 0:3:bar\n"
+ name: replication want=null have=null
+
+replication_desired_2_unconfirmed:
+ owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+ created_at: 2015-02-07 00:21:35.050333515 Z
+ modified_by_user_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+ modified_at: 2015-02-07 00:21:35.050189104 Z
+ portable_data_hash: fa7aeb5140e2848d39b416daeef4ffc5+45
+ replication_desired: 2
+ replication_confirmed_at: ~
+ replication_confirmed: ~
+ updated_at: 2015-02-07 00:21:35.050126576 Z
+ uuid: zzzzz-4zz18-3t236wrz4769h7x
+ manifest_text: ". 37b51d194a7513e45b56f6524f2d51f2+3 0:3:bar\n"
+ name: replication want=2 have=null
+
+replication_desired_2_confirmed_2:
+ owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+ created_at: 2015-02-07 00:19:28.596506247 Z
+ modified_by_user_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+ modified_at: 2015-02-07 00:19:28.596338465 Z
+ portable_data_hash: fa7aeb5140e2848d39b416daeef4ffc5+45
+ replication_desired: 2
+ replication_confirmed_at: 2015-02-07 00:24:52.983381227 Z
+ replication_confirmed: 2
+ updated_at: 2015-02-07 00:24:52.983381227 Z
+ uuid: zzzzz-4zz18-434zv1tnnf2rygp
+ manifest_text: ". acbd18db4cc2f85cedef654fccc4a4d8+3 37b51d194a7513e45b56f6524f2d51f2+3 0:3:foo 3:6:bar\n"
+ name: replication want=2 have=2
+
collection_with_empty_properties:
uuid: zzzzz-4zz18-emptyproperties
portable_data_hash: fa7aeb5140e2848d39b416daeef4ffc5+45
end
[1, 5, nil].each do |ask|
- test "Set replication_desired=#{ask} using redundancy attr" do
- # The Python SDK checks the Collection schema in the discovery
- # doc, then asks for 'redundancy' or 'replication_desired'
- # accordingly, so it isn't necessary to maintain backward
- # compatibility here when the attribute changes to
- # replication_desired.
+ test "Set replication_desired=#{ask.inspect}" do
+ Rails.configuration.default_collection_replication = 2  # site default is 2 for this test
authorize_with :active
put :update, {
- id: collections(:collection_owned_by_active).uuid,
+ id: collections(:replication_undesired_unconfirmed).uuid,  # fixture whose replication_desired starts out nil
collection: {
- redundancy: ask,
+ replication_desired: ask,
},
}
assert_response :success
- assert_equal (ask or 2), json_response['replication_desired']
+ assert_equal ask, json_response['replication_desired']  # response echoes exactly what was asked; nil is not replaced by the default
end
end
indexes = ActiveRecord::Base.connection.indexes(table)
search_index_by_columns = indexes.select do |index|
- index.columns == search_index_columns
+ index.columns.sort == search_index_columns.sort
end
search_index_by_name = indexes.select do |index|
index.name == "#{table}_search_index"
end
end
+ test 'portable data hash with missing size hints' do  # portable_data_hash must be computed from the manifest with "+Hint" suffixes stripped, with or without a size.
+ [[". d41d8cd98f00b204e9800998ecf8427e+0+Bar 0:0:x",
+ ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:x"],
+ [". d41d8cd98f00b204e9800998ecf8427e+Foo 0:0:x",
+ ". d41d8cd98f00b204e9800998ecf8427e 0:0:x"],
+ [". d41d8cd98f00b204e9800998ecf8427e 0:0:x",
+ ". d41d8cd98f00b204e9800998ecf8427e 0:0:x"],
+ ].each do |unportable, portable|  # each pair: [manifest as supplied, expected portable form]
+ c = Collection.new(manifest_text: unportable)
+ assert c.valid?  # validation runs the before_validation hooks that set portable_data_hash
+ assert_equal(Digest::MD5.hexdigest(portable)+"+#{portable.length}",
+ c.portable_data_hash)
+ end
+ end
+
[0, 2, 4, nil].each do |ask|
- test "replication_desired reports #{ask or 2} if redundancy is #{ask}" do
+ test "set replication_desired to #{ask.inspect}" do
+ Rails.configuration.default_collection_replication = 2  # site default is 2 for this test
act_as_user users(:active) do
- c = collections(:collection_owned_by_active)
- c.update_attributes redundancy: ask
- assert_equal (ask or 2), c.replication_desired
+ c = collections(:replication_undesired_unconfirmed)  # fixture whose replication_desired starts out nil
+ c.update_attributes replication_desired: ask
+ assert_equal ask, c.replication_desired  # attribute reads back exactly as set; nil is not replaced by the default
end
end
end
+ test "replication_confirmed* can be set by admin user" do  # An admin may set both confirmation attributes to non-nil values.
+ c = collections(:replication_desired_2_unconfirmed)  # fixture with both replication_confirmed* fields nil
+ act_as_user users(:admin) do
+ assert c.update_attributes(replication_confirmed: 2,
+ replication_confirmed_at: Time.now)
+ end
+ end
+
+ test "replication_confirmed* cannot be set by non-admin user" do  # Every attempt by a non-admin to set a non-nil value must raise.
+ act_as_user users(:active) do
+ c = collections(:replication_desired_2_unconfirmed)  # fixture with both replication_confirmed* fields nil
+ # Cannot set just one at a time.
+ assert_raise ArvadosModel::PermissionDeniedError do
+ c.update_attributes replication_confirmed: 1
+ end
+ assert_raise ArvadosModel::PermissionDeniedError do
+ c.update_attributes replication_confirmed_at: Time.now
+ end
+ # Cannot set both at once, either.
+ assert_raise ArvadosModel::PermissionDeniedError do
+ c.update_attributes(replication_confirmed: 1,
+ replication_confirmed_at: Time.now)
+ end
+ end
+ end
+
+ test "replication_confirmed* can be cleared (but only together) by non-admin user" do  # Clearing one field alone raises; clearing both succeeds.
+ act_as_user users(:active) do
+ c = collections(:replication_desired_2_confirmed_2)  # fixture with both replication_confirmed* fields set
+ # Cannot clear just one at a time.
+ assert_raise ArvadosModel::PermissionDeniedError do
+ c.update_attributes replication_confirmed: nil
+ end
+ c.reload  # discard the rejected in-memory change before the next attempt
+ assert_raise ArvadosModel::PermissionDeniedError do
+ c.update_attributes replication_confirmed_at: nil
+ end
+ # Can clear both at once.
+ c.reload
+ assert c.update_attributes(replication_confirmed: nil,
+ replication_confirmed_at: nil)
+ end
+ end
+
+ test "clear replication_confirmed* when introducing a new block in manifest" do  # Swapping in another collection's manifest must reset both fields.
+ c = collections(:replication_desired_2_confirmed_2)  # fixture with both replication_confirmed* fields set
+ act_as_user users(:active) do
+ assert c.update_attributes(manifest_text: collections(:user_agreement).signed_manifest_text)  # NOTE(review): presumably user_agreement references blocks absent from c -- confirm against fixtures
+ assert_nil c.replication_confirmed
+ assert_nil c.replication_confirmed_at
+ end
+ end
+
+ test "don't clear replication_confirmed* when just renaming a file" do  # A manifest change that keeps the same block locators must leave both fields set.
+ c = collections(:replication_desired_2_confirmed_2)  # fixture with both replication_confirmed* fields set
+ act_as_user users(:active) do
+ new_manifest = c.signed_manifest_text.sub(':bar', ':foo')  # change only a file name token; locators are untouched
+ assert c.update_attributes(manifest_text: new_manifest)
+ assert_equal 2, c.replication_confirmed
+ assert_not_nil c.replication_confirmed_at
+ end
+ end
+
+ test "don't clear replication_confirmed* when just deleting a data block" do  # Removing a locator introduces no new hash, so both fields must stay set.
+ c = collections(:replication_desired_2_confirmed_2)  # fixture with both replication_confirmed* fields set
+ act_as_user users(:active) do
+ new_manifest = c.signed_manifest_text
+ new_manifest.sub!(/ \S+:bar/, '')  # drop the file token ending in :bar
+ new_manifest.sub!(/ acbd\S+/, '')  # drop the block locator starting with acbd
+
+ # Confirm that we did just remove a block from the manifest (if
+ # not, this test would pass without testing the relevant case):
+ assert_operator new_manifest.length+40, :<, c.signed_manifest_text.length
+
+ assert c.update_attributes(manifest_text: new_manifest)
+ assert_equal 2, c.replication_confirmed
+ assert_not_nil c.replication_confirmed_at
+ end
+ end
+
test "create collection with properties" do
act_as_system_user do
c = Collection.create(manifest_text: ". acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:foo\n",