From: Tom Clegg Date: Mon, 30 Aug 2021 17:29:33 +0000 (-0400) Subject: Merge branch '17217-collection-signatures' into main X-Git-Tag: 2.3.0~93 X-Git-Url: https://git.arvados.org/arvados.git/commitdiff_plain/d8e3a67d508e9a5f5c01884259c0e75a140f64e9?hp=d67a11c2ee42159dd0ecd8f6ef39af38b6380dfd Merge branch '17217-collection-signatures' into main closes #17217 Arvados-DCO-1.1-Signed-off-by: Tom Clegg --- diff --git a/build/run-build-packages-one-target.sh b/build/run-build-packages-one-target.sh index 81aac9c616..7a91cb4de1 100755 --- a/build/run-build-packages-one-target.sh +++ b/build/run-build-packages-one-target.sh @@ -110,6 +110,7 @@ while [ $# -gt 0 ]; do echo >&2 "FATAL: --build-version '$2' is invalid, must match pattern ^[0-9]+\.[0-9]+\.[0-9]+(\.[0-9]+|)(~rc[0-9]+|~dev[0-9]+|)-[0-9]+$" exit 1 else + [[ "$2" =~ (.*)-(.*) ]] ARVADOS_BUILDING_VERSION="${BASH_REMATCH[1]}" ARVADOS_BUILDING_ITERATION="${BASH_REMATCH[2]}" fi diff --git a/doc/api/methods.html.textile.liquid b/doc/api/methods.html.textile.liquid index 670a9e0da3..e051ab66fa 100644 --- a/doc/api/methods.html.textile.liquid +++ b/doc/api/methods.html.textile.liquid @@ -100,12 +100,16 @@ The following operators are available. table(table table-bordered table-condensed). |_. Operator|_. Operand type|_. Description|_. Example| -|@=@, @!=@|string, number, timestamp, or null|Equality comparison|@["tail_uuid","=","xyzzy-j7d0g-fffffffffffffff"]@ @["tail_uuid","!=",null]@| +|@=@, @!=@, @<>@|string, number, timestamp, JSON-encoded array, JSON-encoded object, or null|Equality comparison|@["tail_uuid","=","xyzzy-j7d0g-fffffffffffffff"]@ +@["tail_uuid","!=",null]@ +@["storage_classes_desired","=","[\"default\"]"]@| |@<@, @<=@, @>=@, @>@|string, number, or timestamp|Ordering comparison|@["script_version",">","123"]@| |@like@, @ilike@|string|SQL pattern match. Single character match is @_@ and wildcard is @%@. 
The @ilike@ operator is case-insensitive|@["script_version","like","d00220fb%"]@| |@in@, @not in@|array of strings|Set membership|@["script_version","in",["main","d00220fb38d4b85ca8fc28a8151702a2b9d1dec5"]]@| |@is_a@|string|Arvados object type|@["head_uuid","is_a","arvados#collection"]@| -|@exists@|string|Test if a subproperty is present.|@["properties","exists","my_subproperty"]@| +|@exists@|string|Presence of subproperty|@["properties","exists","my_subproperty"]@| +|@contains@|string, array of strings|Presence of one or more keys or array elements|@["storage_classes_desired", "contains", ["foo", "bar"]]@ (matches both @["foo", "bar"]@ and @["foo", "bar", "baz"]@) +(note @[..., "contains", "foo"]@ is also accepted, and is equivalent to @[..., "contains", ["foo"]]@)| h4(#substringsearchfilter). Filtering using substring search @@ -128,7 +132,7 @@ table(table table-bordered table-condensed). |@like@, @ilike@|string|SQL pattern match, single character match is @_@ and wildcard is @%@, ilike is case-insensitive|@["properties.my_subproperty", "like", "d00220fb%"]@| |@in@, @not in@|array of strings|Set membership|@["properties.my_subproperty", "in", ["fizz", "buzz"]]@| |@exists@|boolean|Test if a subproperty is present or not (determined by operand).|@["properties.my_subproperty", "exists", true]@| -|@contains@|string, number|Filter where subproperty has a value either by exact match or value is element of subproperty list.|@["foo", "contains", "bar"]@ will find both @{"foo": "bar"}@ and @{"foo": ["bar", "baz"]}@.| +|@contains@|string, number|Filter where subproperty has a value either by exact match or value is element of subproperty list.|@["properties.foo", "contains", "bar"]@ will find both @{"foo": "bar"}@ and @{"foo": ["bar", "baz"]}@.| Note that exclusion filters @!=@ and @not in@ will return records for which the property is not defined at all. To restrict filtering to records on which the subproperty is defined, combine with an @exists@ filter. 
diff --git a/doc/api/methods/collections.html.textile.liquid b/doc/api/methods/collections.html.textile.liquid index fd4a36f291..6c1cc691c3 100644 --- a/doc/api/methods/collections.html.textile.liquid +++ b/doc/api/methods/collections.html.textile.liquid @@ -32,7 +32,10 @@ table(table table-bordered table-condensed). |manifest_text|text||| |replication_desired|number|Minimum storage replication level desired for each data block referenced by this collection. A value of @null@ signifies that the site default replication level (typically 2) is desired.|@2@| |replication_confirmed|number|Replication level most recently confirmed by the storage system. This field is null when a collection is first created, and is reset to null when the manifest_text changes in a way that introduces a new data block. An integer value indicates the replication level of the _least replicated_ data block in the collection.|@2@, null| -|replication_confirmed_at|datetime|When replication_confirmed was confirmed. If replication_confirmed is null, this field is also null.|| +|replication_confirmed_at|datetime|When @replication_confirmed@ was confirmed. If @replication_confirmed@ is null, this field is also null.|| +|storage_classes_desired|list|An optional list of storage class names where the blocks should be saved. If not provided, the cluster's default storage class(es) will be set.|@['archival']@| +|storage_classes_confirmed|list|Storage classes most recently confirmed by the storage system. This field is an empty list when a collection is first created.|@['archival']@, @[]@| +|storage_classes_confirmed_at|datetime|When @storage_classes_confirmed@ was confirmed. If @storage_classes_confirmed@ is @[]@, this field is null.|| |trash_at|datetime|If @trash_at@ is non-null and in the past, this collection will be hidden from API calls. 
May be untrashed.|| |delete_at|datetime|If @delete_at@ is non-null and in the past, the collection may be permanently deleted.|| |is_trashed|boolean|True if @trash_at@ is in the past, false if not.|| diff --git a/lib/config/export.go b/lib/config/export.go index 065011cc2e..92e2d7b4d5 100644 --- a/lib/config/export.go +++ b/lib/config/export.go @@ -105,7 +105,7 @@ var whitelist = map[string]bool{ "Collections.PreserveVersionIfIdle": true, "Collections.S3FolderObjects": true, "Collections.TrashSweepInterval": false, - "Collections.TrustAllContent": false, + "Collections.TrustAllContent": true, "Collections.WebDAVCache": false, "Collections.KeepproxyPermission": false, "Collections.WebDAVPermission": false, diff --git a/sdk/python/arvados/collection.py b/sdk/python/arvados/collection.py index 44d0325353..50cb703a56 100644 --- a/sdk/python/arvados/collection.py +++ b/sdk/python/arvados/collection.py @@ -1296,8 +1296,8 @@ class Collection(RichCollectionBase): :storage_classes_desired: A list of storage class names where to upload the data. If None, - the keepstores are expected to store the data into their default - storage class. + the keep client is expected to store the data into the cluster's + default storage class(es). 
""" diff --git a/sdk/python/arvados/keep.py b/sdk/python/arvados/keep.py index 86b1d91b82..9dfe0436de 100644 --- a/sdk/python/arvados/keep.py +++ b/sdk/python/arvados/keep.py @@ -842,6 +842,7 @@ class KeepClient(object): self.hits_counter = Counter() self.misses_counter = Counter() self._storage_classes_unsupported_warning = False + self._default_classes = [] if local_store: self.local_store = local_store @@ -882,6 +883,12 @@ class KeepClient(object): self._writable_services = None self.using_proxy = None self._static_services_list = False + try: + self._default_classes = [ + k for k, v in self.api_client.config()['StorageClasses'].items() if v['Default']] + except KeyError: + # We're talking to an old cluster + pass def current_timeout(self, attempt_number): """Return the appropriate timeout to use for this client. @@ -1174,7 +1181,7 @@ class KeepClient(object): "failed to read {} after {}".format(loc_s, loop.attempts_str()), service_errors, label="service") @retry.retry_method - def put(self, data, copies=2, num_retries=None, request_id=None, classes=[]): + def put(self, data, copies=2, num_retries=None, request_id=None, classes=None): """Save data in Keep. This method will get a list of Keep services from the API server, and @@ -1195,6 +1202,8 @@ class KeepClient(object): be written. 
""" + classes = classes or self._default_classes + if not isinstance(data, bytes): data = data.encode() diff --git a/sdk/python/tests/arvados_testutil.py b/sdk/python/tests/arvados_testutil.py index f251ea654b..d9b3ca86c4 100644 --- a/sdk/python/tests/arvados_testutil.py +++ b/sdk/python/tests/arvados_testutil.py @@ -190,7 +190,13 @@ class MockStreamReader(object): class ApiClientMock(object): def api_client_mock(self): - return mock.MagicMock(name='api_client_mock') + api_mock = mock.MagicMock(name='api_client_mock') + api_mock.config.return_value = { + 'StorageClasses': { + 'default': {'Default': True} + } + } + return api_mock def mock_keep_services(self, api_mock=None, status=200, count=12, service_type='disk', diff --git a/sdk/python/tests/test_keep_client.py b/sdk/python/tests/test_keep_client.py index 0eefa586d9..b1c42fd2b3 100644 --- a/sdk/python/tests/test_keep_client.py +++ b/sdk/python/tests/test_keep_client.py @@ -540,26 +540,49 @@ class KeepStorageClassesTestCase(unittest.TestCase, tutil.ApiClientMock): self.data = b'xyzzy' self.locator = '1271ed5ef305aadabc605b1609e24c52' + def test_multiple_default_storage_classes_req_header(self): + api_mock = self.api_client_mock() + api_mock.config.return_value = { + 'StorageClasses': { + 'foo': { 'Default': True }, + 'bar': { 'Default': True }, + 'baz': { 'Default': False } + } + } + api_client = self.mock_keep_services(api_mock=api_mock, count=2) + keep_client = arvados.KeepClient(api_client=api_client) + resp_hdr = { + 'x-keep-storage-classes-confirmed': 'foo=1, bar=1', + 'x-keep-replicas-stored': 1 + } + with tutil.mock_keep_responses(self.locator, 200, **resp_hdr) as mock: + keep_client.put(self.data, copies=1) + req_hdr = mock.responses[0] + self.assertIn( + 'X-Keep-Storage-Classes: bar, foo', req_hdr.getopt(pycurl.HTTPHEADER)) + def test_storage_classes_req_header(self): + self.assertEqual( + self.api_client.config()['StorageClasses'], + {'default': {'Default': True}}) cases = [ # requested, expected 
[['foo'], 'X-Keep-Storage-Classes: foo'], [['bar', 'foo'], 'X-Keep-Storage-Classes: bar, foo'], - [[], None], + [[], 'X-Keep-Storage-Classes: default'], + [None, 'X-Keep-Storage-Classes: default'], ] for req_classes, expected_header in cases: headers = {'x-keep-replicas-stored': 1} - if len(req_classes) > 0: + if req_classes is None or len(req_classes) == 0: + confirmed_hdr = 'default=1' + elif len(req_classes) > 0: confirmed_hdr = ', '.join(["{}=1".format(cls) for cls in req_classes]) - headers.update({'x-keep-storage-classes-confirmed': confirmed_hdr}) + headers.update({'x-keep-storage-classes-confirmed': confirmed_hdr}) with tutil.mock_keep_responses(self.locator, 200, **headers) as mock: self.keep_client.put(self.data, copies=1, classes=req_classes) - resp = mock.responses[0] - if expected_header is not None: - self.assertIn(expected_header, resp.getopt(pycurl.HTTPHEADER)) - else: - for hdr in resp.getopt(pycurl.HTTPHEADER): - self.assertNotRegex(hdr, r'^X-Keep-Storage-Classes.*') + req_hdr = mock.responses[0] + self.assertIn(expected_header, req_hdr.getopt(pycurl.HTTPHEADER)) def test_partial_storage_classes_put(self): headers = { @@ -1368,6 +1391,8 @@ class KeepClientAPIErrorTest(unittest.TestCase): return "abc" elif r == "insecure": return False + elif r == "config": + return lambda: {} else: raise arvados.errors.KeepReadError() keep_client = arvados.KeepClient(api_client=ApiMock(), diff --git a/services/api/lib/record_filters.rb b/services/api/lib/record_filters.rb index f8898d63c9..409e48a6f0 100644 --- a/services/api/lib/record_filters.rb +++ b/services/api/lib/record_filters.rb @@ -47,9 +47,10 @@ module RecordFilters raise ArgumentError.new("Invalid operator '#{operator}' (#{operator.class}) in filter") end + operator = operator.downcase cond_out = [] - if attrs_in == 'any' && (operator.casecmp('ilike').zero? || operator.casecmp('like').zero?) && (operand.is_a? 
String) && operand.match('^[%].*[%]$') + if attrs_in == 'any' && (operator == 'ilike' || operator == 'like') && (operand.is_a? String) && operand.match('^[%].*[%]$') # Trigram index search cond_out << model_class.full_text_trgm + " #{operator} ?" param_out << operand @@ -85,9 +86,9 @@ module RecordFilters end # jsonb search - case operator.downcase + case operator when '=', '!=' - not_in = if operator.downcase == "!=" then "NOT " else "" end + not_in = if operator == "!=" then "NOT " else "" end cond_out << "#{not_in}(#{attr_table_name}.#{attr} @> ?::jsonb)" param_out << SafeJSON.dump({proppath => operand}) when 'in' @@ -134,7 +135,7 @@ module RecordFilters else raise ArgumentError.new("Invalid operator for subproperty search '#{operator}'") end - elsif operator.downcase == "exists" + elsif operator == "exists" if col.type != :jsonb raise ArgumentError.new("Invalid attribute '#{attr}' for operator '#{operator}' in filter") end @@ -142,11 +143,12 @@ module RecordFilters cond_out << "jsonb_exists(#{attr_table_name}.#{attr}, ?)" param_out << operand else - if !attr_model_class.searchable_columns(operator).index attr + if !attr_model_class.searchable_columns(operator).index(attr) && + !(col.andand.type == :jsonb && ['contains', '=', '<>', '!='].index(operator)) raise ArgumentError.new("Invalid attribute '#{attr}' in filter") end - case operator.downcase + case operator when '=', '<', '<=', '>', '>=', '!=', 'like', 'ilike' attr_type = attr_model_class.attribute_column(attr).type operator = '<>' if operator == '!=' @@ -227,6 +229,26 @@ module RecordFilters end end cond_out << cond.join(' OR ') + when 'contains' + if col.andand.type != :jsonb + raise ArgumentError.new("Invalid attribute '#{attr}' for '#{operator}' operator") + end + if operand == [] + raise ArgumentError.new("Invalid operand '#{operand.inspect}' for '#{operator}' operator") + end + operand = [operand] unless operand.is_a? 
Array + operand.each do |op| + if !op.is_a?(String) + raise ArgumentError.new("Invalid element #{operand.inspect} in operand for #{operator.inspect} operator (operand must be a string or array of strings)") + end + end + # We use jsonb_exists_all(a,b) instead of "a ?& b" because + # the pg gem thinks "?" is a bind var. And we use string + # interpolation instead of param_out because the pg gem + # flattens param_out / doesn't support passing arrays as + # bind vars. + q = operand.map { |s| ActiveRecord::Base.connection.quote(s) }.join(',') + cond_out << "jsonb_exists_all(#{attr_table_name}.#{attr}, array[#{q}])" else raise ArgumentError.new("Invalid operator '#{operator}'") end diff --git a/services/api/test/functional/arvados/v1/collections_controller_test.rb b/services/api/test/functional/arvados/v1/collections_controller_test.rb index 2c9470d972..6c923ff38d 100644 --- a/services/api/test/functional/arvados/v1/collections_controller_test.rb +++ b/services/api/test/functional/arvados/v1/collections_controller_test.rb @@ -1403,4 +1403,18 @@ EOS assert_response :success assert_equal col.version, json_response['version'], 'Trashing a collection should not create a new version' end + + ["storage_classes_desired", "storage_classes_confirmed"].each do |attr| + test "filter collections by #{attr}" do + authorize_with(:active) + get :index, params: { + filters: [[attr, "=", '["default"]']] + } + assert_response :success + assert_not_equal 0, json_response["items"].length + json_response["items"].each do |c| + assert_equal ["default"], c[attr] + end + end + end end diff --git a/services/api/test/functional/arvados/v1/filters_test.rb b/services/api/test/functional/arvados/v1/filters_test.rb index bcb1807867..dd8eeaa7be 100644 --- a/services/api/test/functional/arvados/v1/filters_test.rb +++ b/services/api/test/functional/arvados/v1/filters_test.rb @@ -247,4 +247,51 @@ class Arvados::V1::FiltersTest < ActionController::TestCase assert_includes(found, 
collections(:replication_desired_2_unconfirmed).uuid) assert_includes(found, collections(:replication_desired_2_confirmed_2).uuid) end + + [ + [1, "foo"], + [1, ["foo"]], + [1, ["bar"]], + [1, ["bar", "foo"]], + [0, ["foo", "qux"]], + [0, ["qux"]], + [nil, []], + [nil, [[]]], + [nil, [["bogus"]]], + [nil, [{"foo" => "bar"}]], + [nil, {"foo" => "bar"}], + ].each do |results, operand| + test "storage_classes_desired contains #{operand.inspect}" do + @controller = Arvados::V1::CollectionsController.new + authorize_with(:active) + c = Collection.create!( + manifest_text: "", + storage_classes_desired: ["foo", "bar", "baz"]) + get :index, params: { + filters: [["storage_classes_desired", "contains", operand]], + } + if results.nil? + assert_response 422 + next + end + assert_response :success + assert_equal results, json_response["items"].length + if results > 0 + assert_equal c.uuid, json_response["items"][0]["uuid"] + end + end + end + + test "collections properties contains top level key" do + @controller = Arvados::V1::CollectionsController.new + authorize_with(:active) + get :index, params: { + filters: [["properties", "contains", "prop1"]], + } + assert_response :success + assert_not_empty json_response["items"] + json_response["items"].each do |c| + assert c["properties"].has_key?("prop1") + end + end end