Merge branch '17217-collection-signatures' into main
author Tom Clegg <tom@curii.com>
Mon, 30 Aug 2021 17:29:33 +0000 (13:29 -0400)
committer Tom Clegg <tom@curii.com>
Mon, 30 Aug 2021 17:29:33 +0000 (13:29 -0400)
closes #17217

Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom@curii.com>

build/run-build-packages-one-target.sh
doc/api/methods.html.textile.liquid
doc/api/methods/collections.html.textile.liquid
lib/config/export.go
sdk/python/arvados/collection.py
sdk/python/arvados/keep.py
sdk/python/tests/arvados_testutil.py
sdk/python/tests/test_keep_client.py
services/api/lib/record_filters.rb
services/api/test/functional/arvados/v1/collections_controller_test.rb
services/api/test/functional/arvados/v1/filters_test.rb

index 81aac9c616c11ea2894482b240c08495a577511d..7a91cb4de15eec13dbd524342b2bb20679666b0e 100755 (executable)
@@ -110,6 +110,7 @@ while [ $# -gt 0 ]; do
                 echo >&2 "FATAL: --build-version '$2' is invalid, must match pattern ^[0-9]+\.[0-9]+\.[0-9]+(\.[0-9]+|)(~rc[0-9]+|~dev[0-9]+|)-[0-9]+$"
                 exit 1
             else
+                [[ "$2" =~ (.*)-(.*) ]]
                 ARVADOS_BUILDING_VERSION="${BASH_REMATCH[1]}"
                 ARVADOS_BUILDING_ITERATION="${BASH_REMATCH[2]}"
             fi
index 670a9e0da3d96ed16f8de9e053ae5a746cf0aa31..e051ab66fa7afa18d8e52b09741c292f5e1faa9c 100644 (file)
@@ -100,12 +100,16 @@ The following operators are available.
 
 table(table table-bordered table-condensed).
 |_. Operator|_. Operand type|_. Description|_. Example|
-|@=@, @!=@|string, number, timestamp, or null|Equality comparison|@["tail_uuid","=","xyzzy-j7d0g-fffffffffffffff"]@ @["tail_uuid","!=",null]@|
+|@=@, @!=@, @<>@|string, number, timestamp, JSON-encoded array, JSON-encoded object, or null|Equality comparison|@["tail_uuid","=","xyzzy-j7d0g-fffffffffffffff"]@
+@["tail_uuid","!=",null]@
+@["storage_classes_desired","=","[\"default\"]"]@|
 |@<@, @<=@, @>=@, @>@|string, number, or timestamp|Ordering comparison|@["script_version",">","123"]@|
 |@like@, @ilike@|string|SQL pattern match.  Single character match is @_@ and wildcard is @%@. The @ilike@ operator is case-insensitive|@["script_version","like","d00220fb%"]@|
 |@in@, @not in@|array of strings|Set membership|@["script_version","in",["main","d00220fb38d4b85ca8fc28a8151702a2b9d1dec5"]]@|
 |@is_a@|string|Arvados object type|@["head_uuid","is_a","arvados#collection"]@|
-|@exists@|string|Test if a subproperty is present.|@["properties","exists","my_subproperty"]@|
+|@exists@|string|Presence of subproperty|@["properties","exists","my_subproperty"]@|
+|@contains@|string, array of strings|Presence of one or more keys or array elements|@["storage_classes_desired", "contains", ["foo", "bar"]]@ (matches both @["foo", "bar"]@ and @["foo", "bar", "baz"]@)
+(note @[..., "contains", "foo"]@ is also accepted, and is equivalent to @[..., "contains", ["foo"]]@)|
 
 h4(#substringsearchfilter). Filtering using substring search
 
@@ -128,7 +132,7 @@ table(table table-bordered table-condensed).
 |@like@, @ilike@|string|SQL pattern match, single character match is @_@ and wildcard is @%@, ilike is case-insensitive|@["properties.my_subproperty", "like", "d00220fb%"]@|
 |@in@, @not in@|array of strings|Set membership|@["properties.my_subproperty", "in", ["fizz", "buzz"]]@|
 |@exists@|boolean|Test if a subproperty is present or not (determined by operand).|@["properties.my_subproperty", "exists", true]@|
-|@contains@|string, number|Filter where subproperty has a value either by exact match or value is element of subproperty list.|@["foo", "contains", "bar"]@ will find both @{"foo": "bar"}@ and @{"foo": ["bar", "baz"]}@.|
+|@contains@|string, number|Filter where subproperty has a value either by exact match or value is element of subproperty list.|@["properties.foo", "contains", "bar"]@ will find both @{"foo": "bar"}@ and @{"foo": ["bar", "baz"]}@.|
 
 Note that exclusion filters @!=@ and @not in@ will return records for which the property is not defined at all.  To restrict filtering to records on which the subproperty is defined, combine with an @exists@ filter.
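For illustration only (not part of the diff), the new @contains@ operator and the JSON-encoded operands now accepted by @=@/@!=@ might be used from the Python SDK roughly as follows; the storage class names and property key are placeholders taken from the tests below:

import arvados

api = arvados.api('v1')

# Equality against a JSON-encoded array: storage_classes_desired is exactly ["default"].
api.collections().list(
    filters=[["storage_classes_desired", "=", "[\"default\"]"]]).execute()

# contains with an array operand: storage_classes_desired includes both "foo"
# and "bar", possibly alongside other classes.
api.collections().list(
    filters=[["storage_classes_desired", "contains", ["foo", "bar"]]]).execute()

# contains with a single string operand: properties has a top-level key "prop1".
api.collections().list(
    filters=[["properties", "contains", "prop1"]]).execute()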
 
index fd4a36f291ae90641a2e48606af66b35311c0780..6c1cc691c333419e8504c77da50a2fbcc582bda5 100644 (file)
@@ -32,7 +32,10 @@ table(table table-bordered table-condensed).
 |manifest_text|text|||
 |replication_desired|number|Minimum storage replication level desired for each data block referenced by this collection. A value of @null@ signifies that the site default replication level (typically 2) is desired.|@2@|
 |replication_confirmed|number|Replication level most recently confirmed by the storage system. This field is null when a collection is first created, and is reset to null when the manifest_text changes in a way that introduces a new data block. An integer value indicates the replication level of the _least replicated_ data block in the collection.|@2@, null|
-|replication_confirmed_at|datetime|When replication_confirmed was confirmed. If replication_confirmed is null, this field is also null.||
+|replication_confirmed_at|datetime|When @replication_confirmed@ was confirmed. If @replication_confirmed@ is null, this field is also null.||
+|storage_classes_desired|list|An optional list of storage class names where the blocks should be saved. If not provided, the cluster's default storage class(es) will be used.|@['archival']@|
+|storage_classes_confirmed|list|Storage classes most recently confirmed by the storage system. This field is an empty list when a collection is first created.|@['archival']@, @[]@|
+|storage_classes_confirmed_at|datetime|When @storage_classes_confirmed@ was confirmed. If @storage_classes_confirmed@ is @[]@, this field is null.||
 |trash_at|datetime|If @trash_at@ is non-null and in the past, this collection will be hidden from API calls.  May be untrashed.||
 |delete_at|datetime|If @delete_at@ is non-null and in the past, the collection may be permanently deleted.||
 |is_trashed|boolean|True if @trash_at@ is in the past, false if not.||
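As a hedged sketch (illustrative only), setting and reading these new fields through the Python API client could look like this; @archival@ is a placeholder class name that would have to exist in the cluster configuration:

import arvados

api = arvados.api('v1')
col = api.collections().create(body={"collection": {
    "name": "storage class example",
    "storage_classes_desired": ["archival"],  # placeholder class name
}}).execute()

# storage_classes_confirmed starts out as [] and storage_classes_confirmed_at
# as null; the storage system fills them in once the blocks are confirmed.
print(col["storage_classes_desired"], col["storage_classes_confirmed"])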
index 065011cc2e8d31d2fb133f6546a32a8f506861b2..92e2d7b4d522e2b1a07b8a5602f3318f04720645 100644 (file)
@@ -105,7 +105,7 @@ var whitelist = map[string]bool{
        "Collections.PreserveVersionIfIdle":                   true,
        "Collections.S3FolderObjects":                         true,
        "Collections.TrashSweepInterval":                      false,
-       "Collections.TrustAllContent":                         false,
+       "Collections.TrustAllContent":                         true,
        "Collections.WebDAVCache":                             false,
        "Collections.KeepproxyPermission":                     false,
        "Collections.WebDAVPermission":                        false,
index 44d0325353b51eebb5e7846c4733ab7228c8a1a1..50cb703a56a5a0dc66a068593fc4d3ed4a855166 100644 (file)
@@ -1296,8 +1296,8 @@ class Collection(RichCollectionBase):
 
         :storage_classes_desired:
           A list of storage class names where to upload the data. If None,
-          the keepstores are expected to store the data into their default
-          storage class.
+          the keep client is expected to store the data into the cluster's
+          default storage class(es).
 
         """
 
index 86b1d91b8246ef20ed860cdb516ca5d4a53624b9..9dfe0436dec9bdf22eb71ad9bfe2e8a201ee3ab6 100644 (file)
@@ -842,6 +842,7 @@ class KeepClient(object):
         self.hits_counter = Counter()
         self.misses_counter = Counter()
         self._storage_classes_unsupported_warning = False
+        self._default_classes = []
 
         if local_store:
             self.local_store = local_store
@@ -882,6 +883,12 @@ class KeepClient(object):
                 self._writable_services = None
                 self.using_proxy = None
                 self._static_services_list = False
+                try:
+                    self._default_classes = [
+                        k for k, v in self.api_client.config()['StorageClasses'].items() if v['Default']]
+                except KeyError:
+                    # We're talking to an old cluster
+                    pass
 
     def current_timeout(self, attempt_number):
         """Return the appropriate timeout to use for this client.
@@ -1174,7 +1181,7 @@ class KeepClient(object):
                 "failed to read {} after {}".format(loc_s, loop.attempts_str()), service_errors, label="service")
 
     @retry.retry_method
-    def put(self, data, copies=2, num_retries=None, request_id=None, classes=[]):
+    def put(self, data, copies=2, num_retries=None, request_id=None, classes=None):
         """Save data in Keep.
 
         This method will get a list of Keep services from the API server, and
@@ -1195,6 +1202,8 @@ class KeepClient(object):
           be written.
         """
 
+        classes = classes or self._default_classes
+
         if not isinstance(data, bytes):
             data = data.encode()
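Sketching the resulting client-side behavior (illustrative; @archival@ is again a placeholder class name):

import arvados

api = arvados.api('v1')
keep = arvados.KeepClient(api_client=api)

# Explicit classes are sent in the X-Keep-Storage-Classes request header, as before.
keep.put(b"example data", copies=1, classes=["archival"])

# With classes omitted (now None by default), the client falls back to the
# cluster's default storage classes discovered from the exported config.
keep.put(b"example data", copies=1)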
 
index f251ea654b5fed6d0c5f9c837c720aa733b02baf..d9b3ca86c4f9055dde2fa9b54ad63ed65d16d755 100644 (file)
@@ -190,7 +190,13 @@ class MockStreamReader(object):
 
 class ApiClientMock(object):
     def api_client_mock(self):
-        return mock.MagicMock(name='api_client_mock')
+        api_mock = mock.MagicMock(name='api_client_mock')
+        api_mock.config.return_value = {
+            'StorageClasses': {
+                'default': {'Default': True}
+            }
+        }
+        return api_mock
 
     def mock_keep_services(self, api_mock=None, status=200, count=12,
                            service_type='disk',
index 0eefa586d9c436413e2e1934d9cf401e4ed17467..b1c42fd2b3a1475934a0c6090e12139750210f46 100644 (file)
@@ -540,26 +540,49 @@ class KeepStorageClassesTestCase(unittest.TestCase, tutil.ApiClientMock):
         self.data = b'xyzzy'
         self.locator = '1271ed5ef305aadabc605b1609e24c52'
 
+    def test_multiple_default_storage_classes_req_header(self):
+        api_mock = self.api_client_mock()
+        api_mock.config.return_value = {
+            'StorageClasses': {
+                'foo': { 'Default': True },
+                'bar': { 'Default': True },
+                'baz': { 'Default': False }
+            }
+        }
+        api_client = self.mock_keep_services(api_mock=api_mock, count=2)
+        keep_client = arvados.KeepClient(api_client=api_client)
+        resp_hdr = {
+            'x-keep-storage-classes-confirmed': 'foo=1, bar=1',
+            'x-keep-replicas-stored': 1
+        }
+        with tutil.mock_keep_responses(self.locator, 200, **resp_hdr) as mock:
+            keep_client.put(self.data, copies=1)
+            req_hdr = mock.responses[0]
+            self.assertIn(
+                'X-Keep-Storage-Classes: bar, foo', req_hdr.getopt(pycurl.HTTPHEADER))
+
     def test_storage_classes_req_header(self):
+        self.assertEqual(
+            self.api_client.config()['StorageClasses'],
+            {'default': {'Default': True}})
         cases = [
             # requested, expected
             [['foo'], 'X-Keep-Storage-Classes: foo'],
             [['bar', 'foo'], 'X-Keep-Storage-Classes: bar, foo'],
-            [[], None],
+            [[], 'X-Keep-Storage-Classes: default'],
+            [None, 'X-Keep-Storage-Classes: default'],
         ]
         for req_classes, expected_header in cases:
             headers = {'x-keep-replicas-stored': 1}
-            if len(req_classes) > 0:
+            if req_classes is None or len(req_classes) == 0:
+                confirmed_hdr = 'default=1'
+            elif len(req_classes) > 0:
                 confirmed_hdr = ', '.join(["{}=1".format(cls) for cls in req_classes])
-                headers.update({'x-keep-storage-classes-confirmed': confirmed_hdr})
+            headers.update({'x-keep-storage-classes-confirmed': confirmed_hdr})
             with tutil.mock_keep_responses(self.locator, 200, **headers) as mock:
                 self.keep_client.put(self.data, copies=1, classes=req_classes)
-                resp = mock.responses[0]
-                if expected_header is not None:
-                    self.assertIn(expected_header, resp.getopt(pycurl.HTTPHEADER))
-                else:
-                    for hdr in resp.getopt(pycurl.HTTPHEADER):
-                        self.assertNotRegex(hdr, r'^X-Keep-Storage-Classes.*')
+                req_hdr = mock.responses[0]
+                self.assertIn(expected_header, req_hdr.getopt(pycurl.HTTPHEADER))
 
     def test_partial_storage_classes_put(self):
         headers = {
@@ -1368,6 +1391,8 @@ class KeepClientAPIErrorTest(unittest.TestCase):
                     return "abc"
                 elif r == "insecure":
                     return False
+                elif r == "config":
+                    return lambda: {}
                 else:
                     raise arvados.errors.KeepReadError()
         keep_client = arvados.KeepClient(api_client=ApiMock(),
index f8898d63c90de2169fc8d18b53d40f68171ae945..409e48a6f090a3b348cd5d551bf35a91427e42a9 100644 (file)
@@ -47,9 +47,10 @@ module RecordFilters
         raise ArgumentError.new("Invalid operator '#{operator}' (#{operator.class}) in filter")
       end
 
+      operator = operator.downcase
       cond_out = []
 
-      if attrs_in == 'any' && (operator.casecmp('ilike').zero? || operator.casecmp('like').zero?) && (operand.is_a? String) && operand.match('^[%].*[%]$')
+      if attrs_in == 'any' && (operator == 'ilike' || operator == 'like') && (operand.is_a? String) && operand.match('^[%].*[%]$')
         # Trigram index search
         cond_out << model_class.full_text_trgm + " #{operator} ?"
         param_out << operand
@@ -85,9 +86,9 @@ module RecordFilters
           end
 
           # jsonb search
-          case operator.downcase
+          case operator
           when '=', '!='
-            not_in = if operator.downcase == "!=" then "NOT " else "" end
+            not_in = if operator == "!=" then "NOT " else "" end
             cond_out << "#{not_in}(#{attr_table_name}.#{attr} @> ?::jsonb)"
             param_out << SafeJSON.dump({proppath => operand})
           when 'in'
@@ -134,7 +135,7 @@ module RecordFilters
           else
             raise ArgumentError.new("Invalid operator for subproperty search '#{operator}'")
           end
-        elsif operator.downcase == "exists"
+        elsif operator == "exists"
           if col.type != :jsonb
             raise ArgumentError.new("Invalid attribute '#{attr}' for operator '#{operator}' in filter")
           end
@@ -142,11 +143,12 @@ module RecordFilters
           cond_out << "jsonb_exists(#{attr_table_name}.#{attr}, ?)"
           param_out << operand
         else
-          if !attr_model_class.searchable_columns(operator).index attr
+          if !attr_model_class.searchable_columns(operator).index(attr) &&
+             !(col.andand.type == :jsonb && ['contains', '=', '<>', '!='].index(operator))
             raise ArgumentError.new("Invalid attribute '#{attr}' in filter")
           end
 
-          case operator.downcase
+          case operator
           when '=', '<', '<=', '>', '>=', '!=', 'like', 'ilike'
             attr_type = attr_model_class.attribute_column(attr).type
             operator = '<>' if operator == '!='
@@ -227,6 +229,26 @@ module RecordFilters
               end
             end
             cond_out << cond.join(' OR ')
+          when 'contains'
+            if col.andand.type != :jsonb
+              raise ArgumentError.new("Invalid attribute '#{attr}' for '#{operator}' operator")
+            end
+            if operand == []
+              raise ArgumentError.new("Invalid operand '#{operand.inspect}' for '#{operator}' operator")
+            end
+            operand = [operand] unless operand.is_a? Array
+            operand.each do |op|
+              if !op.is_a?(String)
+                raise ArgumentError.new("Invalid element #{operand.inspect} in operand for #{operator.inspect} operator (operand must be a string or array of strings)")
+              end
+            end
+            # We use jsonb_exists_all(a,b) instead of "a ?& b" because
+            # the pg gem thinks "?" is a bind var. And we use string
+            # interpolation instead of param_out because the pg gem
+            # flattens param_out / doesn't support passing arrays as
+            # bind vars.
+            q = operand.map { |s| ActiveRecord::Base.connection.quote(s) }.join(',')
+            cond_out << "jsonb_exists_all(#{attr_table_name}.#{attr}, array[#{q}])"
           else
             raise ArgumentError.new("Invalid operator '#{operator}'")
           end
index 2c9470d9729455b347cb40438792640ae944b784..6c923ff38d96b8b64e4b8fa8ad83b13b3b29eefc 100644 (file)
@@ -1403,4 +1403,18 @@ EOS
     assert_response :success
     assert_equal col.version, json_response['version'], 'Trashing a collection should not create a new version'
   end
+
+  ["storage_classes_desired", "storage_classes_confirmed"].each do |attr|
+    test "filter collections by #{attr}" do
+      authorize_with(:active)
+      get :index, params: {
+            filters: [[attr, "=", '["default"]']]
+          }
+      assert_response :success
+      assert_not_equal 0, json_response["items"].length
+      json_response["items"].each do |c|
+        assert_equal ["default"], c[attr]
+      end
+    end
+  end
 end
index bcb18078674ffd27bb124772b4478ebecfff9a76..dd8eeaa7bead1e260d46e5da4142707792edd42a 100644 (file)
@@ -247,4 +247,51 @@ class Arvados::V1::FiltersTest < ActionController::TestCase
     assert_includes(found, collections(:replication_desired_2_unconfirmed).uuid)
     assert_includes(found, collections(:replication_desired_2_confirmed_2).uuid)
   end
+
+  [
+    [1, "foo"],
+    [1, ["foo"]],
+    [1, ["bar"]],
+    [1, ["bar", "foo"]],
+    [0, ["foo", "qux"]],
+    [0, ["qux"]],
+    [nil, []],
+    [nil, [[]]],
+    [nil, [["bogus"]]],
+    [nil, [{"foo" => "bar"}]],
+    [nil, {"foo" => "bar"}],
+  ].each do |results, operand|
+    test "storage_classes_desired contains #{operand.inspect}" do
+      @controller = Arvados::V1::CollectionsController.new
+      authorize_with(:active)
+      c = Collection.create!(
+        manifest_text: "",
+        storage_classes_desired: ["foo", "bar", "baz"])
+      get :index, params: {
+            filters: [["storage_classes_desired", "contains", operand]],
+          }
+      if results.nil?
+        assert_response 422
+        next
+      end
+      assert_response :success
+      assert_equal results, json_response["items"].length
+      if results > 0
+        assert_equal c.uuid, json_response["items"][0]["uuid"]
+      end
+    end
+  end
+
+  test "collections properties contains top level key" do
+    @controller = Arvados::V1::CollectionsController.new
+    authorize_with(:active)
+    get :index, params: {
+          filters: [["properties", "contains", "prop1"]],
+        }
+    assert_response :success
+    assert_not_empty json_response["items"]
+    json_response["items"].each do |c|
+      assert c["properties"].has_key?("prop1")
+    end
+  end
 end