Merge branch '17994-filter-by-storage-classes' into main
authorTom Clegg <tom@curii.com>
Mon, 30 Aug 2021 17:27:03 +0000 (13:27 -0400)
committerTom Clegg <tom@curii.com>
Mon, 30 Aug 2021 17:27:03 +0000 (13:27 -0400)
closes #17994

Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom@curii.com>

29 files changed:
apps/workbench/test/controllers/work_units_controller_test.rb
apps/workbench/test/integration/work_units_test.rb
build/run-build-packages-one-target.sh
doc/admin/upgrading.html.textile.liquid
doc/api/methods.html.textile.liquid
doc/api/methods/collections.html.textile.liquid
lib/config/export.go
lib/controller/integration_test.go
lib/crunchrun/singularity.go
sdk/python/arvados/collection.py
sdk/python/arvados/keep.py
sdk/python/tests/arvados_testutil.py
sdk/python/tests/test_keep_client.py
services/api/app/models/api_client_authorization.rb
services/api/db/migrate/20210816191509_drop_fts_index.rb [new file with mode: 0644]
services/api/db/structure.sql
services/api/lib/record_filters.rb
services/api/test/fixtures/jobs.yml
services/api/test/fixtures/pipeline_instances.yml
services/api/test/functional/arvados/v1/filters_test.rb
services/api/test/integration/collections_api_test.rb
services/api/test/integration/groups_test.rb
services/api/test/unit/arvados_model_test.rb
services/keepstore/handler_test.go
services/keepstore/handlers.go
services/keepstore/volume.go
tools/arvbox/lib/arvbox/docker/Dockerfile.base
tools/arvbox/lib/arvbox/docker/service/workbench2/run-service
tools/test-collection-create/test-collection-create.py [new file with mode: 0644]

index 6f74955cd1c8d0940c979b70867a6cbbfda5aacb..0191c7f0df6f768959e7716e95abd68128e21bf9 100644 (file)
@@ -13,26 +13,26 @@ class WorkUnitsControllerTest < ActionController::TestCase
   [
     ['foo', 10, 25,
       ['/pipeline_instances/zzzzz-d1hrv-1xfj6xkicf2muk2',
-       '/pipeline_instances/zzzzz-d1hrv-jobspeccomponts',
+       '/pipeline_instances/zzzzz-d1hrv-1yfj61234abcdk4',
        '/jobs/zzzzz-8i9sb-grx15v5mjnsyxk7'],
       ['/pipeline_instances/zzzzz-d1hrv-1yfj61234abcdk3',
        '/jobs/zzzzz-8i9sb-n7omg50bvt0m1nf',
        '/container_requests/zzzzz-xvhdp-cr4completedcr2']],
     ['pipeline_with_tagged_collection_input', 1, 1,
       ['/pipeline_instances/zzzzz-d1hrv-1yfj61234abcdk3'],
-      ['/pipeline_instances/zzzzz-d1hrv-jobspeccomponts',
+      ['/pipeline_instances/zzzzz-d1hrv-1yfj61234abcdk4',
        '/jobs/zzzzz-8i9sb-pshmckwoma9plh7',
        '/jobs/zzzzz-8i9sb-n7omg50bvt0m1nf',
        '/container_requests/zzzzz-xvhdp-cr4completedcr2']],
     ['no_such_match', 0, 0,
       [],
-      ['/pipeline_instances/zzzzz-d1hrv-jobspeccomponts',
+      ['/pipeline_instances/zzzzz-d1hrv-1yfj61234abcdk4',
        '/jobs/zzzzz-8i9sb-pshmckwoma9plh7',
        '/jobs/zzzzz-8i9sb-n7omg50bvt0m1nf',
        '/container_requests/zzzzz-xvhdp-cr4completedcr2']],
   ].each do |search_filter, expected_min, expected_max, expected, not_expected|
     test "all_processes page for search filter '#{search_filter}'" do
-      work_units_index(filters: [['any','@@', search_filter]], show_children: true)
+      work_units_index(filters: [['any','ilike', "%#{search_filter}%"]], show_children: true)
       assert_response :success
 
       # Verify that expected number of processes are found
index 4f2ebbc554d624440cd4dc5251667c7c5ecadfba..36b29468ff8b1012d32232b1031ee8f2cf4f6ab3 100644 (file)
@@ -14,7 +14,7 @@ class WorkUnitsTest < ActionDispatch::IntegrationTest
 
   [[true, 25, 100,
     ['/pipeline_instances/zzzzz-d1hrv-1yfj61234abcdk3',
-     '/pipeline_instances/zzzzz-d1hrv-jobspeccomponts',
+     '/pipeline_instances/zzzzz-d1hrv-1yfj61234abcdk4',
      '/jobs/zzzzz-8i9sb-grx15v5mjnsyxk7',
      '/jobs/zzzzz-8i9sb-n7omg50bvt0m1nf',
      '/container_requests/zzzzz-xvhdp-cr4completedcr2',
@@ -23,7 +23,7 @@ class WorkUnitsTest < ActionDispatch::IntegrationTest
      '/container_requests/zzzzz-xvhdp-oneof60crs00001']],
    [false, 25, 100,
     ['/pipeline_instances/zzzzz-d1hrv-1yfj61234abcdk3',
-     '/pipeline_instances/zzzzz-d1hrv-jobspeccomponts',
+     '/pipeline_instances/zzzzz-d1hrv-1yfj61234abcdk4',
      '/container_requests/zzzzz-xvhdp-cr4completedcr2'],
     ['/pipeline_instances/zzzzz-d1hrv-scarxiyajtshq3l',
      '/container_requests/zzzzz-xvhdp-oneof60crs00001',
index 81aac9c616c11ea2894482b240c08495a577511d..7a91cb4de15eec13dbd524342b2bb20679666b0e 100755 (executable)
@@ -110,6 +110,7 @@ while [ $# -gt 0 ]; do
                 echo >&2 "FATAL: --build-version '$2' is invalid, must match pattern ^[0-9]+\.[0-9]+\.[0-9]+(\.[0-9]+|)(~rc[0-9]+|~dev[0-9]+|)-[0-9]+$"
                 exit 1
             else
+                [[ "$2" =~ (.*)-(.*) ]]
                 ARVADOS_BUILDING_VERSION="${BASH_REMATCH[1]}"
                 ARVADOS_BUILDING_ITERATION="${BASH_REMATCH[2]}"
             fi
index 8435e2871f882fd6588b709711182443dcc8f2f3..9e7410260f8955ea35f1d6e4790e792feb1c670f 100644 (file)
@@ -39,6 +39,10 @@ h2(#main). development main (as of 2021-07-15)
 
 "Upgrading from 2.2.0":#v2_2_0
 
+h3. Removed deprecated '@@' search operator
+
+The '@@' full text search operator, previously deprecated, has been removed. To perform a string search across multiple columns, use the 'ilike' operator on 'any' column as described in the "available list method filter section":{{site.baseurl}}/api/methods.html#substringsearchfilter of the API documentation.
+
 h3. Storage classes must be defined explicitly
 
 If your configuration uses the StorageClasses attribute on any Keep volumes, you must add a new @StorageClasses@ section that lists all of your storage classes. Refer to the updated documentation about "configuring storage classes":{{site.baseurl}}/admin/storage-classes.html for details.
index 7ca216bf13d9ccb27f18b1f1122374eda7d47913..e051ab66fa7afa18d8e52b09741c292f5e1faa9c 100644 (file)
@@ -96,7 +96,7 @@ table(table table-bordered table-condensed).
 |1|operator|string|Comparison operator|@>@, @>=@, @like@, @not in@|
 |2|operand|string, array, or null|Value to compare with the resource attribute|@"d00220fb%"@, @"1234"@, @["foo","bar"]@, @nil@|
 
-The following operators are available.[1]
+The following operators are available.
 
 table(table table-bordered table-condensed).
 |_. Operator|_. Operand type|_. Description|_. Example|
@@ -171,5 +171,3 @@ table(table table-bordered table-condensed).
 |_. Argument |_. Type |_. Description |_. Location |
 {background:#ccffcc}.|uuid|string|The UUID of the resource in question.|path||
 |{resource_type}|object||query||
-
-fn1^. NOTE: The filter operator for full-text search (@@) which previously worked (but was undocumented) is deprecated and will be removed in a future release.
index fd4a36f291ae90641a2e48606af66b35311c0780..6c1cc691c333419e8504c77da50a2fbcc582bda5 100644 (file)
@@ -32,7 +32,10 @@ table(table table-bordered table-condensed).
 |manifest_text|text|||
 |replication_desired|number|Minimum storage replication level desired for each data block referenced by this collection. A value of @null@ signifies that the site default replication level (typically 2) is desired.|@2@|
 |replication_confirmed|number|Replication level most recently confirmed by the storage system. This field is null when a collection is first created, and is reset to null when the manifest_text changes in a way that introduces a new data block. An integer value indicates the replication level of the _least replicated_ data block in the collection.|@2@, null|
-|replication_confirmed_at|datetime|When replication_confirmed was confirmed. If replication_confirmed is null, this field is also null.||
+|replication_confirmed_at|datetime|When @replication_confirmed@ was confirmed. If @replication_confirmed@ is null, this field is also null.||
+|storage_classes_desired|list|An optional list of storage class names where the blocks should be saved. If not provided, the cluster's default storage class(es) will be set.|@['archival']@|
+|storage_classes_confirmed|list|Storage classes most recently confirmed by the storage system. This field is an empty list when a collection is first created.|@['archival']@, @[]@|
+|storage_classes_confirmed_at|datetime|When @storage_classes_confirmed@ was confirmed. If @storage_classes_confirmed@ is @[]@, this field is null.||
 |trash_at|datetime|If @trash_at@ is non-null and in the past, this collection will be hidden from API calls.  May be untrashed.||
 |delete_at|datetime|If @delete_at@ is non-null and in the past, the collection may be permanently deleted.||
 |is_trashed|boolean|True if @trash_at@ is in the past, false if not.||
index 065011cc2e8d31d2fb133f6546a32a8f506861b2..92e2d7b4d522e2b1a07b8a5602f3318f04720645 100644 (file)
@@ -105,7 +105,7 @@ var whitelist = map[string]bool{
        "Collections.PreserveVersionIfIdle":                   true,
        "Collections.S3FolderObjects":                         true,
        "Collections.TrashSweepInterval":                      false,
-       "Collections.TrustAllContent":                         false,
+       "Collections.TrustAllContent":                         true,
        "Collections.WebDAVCache":                             false,
        "Collections.KeepproxyPermission":                     false,
        "Collections.WebDAVPermission":                        false,
index 26f0dbb0d1388da1886cea726fc644648b4d57e3..6851442054e1f49e8cde8c87dcced6d9eea0918a 100644 (file)
@@ -20,6 +20,7 @@ import (
        "path/filepath"
        "strconv"
        "strings"
+       "sync"
 
        "git.arvados.org/arvados.git/lib/boot"
        "git.arvados.org/arvados.git/lib/config"
@@ -187,6 +188,49 @@ func (s *IntegrationSuite) TestGetCollectionByPDH(c *check.C) {
        c.Check(coll.PortableDataHash, check.Equals, pdh)
 }
 
+// Tests bug #18004
+func (s *IntegrationSuite) TestRemoteUserAndTokenCacheRace(c *check.C) {
+       conn1 := s.testClusters["z1111"].Conn()
+       rootctx1, _, _ := s.testClusters["z1111"].RootClients()
+       rootctx2, _, _ := s.testClusters["z2222"].RootClients()
+       conn2 := s.testClusters["z2222"].Conn()
+       userctx1, _, _, _ := s.testClusters["z1111"].UserClients(rootctx1, c, conn1, "user2@example.com", true)
+
+       var wg1, wg2 sync.WaitGroup
+       creqs := 100
+
+       // Make concurrent requests to z2222 with a local token to make sure more
+       // than one worker is listening.
+       wg1.Add(1)
+       for i := 0; i < creqs; i++ {
+               wg2.Add(1)
+               go func() {
+                       defer wg2.Done()
+                       wg1.Wait()
+                       _, err := conn2.UserGetCurrent(rootctx2, arvados.GetOptions{})
+                       c.Check(err, check.IsNil, check.Commentf("warm up phase failed"))
+               }()
+       }
+       wg1.Done()
+       wg2.Wait()
+
+       // Real test pass -- use a new remote token than the one used in the warm-up
+       // phase.
+       wg1.Add(1)
+       for i := 0; i < creqs; i++ {
+               wg2.Add(1)
+               go func() {
+                       defer wg2.Done()
+                       wg1.Wait()
+                       // Retrieve the remote collection from cluster z2222.
+                       _, err := conn2.UserGetCurrent(userctx1, arvados.GetOptions{})
+                       c.Check(err, check.IsNil, check.Commentf("testing phase failed"))
+               }()
+       }
+       wg1.Done()
+       wg2.Wait()
+}
+
 func (s *IntegrationSuite) TestS3WithFederatedToken(c *check.C) {
        if _, err := exec.LookPath("s3cmd"); err != nil {
                c.Skip("s3cmd not in PATH")
@@ -502,7 +546,7 @@ func (s *IntegrationSuite) TestRequestIDHeader(c *check.C) {
 }
 
 // We test the direct access to the database
-// normally an integration test would not have a database access, but  in this case we need
+// normally an integration test would not have a database access, but in this case we need
 // to test tokens that are secret, so there is no API response that will give them back
 func (s *IntegrationSuite) dbConn(c *check.C, clusterID string) (*sql.DB, *sql.Conn) {
        ctx := context.Background()
index 741f542454e470ede35cc6f682c64c8a9b1bbf09..61fecad0a13c06664890a9cf2dfffb8346b7a47e 100644 (file)
@@ -101,7 +101,7 @@ func (e *singularityExecutor) checkImageCache(dockerImageID string, container ar
        if len(cl.Items) == 1 {
                imageCollection = cl.Items[0]
        } else {
-               collectionName := collectionName + " " + time.Now().UTC().Format(time.RFC3339)
+               collectionName := "converting " + collectionName
                exp := time.Now().Add(24 * 7 * 2 * time.Hour)
                err = containerClient.RequestAndDecode(&imageCollection,
                        arvados.EndpointCollectionCreate.Method,
@@ -112,6 +112,7 @@ func (e *singularityExecutor) checkImageCache(dockerImageID string, container ar
                                        "name":       collectionName,
                                        "trash_at":   exp.UTC().Format(time.RFC3339),
                                },
+                               "ensure_unique_name": true,
                        })
                if err != nil {
                        return nil, fmt.Errorf("error creating '%v' collection: %s", collectionName, err)
@@ -141,6 +142,12 @@ func (e *singularityExecutor) LoadImage(dockerImageID string, imageTarballPath s
        }
 
        if _, err := os.Stat(imageFilename); os.IsNotExist(err) {
+               // Make sure the docker image is readable, and error
+               // out if not.
+               if _, err := os.Stat(imageTarballPath); err != nil {
+                       return err
+               }
+
                e.logf("building singularity image")
                // "singularity build" does not accept a
                // docker-archive://... filename containing a ":" character,
index 44d0325353b51eebb5e7846c4733ab7228c8a1a1..50cb703a56a5a0dc66a068593fc4d3ed4a855166 100644 (file)
@@ -1296,8 +1296,8 @@ class Collection(RichCollectionBase):
 
         :storage_classes_desired:
           A list of storage class names where to upload the data. If None,
-          the keepstores are expected to store the data into their default
-          storage class.
+          the keep client is expected to store the data into the cluster's
+          default storage class(es).
 
         """
 
index 86b1d91b8246ef20ed860cdb516ca5d4a53624b9..9dfe0436dec9bdf22eb71ad9bfe2e8a201ee3ab6 100644 (file)
@@ -842,6 +842,7 @@ class KeepClient(object):
         self.hits_counter = Counter()
         self.misses_counter = Counter()
         self._storage_classes_unsupported_warning = False
+        self._default_classes = []
 
         if local_store:
             self.local_store = local_store
@@ -882,6 +883,12 @@ class KeepClient(object):
                 self._writable_services = None
                 self.using_proxy = None
                 self._static_services_list = False
+                try:
+                    self._default_classes = [
+                        k for k, v in self.api_client.config()['StorageClasses'].items() if v['Default']]
+                except KeyError:
+                    # We're talking to an old cluster
+                    pass
 
     def current_timeout(self, attempt_number):
         """Return the appropriate timeout to use for this client.
@@ -1174,7 +1181,7 @@ class KeepClient(object):
                 "failed to read {} after {}".format(loc_s, loop.attempts_str()), service_errors, label="service")
 
     @retry.retry_method
-    def put(self, data, copies=2, num_retries=None, request_id=None, classes=[]):
+    def put(self, data, copies=2, num_retries=None, request_id=None, classes=None):
         """Save data in Keep.
 
         This method will get a list of Keep services from the API server, and
@@ -1195,6 +1202,8 @@ class KeepClient(object):
           be written.
         """
 
+        classes = classes or self._default_classes
+
         if not isinstance(data, bytes):
             data = data.encode()
 
index f251ea654b5fed6d0c5f9c837c720aa733b02baf..d9b3ca86c4f9055dde2fa9b54ad63ed65d16d755 100644 (file)
@@ -190,7 +190,13 @@ class MockStreamReader(object):
 
 class ApiClientMock(object):
     def api_client_mock(self):
-        return mock.MagicMock(name='api_client_mock')
+        api_mock = mock.MagicMock(name='api_client_mock')
+        api_mock.config.return_value = {
+            'StorageClasses': {
+                'default': {'Default': True}
+            }
+        }
+        return api_mock
 
     def mock_keep_services(self, api_mock=None, status=200, count=12,
                            service_type='disk',
index 0eefa586d9c436413e2e1934d9cf401e4ed17467..b1c42fd2b3a1475934a0c6090e12139750210f46 100644 (file)
@@ -540,26 +540,49 @@ class KeepStorageClassesTestCase(unittest.TestCase, tutil.ApiClientMock):
         self.data = b'xyzzy'
         self.locator = '1271ed5ef305aadabc605b1609e24c52'
 
+    def test_multiple_default_storage_classes_req_header(self):
+        api_mock = self.api_client_mock()
+        api_mock.config.return_value = {
+            'StorageClasses': {
+                'foo': { 'Default': True },
+                'bar': { 'Default': True },
+                'baz': { 'Default': False }
+            }
+        }
+        api_client = self.mock_keep_services(api_mock=api_mock, count=2)
+        keep_client = arvados.KeepClient(api_client=api_client)
+        resp_hdr = {
+            'x-keep-storage-classes-confirmed': 'foo=1, bar=1',
+            'x-keep-replicas-stored': 1
+        }
+        with tutil.mock_keep_responses(self.locator, 200, **resp_hdr) as mock:
+            keep_client.put(self.data, copies=1)
+            req_hdr = mock.responses[0]
+            self.assertIn(
+                'X-Keep-Storage-Classes: bar, foo', req_hdr.getopt(pycurl.HTTPHEADER))
+
     def test_storage_classes_req_header(self):
+        self.assertEqual(
+            self.api_client.config()['StorageClasses'],
+            {'default': {'Default': True}})
         cases = [
             # requested, expected
             [['foo'], 'X-Keep-Storage-Classes: foo'],
             [['bar', 'foo'], 'X-Keep-Storage-Classes: bar, foo'],
-            [[], None],
+            [[], 'X-Keep-Storage-Classes: default'],
+            [None, 'X-Keep-Storage-Classes: default'],
         ]
         for req_classes, expected_header in cases:
             headers = {'x-keep-replicas-stored': 1}
-            if len(req_classes) > 0:
+            if req_classes is None or len(req_classes) == 0:
+                confirmed_hdr = 'default=1'
+            elif len(req_classes) > 0:
                 confirmed_hdr = ', '.join(["{}=1".format(cls) for cls in req_classes])
-                headers.update({'x-keep-storage-classes-confirmed': confirmed_hdr})
+            headers.update({'x-keep-storage-classes-confirmed': confirmed_hdr})
             with tutil.mock_keep_responses(self.locator, 200, **headers) as mock:
                 self.keep_client.put(self.data, copies=1, classes=req_classes)
-                resp = mock.responses[0]
-                if expected_header is not None:
-                    self.assertIn(expected_header, resp.getopt(pycurl.HTTPHEADER))
-                else:
-                    for hdr in resp.getopt(pycurl.HTTPHEADER):
-                        self.assertNotRegex(hdr, r'^X-Keep-Storage-Classes.*')
+                req_hdr = mock.responses[0]
+                self.assertIn(expected_header, req_hdr.getopt(pycurl.HTTPHEADER))
 
     def test_partial_storage_classes_put(self):
         headers = {
@@ -1368,6 +1391,8 @@ class KeepClientAPIErrorTest(unittest.TestCase):
                     return "abc"
                 elif r == "insecure":
                     return False
+                elif r == "config":
+                    return lambda: {}
                 else:
                     raise arvados.errors.KeepReadError()
         keep_client = arvados.KeepClient(api_client=ApiMock(),
index 52f2cee064905fd6a81e4e9e60a774dfc80bab55..7c7ed759c60058b5915ad1d56505dba6b56d84dd 100644 (file)
@@ -319,7 +319,17 @@ class ApiClientAuthorization < ArvadosModel
         user.last_name = "from cluster #{remote_user_prefix}"
       end
 
-      user.save!
+      begin
+        user.save!
+      rescue ActiveRecord::RecordInvalid, ActiveRecord::RecordNotUnique
+        Rails.logger.debug("remote user #{remote_user['uuid']} already exists, retrying...")
+        # Some other request won the race: retry fetching the user record.
+        user = User.find_by_uuid(remote_user['uuid'])
+        if !user
+          Rails.logger.warn("cannot find or create remote user #{remote_user['uuid']}")
+          return nil
+        end
+      end
 
       if user.is_invited && !remote_user['is_invited']
         # Remote user is not "invited" state, they should be unsetup, which
@@ -364,12 +374,24 @@ class ApiClientAuthorization < ArvadosModel
       exp = [db_current_time + Rails.configuration.Login.RemoteTokenRefresh,
              remote_token.andand['expires_at']].compact.min
       scopes = remote_token.andand['scopes'] || ['all']
-      auth = ApiClientAuthorization.find_or_create_by(uuid: token_uuid) do |auth|
-        auth.user = user
-        auth.api_token = stored_secret
-        auth.api_client_id = 0
-        auth.scopes = scopes
-        auth.expires_at = exp
+      begin
+        retries ||= 0
+        auth = ApiClientAuthorization.find_or_create_by(uuid: token_uuid) do |auth|
+          auth.user = user
+          auth.api_token = stored_secret
+          auth.api_client_id = 0
+          auth.scopes = scopes
+          auth.expires_at = exp
+        end
+      rescue ActiveRecord::RecordNotUnique
+        Rails.logger.debug("cached remote token #{token_uuid} already exists, retrying...")
+        # Some other request won the race: retry just once before erroring out
+        if (retries += 1) <= 1
+          retry
+        else
+          Rails.logger.warn("cannot find or create cached remote token #{token_uuid}")
+          return nil
+        end
       end
       auth.update_attributes!(user: user,
                               api_token: stored_secret,
diff --git a/services/api/db/migrate/20210816191509_drop_fts_index.rb b/services/api/db/migrate/20210816191509_drop_fts_index.rb
new file mode 100644 (file)
index 0000000..4ee1f55
--- /dev/null
@@ -0,0 +1,31 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+class DropFtsIndex < ActiveRecord::Migration[5.2]
+  def fts_indexes
+    {
+      "collections" => "collections_full_text_search_idx",
+      "container_requests" => "container_requests_full_text_search_idx",
+      "groups" => "groups_full_text_search_idx",
+      "jobs" => "jobs_full_text_search_idx",
+      "pipeline_instances" => "pipeline_instances_full_text_search_idx",
+      "pipeline_templates" => "pipeline_templates_full_text_search_idx",
+      "workflows" => "workflows_full_text_search_idx",
+    }
+  end
+
+  def up
+    fts_indexes.keys.each do |t|
+      i = fts_indexes[t]
+      execute "DROP INDEX IF EXISTS #{i}"
+    end
+  end
+
+  def down
+    fts_indexes.keys.each do |t|
+      i = fts_indexes[t]
+      execute "CREATE INDEX #{i} ON #{t} USING gin(#{t.classify.constantize.full_text_tsvector})"
+    end
+  end
+end
index 2bca887212a331143065d117816b81dc383f9b91..2f7748335694310b09de911104a446ce54885093 100644 (file)
@@ -238,29 +238,6 @@ SET default_tablespace = '';
 
 SET default_with_oids = false;
 
---
--- Name: groups; Type: TABLE; Schema: public; Owner: -
---
-
-CREATE TABLE public.groups (
-    id integer NOT NULL,
-    uuid character varying(255),
-    owner_uuid character varying(255),
-    created_at timestamp without time zone NOT NULL,
-    modified_by_client_uuid character varying(255),
-    modified_by_user_uuid character varying(255),
-    modified_at timestamp without time zone,
-    name character varying(255) NOT NULL,
-    description character varying(524288),
-    updated_at timestamp without time zone NOT NULL,
-    group_class character varying(255),
-    trash_at timestamp without time zone,
-    is_trashed boolean DEFAULT false NOT NULL,
-    delete_at timestamp without time zone,
-    properties jsonb DEFAULT '{}'::jsonb
-);
-
-
 --
 -- Name: api_client_authorizations; Type: TABLE; Schema: public; Owner: -
 --
@@ -571,6 +548,29 @@ CREATE SEQUENCE public.containers_id_seq
 ALTER SEQUENCE public.containers_id_seq OWNED BY public.containers.id;
 
 
+--
+-- Name: groups; Type: TABLE; Schema: public; Owner: -
+--
+
+CREATE TABLE public.groups (
+    id integer NOT NULL,
+    uuid character varying(255),
+    owner_uuid character varying(255),
+    created_at timestamp without time zone NOT NULL,
+    modified_by_client_uuid character varying(255),
+    modified_by_user_uuid character varying(255),
+    modified_at timestamp without time zone,
+    name character varying(255) NOT NULL,
+    description character varying(524288),
+    updated_at timestamp without time zone NOT NULL,
+    group_class character varying(255),
+    trash_at timestamp without time zone,
+    is_trashed boolean DEFAULT false NOT NULL,
+    delete_at timestamp without time zone,
+    properties jsonb DEFAULT '{}'::jsonb
+);
+
+
 --
 -- Name: groups_id_seq; Type: SEQUENCE; Schema: public; Owner: -
 --
@@ -1722,13 +1722,6 @@ CREATE INDEX authorized_keys_search_index ON public.authorized_keys USING btree
 CREATE INDEX collection_index_on_properties ON public.collections USING gin (properties);
 
 
---
--- Name: collections_full_text_search_idx; Type: INDEX; Schema: public; Owner: -
---
-
-CREATE INDEX collections_full_text_search_idx ON public.collections USING gin (to_tsvector('english'::regconfig, substr((((((((((((((((((COALESCE(owner_uuid, ''::character varying))::text || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(portable_data_hash, ''::character varying))::text) || ' '::text) || (COALESCE(uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || (COALESCE(description, ''::character varying))::text) || ' '::text) || COALESCE((properties)::text, ''::text)) || ' '::text) || COALESCE(file_names, ''::text)), 0, 1000000)));
-
-
 --
 -- Name: collections_search_index; Type: INDEX; Schema: public; Owner: -
 --
@@ -1743,13 +1736,6 @@ CREATE INDEX collections_search_index ON public.collections USING btree (owner_u
 CREATE INDEX collections_trgm_text_search_idx ON public.collections USING gin (((((((((((((((((((COALESCE(owner_uuid, ''::character varying))::text || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(portable_data_hash, ''::character varying))::text) || ' '::text) || (COALESCE(uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || (COALESCE(description, ''::character varying))::text) || ' '::text) || COALESCE((properties)::text, ''::text)) || ' '::text) || COALESCE(file_names, ''::text))) public.gin_trgm_ops);
 
 
---
--- Name: container_requests_full_text_search_idx; Type: INDEX; Schema: public; Owner: -
---
-
-CREATE INDEX container_requests_full_text_search_idx ON public.container_requests USING gin (to_tsvector('english'::regconfig, substr((((((((((((((((((((((((((((((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(description, ''::text)) || ' '::text) || COALESCE((properties)::text, ''::text)) || ' '::text) || (COALESCE(state, ''::character varying))::text) || ' '::text) || (COALESCE(requesting_container_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(container_uuid, ''::character varying))::text) || ' '::text) || COALESCE(runtime_constraints, ''::text)) || ' '::text) || (COALESCE(container_image, ''::character varying))::text) || ' '::text) || COALESCE(environment, ''::text)) || ' '::text) || (COALESCE(cwd, ''::character varying))::text) || ' '::text) || COALESCE(command, ''::text)) || ' '::text) || (COALESCE(output_path, ''::character varying))::text) || ' '::text) || COALESCE(filters, ''::text)) || ' '::text) || COALESCE(scheduling_parameters, ''::text)) || ' '::text) || (COALESCE(output_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(log_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(output_name, ''::character varying))::text), 0, 1000000)));
-
-
 --
 -- Name: container_requests_index_on_properties; Type: INDEX; Schema: public; Owner: -
 --
@@ -1785,13 +1771,6 @@ CREATE INDEX containers_search_index ON public.containers USING btree (uuid, own
 CREATE INDEX group_index_on_properties ON public.groups USING gin (properties);
 
 
---
--- Name: groups_full_text_search_idx; Type: INDEX; Schema: public; Owner: -
---
-
-CREATE INDEX groups_full_text_search_idx ON public.groups USING gin (to_tsvector('english'::regconfig, substr((((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || (COALESCE(description, ''::character varying))::text) || ' '::text) || (COALESCE(group_class, ''::character varying))::text) || ' '::text) || COALESCE((properties)::text, ''::text)), 0, 1000000)));
-
-
 --
 -- Name: groups_search_index; Type: INDEX; Schema: public; Owner: -
 --
@@ -2779,13 +2758,6 @@ CREATE UNIQUE INDEX index_workflows_on_uuid ON public.workflows USING btree (uui
 CREATE INDEX job_tasks_search_index ON public.job_tasks USING btree (uuid, owner_uuid, modified_by_client_uuid, modified_by_user_uuid, job_uuid, created_by_job_task_uuid);
 
 
---
--- Name: jobs_full_text_search_idx; Type: INDEX; Schema: public; Owner: -
---
-
-CREATE INDEX jobs_full_text_search_idx ON public.jobs USING gin (to_tsvector('english'::regconfig, substr((((((((((((((((((((((((((((((((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(submit_id, ''::character varying))::text) || ' '::text) || (COALESCE(script, ''::character varying))::text) || ' '::text) || (COALESCE(script_version, ''::character varying))::text) || ' '::text) || COALESCE(script_parameters, ''::text)) || ' '::text) || (COALESCE(cancelled_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(cancelled_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(output, ''::character varying))::text) || ' '::text) || (COALESCE(is_locked_by_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(log, ''::character varying))::text) || ' '::text) || COALESCE(tasks_summary, ''::text)) || ' '::text) || COALESCE(runtime_constraints, ''::text)) || ' '::text) || (COALESCE(repository, ''::character varying))::text) || ' '::text) || (COALESCE(supplied_script_version, ''::character varying))::text) || ' '::text) || (COALESCE(docker_image_locator, ''::character varying))::text) || ' '::text) || (COALESCE(description, ''::character varying))::text) || ' '::text) || (COALESCE(state, ''::character varying))::text) || ' '::text) || (COALESCE(arvados_sdk_version, ''::character varying))::text) || ' '::text) || COALESCE(components, ''::text)), 0, 1000000)));
-
-
 --
 -- Name: jobs_search_index; Type: INDEX; Schema: public; Owner: -
 --
@@ -2877,13 +2849,6 @@ CREATE INDEX permission_target ON public.materialized_permissions USING btree (t
 CREATE UNIQUE INDEX permission_user_target ON public.materialized_permissions USING btree (user_uuid, target_uuid);
 
 
---
--- Name: pipeline_instances_full_text_search_idx; Type: INDEX; Schema: public; Owner: -
---
-
-CREATE INDEX pipeline_instances_full_text_search_idx ON public.pipeline_instances USING gin (to_tsvector('english'::regconfig, substr((((((((((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(pipeline_template_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(components, ''::text)) || ' '::text) || COALESCE(properties, ''::text)) || ' '::text) || (COALESCE(state, ''::character varying))::text) || ' '::text) || COALESCE(components_summary, ''::text)) || ' '::text) || (COALESCE(description, ''::character varying))::text), 0, 1000000)));
-
-
 --
 -- Name: pipeline_instances_search_index; Type: INDEX; Schema: public; Owner: -
 --
@@ -2905,13 +2870,6 @@ CREATE INDEX pipeline_instances_trgm_text_search_idx ON public.pipeline_instance
 CREATE UNIQUE INDEX pipeline_template_owner_uuid_name_unique ON public.pipeline_templates USING btree (owner_uuid, name);
 
 
---
--- Name: pipeline_templates_full_text_search_idx; Type: INDEX; Schema: public; Owner: -
---
-
-CREATE INDEX pipeline_templates_full_text_search_idx ON public.pipeline_templates USING gin (to_tsvector('english'::regconfig, substr((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(components, ''::text)) || ' '::text) || (COALESCE(description, ''::character varying))::text), 0, 1000000)));
-
-
 --
 -- Name: pipeline_templates_search_index; Type: INDEX; Schema: public; Owner: -
 --
@@ -2968,13 +2926,6 @@ CREATE INDEX users_search_index ON public.users USING btree (uuid, owner_uuid, m
 CREATE INDEX virtual_machines_search_index ON public.virtual_machines USING btree (uuid, owner_uuid, modified_by_client_uuid, modified_by_user_uuid, hostname);
 
 
---
--- Name: workflows_full_text_search_idx; Type: INDEX; Schema: public; Owner: -
---
-
-CREATE INDEX workflows_full_text_search_idx ON public.workflows USING gin (to_tsvector('english'::regconfig, substr((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(description, ''::text)), 0, 1000000)));
-
-
 --
 -- Name: workflows_search_idx; Type: INDEX; Schema: public; Owner: -
 --
@@ -3194,6 +3145,7 @@ INSERT INTO "schema_migrations" (version) VALUES
 ('20201202174753'),
 ('20210108033940'),
 ('20210126183521'),
-('20210621204455');
+('20210621204455'),
+('20210816191509');
 
 
index 8f4244f5b85a892fef270d7818d0eada7d972c0f..409e48a6f090a3b348cd5d551bf35a91427e42a9 100644 (file)
@@ -31,7 +31,10 @@ module RecordFilters
     model_table_name = model_class.table_name
     filters.each do |filter|
       attrs_in, operator, operand = filter
-      if attrs_in == 'any' && operator != '@@'
+      if operator == '@@'
+        raise ArgumentError.new("Full text search operator is no longer supported")
+      end
+      if attrs_in == 'any'
         attrs = model_class.searchable_columns(operator)
       elsif attrs_in.is_a? Array
         attrs = attrs_in
@@ -55,22 +58,6 @@ module RecordFilters
         attrs = []
       end
 
-      if operator == '@@'
-        # Full-text search
-        if attrs_in != 'any'
-          raise ArgumentError.new("Full text search on individual columns is not supported")
-        end
-        if operand.is_a? Array
-          raise ArgumentError.new("Full text search not supported for array operands")
-        end
-
-        # Skip the generic per-column operator loop below
-        attrs = []
-        # Use to_tsquery since plainto_tsquery does not support prefix
-        # search. And, split operand and join the words with ' & '
-        cond_out << model_class.full_text_tsvector+" @@ to_tsquery(?)"
-        param_out << operand.split.join(' & ')
-      end
       attrs.each do |attr|
         subproperty = attr.split(".", 2)
 
index 9b067aa263d2baede05c8a325560117a7d9df109..ab76417902214162506707d3e642f93539ffe7ed 100644 (file)
@@ -521,7 +521,7 @@ running_job_in_publicly_accessible_project:
   uuid: zzzzz-8i9sb-n7omg50bvt0m1nf
   owner_uuid: zzzzz-j7d0g-zhxawtyetzwc5f0
   modified_by_user_uuid: zzzzz-tpzed-xurymjxw79nv3jz
-  repository: active/foo
+  repository: active/bar
   script: running_job_script
   script_version: 4fe459abe02d9b365932b8f5dc419439ab4e2577
   state: Running
index 0865503281fe247f0fd027d4054d846a9370e9cf..9621b3effc1c74f0b832c021b3c9d2b99ef11586 100644 (file)
@@ -111,12 +111,9 @@ has_job:
 components_is_jobspec:
   # Helps test that clients cope with funny-shaped components.
   # For an example, see #3321.
-  uuid: zzzzz-d1hrv-jobspeccomponts
-  created_at: <%= 30.minute.ago.to_s(:db) %>
+  uuid: zzzzz-d1hrv-1yfj61234abcdk4
+  created_at: <%= 2.minute.ago.to_s(:db) %>
   owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
-  created_at: 2014-04-14 12:35:04 -0400
-  updated_at: 2014-04-14 12:35:04 -0400
-  modified_at: 2014-04-14 12:35:04 -0400
   modified_by_client_uuid: zzzzz-ozdt8-brczlopd8u8d0jr
   modified_by_user_uuid: zzzzz-tpzed-xurymjxw79nv3jz
   state: RunningOnServer
index 04d0329b26caac6ce30f500b091602a727922190..dd8eeaa7bead1e260d46e5da4142707792edd42a 100644 (file)
@@ -29,34 +29,14 @@ class Arvados::V1::FiltersTest < ActionController::TestCase
                  json_response['errors'].join(' '))
   end
 
-  test 'error message for full text search on a specific column' do
+  test 'error message for unsupported full text search' do
     @controller = Arvados::V1::CollectionsController.new
     authorize_with :active
     get :index, params: {
       filters: [['uuid', '@@', 'abcdef']],
     }
     assert_response 422
-    assert_match(/not supported/, json_response['errors'].join(' '))
-  end
-
-  test 'difficult characters in full text search' do
-    @controller = Arvados::V1::CollectionsController.new
-    authorize_with :active
-    get :index, params: {
-      filters: [['any', '@@', 'a|b"c']],
-    }
-    assert_response :success
-    # (Doesn't matter so much which results are returned.)
-  end
-
-  test 'array operand in full text search' do
-    @controller = Arvados::V1::CollectionsController.new
-    authorize_with :active
-    get :index, params: {
-      filters: [['any', '@@', ['abc', 'def']]],
-    }
-    assert_response 422
-    assert_match(/not supported/, json_response['errors'].join(' '))
+    assert_match(/no longer supported/, json_response['errors'].join(' '))
   end
 
   test 'api responses provide timestamps with nanoseconds' do
@@ -100,58 +80,6 @@ class Arvados::V1::FiltersTest < ActionController::TestCase
     end
   end
 
-  test "full text search with count='none'" do
-    @controller = Arvados::V1::GroupsController.new
-    authorize_with :admin
-
-    get :contents, params: {
-      format: :json,
-      count: 'none',
-      limit: 1000,
-      filters: [['any', '@@', Rails.configuration.ClusterID]],
-    }
-
-    assert_response :success
-
-    all_objects = Hash.new(0)
-    json_response['items'].map{|o| o['kind']}.each{|t| all_objects[t] += 1}
-
-    assert_equal true, all_objects['arvados#group']>0
-    assert_equal true, all_objects['arvados#job']>0
-    assert_equal true, all_objects['arvados#pipelineInstance']>0
-    assert_equal true, all_objects['arvados#pipelineTemplate']>0
-
-    # Perform test again mimicking a second page request with:
-    # last_object_class = PipelineInstance
-    #   and hence groups and jobs should not be included in the response
-    # offset = 5, which means first 5 pipeline instances were already received in page 1
-    #   and hence the remaining pipeline instances and all other object types should be included in the response
-
-    @test_counter = 0  # Reset executed action counter
-
-    @controller = Arvados::V1::GroupsController.new
-
-    get :contents, params: {
-      format: :json,
-      count: 'none',
-      limit: 1000,
-      offset: '5',
-      last_object_class: 'PipelineInstance',
-      filters: [['any', '@@', Rails.configuration.ClusterID]],
-    }
-
-    assert_response :success
-
-    second_page = Hash.new(0)
-    json_response['items'].map{|o| o['kind']}.each{|t| second_page[t] += 1}
-
-    assert_equal false, second_page.include?('arvados#group')
-    assert_equal false, second_page.include?('arvados#job')
-    assert_equal true, second_page['arvados#pipelineInstance']>0
-    assert_equal all_objects['arvados#pipelineInstance'], second_page['arvados#pipelineInstance']+5
-    assert_equal true, second_page['arvados#pipelineTemplate']>0
-  end
-
   [['prop1', '=', 'value1', [:collection_with_prop1_value1], [:collection_with_prop1_value2, :collection_with_prop2_1]],
    ['prop1', '!=', 'value1', [:collection_with_prop1_value2, :collection_with_prop2_1], [:collection_with_prop1_value1]],
    ['prop1', 'exists', true, [:collection_with_prop1_value1, :collection_with_prop1_value2, :collection_with_prop1_value3, :collection_with_prop1_other1], [:collection_with_prop2_1]],
index 73cbad64303391e82ef593d7a9cffc080ae6084f..070e964e538c6d0f23992b5d1426be7f88f7146d 100644 (file)
@@ -373,75 +373,6 @@ class CollectionsApiTest < ActionDispatch::IntegrationTest
     end
   end
 
-  test "search collection using full text search" do
-    # create collection to be searched for
-    signed_manifest = Collection.sign_manifest(". 85877ca2d7e05498dd3d109baf2df106+95+A3a4e26a366ee7e4ed3e476ccf05354761be2e4ae@545a9920 0:95:file_in_subdir1\n./subdir2/subdir3 2bbc341c702df4d8f42ec31f16c10120+64+A315d7e7bad2ce937e711fc454fae2d1194d14d64@545a9920 0:32:file1_in_subdir3.txt 32:32:file2_in_subdir3.txt\n./subdir2/subdir3/subdir4 2bbc341c702df4d8f42ec31f16c10120+64+A315d7e7bad2ce937e711fc454fae2d1194d14d64@545a9920 0:32:file3_in_subdir4.txt 32:32:file4_in_subdir4.txt\n", api_token(:active))
-    post "/arvados/v1/collections",
-      params: {
-        format: :json,
-        collection: {description: 'specific collection description', manifest_text: signed_manifest}.to_json,
-      },
-      headers: auth(:active)
-    assert_response :success
-    assert_equal true, json_response['manifest_text'].include?('file4_in_subdir4.txt')
-
-    # search using the filename
-    search_using_full_text_search 'subdir2', 0
-    search_using_full_text_search 'subdir2:*', 1
-    search_using_full_text_search 'subdir2/subdir3/subdir4', 1
-    search_using_full_text_search 'file4:*', 1
-    search_using_full_text_search 'file4_in_subdir4.txt', 1
-    search_using_full_text_search 'subdir2 file4:*', 0      # first word is incomplete
-    search_using_full_text_search 'subdir2/subdir3/subdir4 file4:*', 1
-    search_using_full_text_search 'subdir2/subdir3/subdir4 file4_in_subdir4.txt', 1
-    search_using_full_text_search 'ile4', 0                 # not a prefix match
-  end
-
-  def search_using_full_text_search search_filter, expected_items
-    get '/arvados/v1/collections',
-      params: {:filters => [['any', '@@', search_filter]].to_json},
-      headers: auth(:active)
-    assert_response :success
-    response_items = json_response['items']
-    assert_not_nil response_items
-    if expected_items == 0
-      assert_empty response_items
-    else
-      refute_empty response_items
-      first_item = response_items.first
-      assert_not_nil first_item
-    end
-  end
-
-  # search for the filename in the file_names column and expect error
-  test "full text search not supported for individual columns" do
-    get '/arvados/v1/collections',
-      params: {:filters => [['name', '@@', 'General']].to_json},
-      headers: auth(:active)
-    assert_response 422
-  end
-
-  [
-    'quick fox',
-    'quick_brown fox',
-    'brown_ fox',
-    'fox dogs',
-  ].each do |search_filter|
-    test "full text search ignores special characters and finds with filter #{search_filter}" do
-      # description: The quick_brown_fox jumps over the lazy_dog
-      # full text search treats '_' as space apparently
-      get '/arvados/v1/collections',
-        params: {:filters => [['any', '@@', search_filter]].to_json},
-        headers: auth(:active)
-      assert_response 200
-      response_items = json_response['items']
-      assert_not_nil response_items
-      first_item = response_items.first
-      refute_empty first_item
-      assert_equal first_item['description'], 'The quick_brown_fox jumps over the lazy_dog'
-    end
-  end
-
   test "create and get collection with properties" do
     # create collection to be searched for
     signed_manifest = Collection.sign_manifest(". bad42fa702ae3ea7d888fef11b46f450+44 0:44:my_test_file.txt\n", api_token(:active))
index aa67166f7e613a7b71f1ce8b798cf3b23b060e4a..e76f2b54068ad729fe94f87a3d2150846674db0b 100644 (file)
@@ -64,46 +64,6 @@ class GroupsTest < ActionDispatch::IntegrationTest
     end
   end
 
-  [
-    ['Collection_', true],            # collections and pipelines templates
-    ['hash', true],                   # pipeline templates
-    ['fa7aeb5140e2848d39b', false],   # script_parameter of pipeline instances
-    ['fa7aeb5140e2848d39b:*', true],  # script_parameter of pipeline instances
-    ['project pipeline', true],       # finds "Completed pipeline in A Project"
-    ['project pipeli:*', true],       # finds "Completed pipeline in A Project"
-    ['proje pipeli:*', false],        # first word is incomplete, so no prefix match
-    ['no-such-thing', false],         # script_parameter of pipeline instances
-  ].each do |search_filter, expect_results|
-    test "full text search of group-owned objects for #{search_filter}" do
-      get "/arvados/v1/groups/contents",
-        params: {
-          id: groups(:aproject).uuid,
-          limit: 5,
-          :filters => [['any', '@@', search_filter]].to_json
-        },
-        headers: auth(:active)
-      assert_response :success
-      if expect_results
-        refute_empty json_response['items']
-        json_response['items'].each do |item|
-          assert item['uuid']
-          assert_equal groups(:aproject).uuid, item['owner_uuid']
-        end
-      else
-        assert_empty json_response['items']
-      end
-    end
-  end
-
-  test "full text search is not supported for individual columns" do
-    get "/arvados/v1/groups/contents",
-      params: {
-        :filters => [['name', '@@', 'Private']].to_json
-      },
-      headers: auth(:active)
-    assert_response 422
-  end
-
   test "group contents with include trash collections" do
     get "/arvados/v1/groups/contents",
       params: {
index 64f78071350a6736994986eff3267c541e72b4f6..1e2e08059ef92c75827bcea9baa5d95edc2945c4 100644 (file)
@@ -155,51 +155,6 @@ class ArvadosModelTest < ActiveSupport::TestCase
     end
   end
 
-  test "full text search index exists on models" do
-    indexes = {}
-    conn = ActiveRecord::Base.connection
-    conn.exec_query("SELECT i.relname as indname,
-      i.relowner as indowner,
-      idx.indrelid::regclass::text as table,
-      am.amname as indam,
-      idx.indkey,
-      ARRAY(
-            SELECT pg_get_indexdef(idx.indexrelid, k + 1, true)
-                   FROM generate_subscripts(idx.indkey, 1) as k
-                   ORDER BY k
-                   ) as keys,
-      idx.indexprs IS NOT NULL as indexprs,
-      idx.indpred IS NOT NULL as indpred
-      FROM   pg_index as idx
-      JOIN   pg_class as i
-      ON     i.oid = idx.indexrelid
-      JOIN   pg_am as am
-      ON     i.relam = am.oid
-      JOIN   pg_namespace as ns
-      ON     ns.oid = i.relnamespace
-      AND    ns.nspname = ANY(current_schemas(false))").each do |idx|
-      if idx['keys'].match(/to_tsvector/)
-        indexes[idx['table']] ||= []
-        indexes[idx['table']] << idx
-      end
-    end
-    fts_tables =  ["collections", "container_requests", "groups", "jobs",
-                   "pipeline_instances", "pipeline_templates", "workflows"]
-    fts_tables.each do |table|
-      table_class = table.classify.constantize
-      if table_class.respond_to?('full_text_searchable_columns')
-        expect = table_class.full_text_searchable_columns
-        ok = false
-        indexes[table].andand.each do |idx|
-          if expect == idx['keys'].scan(/COALESCE\(([A-Za-z_]+)/).flatten
-            ok = true
-          end
-        end
-        assert ok, "#{table} has no full-text index\nexpect: #{expect.inspect}\nfound: #{indexes[table].inspect}"
-      end
-    end
-  end
-
   [
     %w[collections collections_trgm_text_search_idx],
     %w[container_requests container_requests_trgm_text_search_idx],
index 897447dd11c7a95a5b113d867fb0de28cbed6844..16dcd2aaf6ee5d57e9bb60176a643a9116df8f9e 100644 (file)
@@ -23,6 +23,7 @@ import (
        "os"
        "sort"
        "strings"
+       "sync/atomic"
        "time"
 
        "git.arvados.org/arvados.git/lib/config"
@@ -367,6 +368,94 @@ func (s *HandlerSuite) TestReadsOrderedByStorageClassPriority(c *check.C) {
        }
 }
 
+func (s *HandlerSuite) TestPutWithNoWritableVolumes(c *check.C) {
+       s.cluster.Volumes = map[string]arvados.Volume{
+               "zzzzz-nyw5e-111111111111111": {
+                       Driver:         "mock",
+                       Replication:    1,
+                       ReadOnly:       true,
+                       StorageClasses: map[string]bool{"class1": true}},
+       }
+       c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
+       resp := IssueRequest(s.handler,
+               &RequestTester{
+                       method:         "PUT",
+                       uri:            "/" + TestHash,
+                       requestBody:    TestBlock,
+                       storageClasses: "class1",
+               })
+       c.Check(resp.Code, check.Equals, FullError.HTTPCode)
+       c.Check(s.handler.volmgr.mountMap["zzzzz-nyw5e-111111111111111"].Volume.(*MockVolume).CallCount("Put"), check.Equals, 0)
+}
+
+func (s *HandlerSuite) TestConcurrentWritesToMultipleStorageClasses(c *check.C) {
+       s.cluster.Volumes = map[string]arvados.Volume{
+               "zzzzz-nyw5e-111111111111111": {
+                       Driver:         "mock",
+                       Replication:    1,
+                       StorageClasses: map[string]bool{"class1": true}},
+               "zzzzz-nyw5e-121212121212121": {
+                       Driver:         "mock",
+                       Replication:    1,
+                       StorageClasses: map[string]bool{"class1": true, "class2": true}},
+               "zzzzz-nyw5e-222222222222222": {
+                       Driver:         "mock",
+                       Replication:    1,
+                       StorageClasses: map[string]bool{"class2": true}},
+       }
+
+       for _, trial := range []struct {
+               setCounter uint32 // value to stuff vm.counter, to control offset
+               classes    string // desired classes
+               put111     int    // expected number of "put" ops on 11111... after 2x put reqs
+               put121     int    // expected number of "put" ops on 12121...
+               put222     int    // expected number of "put" ops on 22222...
+               cmp111     int    // expected number of "compare" ops on 11111... after 2x put reqs
+               cmp121     int    // expected number of "compare" ops on 12121...
+               cmp222     int    // expected number of "compare" ops on 22222...
+       }{
+               {0, "class1",
+                       1, 0, 0,
+                       2, 1, 0}, // first put compares on all vols with class1; second put succeeds after checking 111
+               {0, "class2",
+                       0, 1, 0,
+                       0, 2, 1}, // first put compares on all vols with class2; second put succeeds after checking 121
+               {0, "class1,class2",
+                       1, 1, 0,
+                       2, 2, 1}, // first put compares on all vols; second put succeeds after checking 111 and 121
+               {1, "class1,class2",
+                       0, 1, 0, // vm.counter offset is 1 so the first volume attempted is 121
+                       2, 2, 1}, // first put compares on all vols; second put succeeds after checking 111 and 121
+               {0, "class1,class2,class404",
+                       1, 1, 0,
+                       2, 2, 1}, // first put compares on all vols; second put doesn't compare on 222 because it already satisfied class2 on 121
+       } {
+               c.Logf("%+v", trial)
+               s.cluster.StorageClasses = map[string]arvados.StorageClassConfig{
+                       "class1": {},
+                       "class2": {},
+                       "class3": {},
+               }
+               c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
+               atomic.StoreUint32(&s.handler.volmgr.counter, trial.setCounter)
+               for i := 0; i < 2; i++ {
+                       IssueRequest(s.handler,
+                               &RequestTester{
+                                       method:         "PUT",
+                                       uri:            "/" + TestHash,
+                                       requestBody:    TestBlock,
+                                       storageClasses: trial.classes,
+                               })
+               }
+               c.Check(s.handler.volmgr.mountMap["zzzzz-nyw5e-111111111111111"].Volume.(*MockVolume).CallCount("Put"), check.Equals, trial.put111)
+               c.Check(s.handler.volmgr.mountMap["zzzzz-nyw5e-121212121212121"].Volume.(*MockVolume).CallCount("Put"), check.Equals, trial.put121)
+               c.Check(s.handler.volmgr.mountMap["zzzzz-nyw5e-222222222222222"].Volume.(*MockVolume).CallCount("Put"), check.Equals, trial.put222)
+               c.Check(s.handler.volmgr.mountMap["zzzzz-nyw5e-111111111111111"].Volume.(*MockVolume).CallCount("Compare"), check.Equals, trial.cmp111)
+               c.Check(s.handler.volmgr.mountMap["zzzzz-nyw5e-121212121212121"].Volume.(*MockVolume).CallCount("Compare"), check.Equals, trial.cmp121)
+               c.Check(s.handler.volmgr.mountMap["zzzzz-nyw5e-222222222222222"].Volume.(*MockVolume).CallCount("Compare"), check.Equals, trial.cmp222)
+       }
+}
+
 // Test TOUCH requests.
 func (s *HandlerSuite) TestTouchHandler(c *check.C) {
        c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
index 2b469a13eb993e0827bac8ae1ebe4db46bc8c4df..910033ebb1d8408c90a4bde441d7edc8d99b109a 100644 (file)
@@ -18,6 +18,7 @@ import (
        "strconv"
        "strings"
        "sync"
+       "sync/atomic"
        "time"
 
        "git.arvados.org/arvados.git/sdk/go/arvados"
@@ -741,6 +742,7 @@ func GetBlock(ctx context.Context, volmgr *RRVolumeManager, hash string, buf []b
 }
 
 type putProgress struct {
+       classNeeded      map[string]bool
        classTodo        map[string]bool
        mountUsed        map[*VolumeMount]bool
        totalReplication int
@@ -769,7 +771,7 @@ func (pr putProgress) ClassReplication() string {
 
 func (pr *putProgress) Add(mnt *VolumeMount) {
        if pr.mountUsed[mnt] {
-               logrus.Warnf("BUG? superfluous extra write to mount %s", mnt)
+               logrus.Warnf("BUG? superfluous extra write to mount %s", mnt.UUID)
                return
        }
        pr.mountUsed[mnt] = true
@@ -780,6 +782,21 @@ func (pr *putProgress) Add(mnt *VolumeMount) {
        }
 }
 
+func (pr *putProgress) Sub(mnt *VolumeMount) {
+       if !pr.mountUsed[mnt] {
+               logrus.Warnf("BUG? Sub called with no prior matching Add: %s", mnt.UUID)
+               return
+       }
+       pr.mountUsed[mnt] = false
+       pr.totalReplication -= mnt.Replication
+       for class := range mnt.StorageClasses {
+               pr.classDone[class] -= mnt.Replication
+               if pr.classNeeded[class] {
+                       pr.classTodo[class] = true
+               }
+       }
+}
+
 func (pr *putProgress) Done() bool {
        return len(pr.classTodo) == 0 && pr.totalReplication > 0
 }
@@ -800,47 +817,65 @@ func (pr *putProgress) Want(mnt *VolumeMount) bool {
        return false
 }
 
-func newPutResult(classes []string) putProgress {
+func (pr *putProgress) Copy() *putProgress {
+       cp := putProgress{
+               classNeeded:      pr.classNeeded,
+               classTodo:        make(map[string]bool, len(pr.classTodo)),
+               classDone:        make(map[string]int, len(pr.classDone)),
+               mountUsed:        make(map[*VolumeMount]bool, len(pr.mountUsed)),
+               totalReplication: pr.totalReplication,
+       }
+       for k, v := range pr.classTodo {
+               cp.classTodo[k] = v
+       }
+       for k, v := range pr.classDone {
+               cp.classDone[k] = v
+       }
+       for k, v := range pr.mountUsed {
+               cp.mountUsed[k] = v
+       }
+       return &cp
+}
+
+func newPutProgress(classes []string) putProgress {
        pr := putProgress{
-               classTodo: make(map[string]bool, len(classes)),
-               classDone: map[string]int{},
-               mountUsed: map[*VolumeMount]bool{},
+               classNeeded: make(map[string]bool, len(classes)),
+               classTodo:   make(map[string]bool, len(classes)),
+               classDone:   map[string]int{},
+               mountUsed:   map[*VolumeMount]bool{},
        }
        for _, c := range classes {
                if c != "" {
+                       pr.classNeeded[c] = true
                        pr.classTodo[c] = true
                }
        }
        return pr
 }
 
-// PutBlock Stores the BLOCK (identified by the content id HASH) in Keep.
-//
-// PutBlock(ctx, block, hash)
-//   Stores the BLOCK (identified by the content id HASH) in Keep.
-//
-//   The MD5 checksum of the block must be identical to the content id HASH.
-//   If not, an error is returned.
+// PutBlock stores the given block on one or more volumes.
 //
-//   PutBlock stores the BLOCK on the first Keep volume with free space.
-//   A failure code is returned to the user only if all volumes fail.
+// The MD5 checksum of the block must match the given hash.
 //
-//   On success, PutBlock returns nil.
-//   On failure, it returns a KeepError with one of the following codes:
+// The block is written to each writable volume (ordered by priority
+// and then UUID, see volume.go) until at least one replica has been
+// stored in each of the requested storage classes.
 //
-//   500 Collision
-//          A different block with the same hash already exists on this
-//          Keep server.
-//   422 MD5Fail
-//          The MD5 hash of the BLOCK does not match the argument HASH.
-//   503 Full
-//          There was not enough space left in any Keep volume to store
-//          the object.
-//   500 Fail
-//          The object could not be stored for some other reason (e.g.
-//          all writes failed). The text of the error message should
-//          provide as much detail as possible.
+// The returned error, if any, is a KeepError with one of the
+// following codes:
 //
+// 500 Collision
+//        A different block with the same hash already exists on this
+//        Keep server.
+// 422 MD5Fail
+//        The MD5 hash of the BLOCK does not match the argument HASH.
+// 503 Full
+//        There was not enough space left in any Keep volume to store
+//        the object.
+// 500 Fail
+//        The object could not be stored for some other reason (e.g.
+//        all writes failed). The text of the error message should
+//        provide as much detail as possible.
 func PutBlock(ctx context.Context, volmgr *RRVolumeManager, block []byte, hash string, wantStorageClasses []string) (putProgress, error) {
        log := ctxlog.FromContext(ctx)
 
@@ -851,72 +886,88 @@ func PutBlock(ctx context.Context, volmgr *RRVolumeManager, block []byte, hash s
                return putProgress{}, RequestHashError
        }
 
-       result := newPutResult(wantStorageClasses)
+       result := newPutProgress(wantStorageClasses)
 
        // If we already have this data, it's intact on disk, and we
        // can update its timestamp, return success. If we have
        // different data with the same hash, return failure.
-       if err := CompareAndTouch(ctx, volmgr, hash, block, &result); err != nil {
+       if err := CompareAndTouch(ctx, volmgr, hash, block, &result); err != nil || result.Done() {
                return result, err
        }
        if ctx.Err() != nil {
                return result, ErrClientDisconnect
        }
 
-       // Choose a Keep volume to write to.
-       // If this volume fails, try all of the volumes in order.
-       if mnt := volmgr.NextWritable(); mnt == nil || !result.Want(mnt) {
-               // fall through to "try all volumes" below
-       } else if err := mnt.Put(ctx, hash, block); err != nil {
-               log.WithError(err).Errorf("%s: Put(%s) failed", mnt.Volume, hash)
-       } else {
-               result.Add(mnt)
-               if result.Done() {
-                       return result, nil
-               }
-       }
-       if ctx.Err() != nil {
-               return putProgress{}, ErrClientDisconnect
-       }
-
-       writables := volmgr.AllWritable()
+       writables := volmgr.NextWritable()
        if len(writables) == 0 {
                log.Error("no writable volumes")
-               return putProgress{}, FullError
+               return result, FullError
        }
 
-       allFull := true
+       var wg sync.WaitGroup
+       var mtx sync.Mutex
+       cond := sync.Cond{L: &mtx}
+       // pending predicts what result will be if all pending writes
+       // succeed.
+       pending := result.Copy()
+       var allFull atomic.Value
+       allFull.Store(true)
+
+       // We hold the lock for the duration of the "each volume" loop
+       // below, except when it is released during cond.Wait().
+       mtx.Lock()
+
        for _, mnt := range writables {
+               // Wait until our decision to use this mount does not
+               // depend on the outcome of pending writes.
+               for result.Want(mnt) && !pending.Want(mnt) {
+                       cond.Wait()
+               }
                if !result.Want(mnt) {
                        continue
                }
-               err := mnt.Put(ctx, hash, block)
-               if ctx.Err() != nil {
-                       return result, ErrClientDisconnect
-               }
-               switch err {
-               case nil:
-                       result.Add(mnt)
-                       if result.Done() {
-                               return result, nil
+               mnt := mnt
+               pending.Add(mnt)
+               wg.Add(1)
+               go func() {
+                       log.Debugf("PutBlock: start write to %s", mnt.UUID)
+                       defer wg.Done()
+                       err := mnt.Put(ctx, hash, block)
+
+                       mtx.Lock()
+                       if err != nil {
+                               log.Debugf("PutBlock: write to %s failed", mnt.UUID)
+                               pending.Sub(mnt)
+                       } else {
+                               log.Debugf("PutBlock: write to %s succeeded", mnt.UUID)
+                               result.Add(mnt)
                        }
-                       continue
-               case FullError:
-                       continue
-               default:
-                       // The volume is not full but the
-                       // write did not succeed.  Report the
-                       // error and continue trying.
-                       allFull = false
-                       log.WithError(err).Errorf("%s: Put(%s) failed", mnt.Volume, hash)
-               }
+                       cond.Broadcast()
+                       mtx.Unlock()
+
+                       if err != nil && err != FullError && ctx.Err() == nil {
+                               // The volume is not full but the
+                               // write did not succeed.  Report the
+                               // error and continue trying.
+                               allFull.Store(false)
+                               log.WithError(err).Errorf("%s: Put(%s) failed", mnt.Volume, hash)
+                       }
+               }()
+       }
+       mtx.Unlock()
+       wg.Wait()
+       if ctx.Err() != nil {
+               return result, ErrClientDisconnect
+       }
+       if result.Done() {
+               return result, nil
        }
 
        if result.totalReplication > 0 {
                // Some, but not all, of the storage classes were
                // satisfied. This qualifies as success.
                return result, nil
-       } else if allFull {
+       } else if allFull.Load().(bool) {
                log.Error("all volumes with qualifying storage classes are full")
                return putProgress{}, FullError
        } else {
index 9bfc6ca3e5191d2953ceac75f915a07cab19c69f..3f7c9cb79b4b24b71c3c441e49235fd657d77e69 100644 (file)
@@ -344,11 +344,11 @@ func makeRRVolumeManager(logger logrus.FieldLogger, cluster *arvados.Cluster, my
                        vm.writables = append(vm.writables, mnt)
                }
        }
-       // pri(i): return highest priority of any storage class
-       // offered by vm.readables[i]
-       pri := func(i int) int {
+       // pri(mnt): return highest priority of any storage class
+       // offered by mnt
+       pri := func(mnt *VolumeMount) int {
                any, best := false, 0
-               for class := range vm.readables[i].KeepMount.StorageClasses {
+               for class := range mnt.KeepMount.StorageClasses {
                        if p := cluster.StorageClasses[class].Priority; !any || best < p {
                                best = p
                                any = true
@@ -356,14 +356,20 @@ func makeRRVolumeManager(logger logrus.FieldLogger, cluster *arvados.Cluster, my
                }
                return best
        }
-       // sort vm.readables, first by highest priority of any offered
+       // less(a,b): sort first by highest priority of any offered
        // storage class (highest->lowest), then by volume UUID
-       sort.Slice(vm.readables, func(i, j int) bool {
-               if pi, pj := pri(i), pri(j); pi != pj {
-                       return pi > pj
+       less := func(a, b *VolumeMount) bool {
+               if pa, pb := pri(a), pri(b); pa != pb {
+                       return pa > pb
                } else {
-                       return vm.readables[i].KeepMount.UUID < vm.readables[j].KeepMount.UUID
+                       return a.KeepMount.UUID < b.KeepMount.UUID
                }
+       }
+       sort.Slice(vm.readables, func(i, j int) bool {
+               return less(vm.readables[i], vm.readables[j])
+       })
+       sort.Slice(vm.writables, func(i, j int) bool {
+               return less(vm.writables[i], vm.writables[j])
        })
        return vm, nil
 }
@@ -384,18 +390,22 @@ func (vm *RRVolumeManager) AllReadable() []*VolumeMount {
        return vm.readables
 }
 
-// AllWritable returns an array of all writable volumes
// AllWritable returns writable volumes, sorted by priority/uuid. Used
// by CompareAndTouch to ensure higher-priority volumes are checked
// first. Callers must not modify the returned slice.
func (vm *RRVolumeManager) AllWritable() []*VolumeMount {
	return vm.writables
}
 
-// NextWritable returns the next writable
-func (vm *RRVolumeManager) NextWritable() *VolumeMount {
+// NextWritable returns writable volumes, rotated by vm.counter so
+// each volume gets a turn to be first. Used by PutBlock to distribute
+// new data across available volumes.
+func (vm *RRVolumeManager) NextWritable() []*VolumeMount {
        if len(vm.writables) == 0 {
                return nil
        }
-       i := atomic.AddUint32(&vm.counter, 1)
-       return vm.writables[i%uint32(len(vm.writables))]
+       offset := (int(atomic.AddUint32(&vm.counter, 1)) - 1) % len(vm.writables)
+       return append(append([]*VolumeMount(nil), vm.writables[offset:]...), vm.writables[:offset]...)
 }
 
 // VolumeStats returns an ioStats for the given volume.
index 79f0d3f4f6c2f0a21ddc5ab3d1e711831c1be896..c112972c4303103a6fee1fc920fa309022b340ee 100644 (file)
@@ -73,7 +73,7 @@ ENV DEBIAN_FRONTEND noninteractive
 #  gnupg2 runit python3-pip python3-setuptools python3-yaml shellinabox netcat less
 RUN apt-get update && \
     apt-get -yq --no-install-recommends -o Acquire::Retries=6 install \
-    gnupg2 runit python3-pip python3-setuptools python3-yaml shellinabox netcat less && \
+    gnupg2 runit python3-pip python3-setuptools python3-yaml shellinabox netcat less vim-tiny && \
     apt-get clean
 
 ENV GOPATH /var/lib/gopath
index fb3eaaeee875e147f761cef7dbb8f317be7aaa31..a112cb93fe07cadbcfb814606497df0b0e9328f8 100755 (executable)
@@ -59,5 +59,6 @@ fi
 export VERSION=$(./version-at-commit.sh)
 export BROWSER=none
 export CI=true
+export HTTPS=false
 node --version
 exec node node_modules/react-scripts/scripts/start.js
diff --git a/tools/test-collection-create/test-collection-create.py b/tools/test-collection-create/test-collection-create.py
new file mode 100644 (file)
index 0000000..9a02745
--- /dev/null
@@ -0,0 +1,443 @@
+#!/usr/bin/env python3
+#
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: CC-BY-SA-3.0
+
+import argparse
+import logging
+import random
+import string
+import sys
+
+import arvados
+import arvados.collection
+
# Module logger for this test tool; emits INFO and above by default
# (raised to DEBUG by the --debug flag elsewhere in the script).
logger = logging.getLogger('arvados.test_collection_create')
logger.setLevel(logging.INFO)

# Shared option set, attached to the real parser below via parents=[]
# so --help on arg_parser still lists every option exactly once.
opts = argparse.ArgumentParser(add_help=False)
# The six integer options are symmetric (min/max pairs), so build them
# from a table instead of six near-identical add_argument calls.
for _flag, _default, _help in [
    ('--min-files', 30000,
     "\nMinimum number of files on each directory. Default: 30000.\n"),
    ('--max-files', 30000,
     "\nMaximum number of files on each directory. Default: 30000.\n"),
    ('--min-depth', 0,
     "\nMinimum depth for the created tree structure. Default: 0.\n"),
    ('--max-depth', 0,
     "\nMaximum depth for the created tree structure. Default: 0.\n"),
    ('--min-subdirs', 1,
     "\nMinimum number of subdirectories created at every depth level. Default: 1.\n"),
    ('--max-subdirs', 10,
     "\nMaximum number of subdirectories created at every depth level. Default: 10.\n"),
]:
    opts.add_argument(_flag, type=int, default=_default, help=_help)
opts.add_argument('--debug', action='store_true', default=False, help="""
Sets logging level to DEBUG.
""")

arg_parser = argparse.ArgumentParser(
    description='Create a collection with garbage data for testing purposes.',
    parents=[opts])
+
# Adjective pool used to build random file/directory names for the
# test collection (paired with an entry from `nouns`).
# NOTE(review): a few entries appear twice (e.g. 'adorable', 'dismal',
# 'distant', 'tremendous', 'precious', 'oblong'), which slightly skews
# random selection toward those words — presumably harmless for test
# data; confirm before deduplicating, since that would change the
# generated names.
adjectives = ['abandoned','able','absolute','adorable','adventurous','academic',
    'acceptable','acclaimed','accomplished','accurate','aching','acidic','acrobatic',
    'active','actual','adept','admirable','admired','adolescent','adorable','adored',
    'advanced','afraid','affectionate','aged','aggravating','aggressive','agile',
    'agitated','agonizing','agreeable','ajar','alarmed','alarming','alert','alienated',
    'alive','all','altruistic','amazing','ambitious','ample','amused','amusing','anchored',
    'ancient','angelic','angry','anguished','animated','annual','another','antique',
    'anxious','any','apprehensive','appropriate','apt','arctic','arid','aromatic','artistic',
    'ashamed','assured','astonishing','athletic','attached','attentive','attractive',
    'austere','authentic','authorized','automatic','avaricious','average','aware','awesome',
    'awful','awkward','babyish','bad','back','baggy','bare','barren','basic','beautiful',
    'belated','beloved','beneficial','better','best','bewitched','big','big-hearted',
    'biodegradable','bite-sized','bitter','black','black-and-white','bland','blank',
    'blaring','bleak','blind','blissful','blond','blue','blushing','bogus','boiling',
    'bold','bony','boring','bossy','both','bouncy','bountiful','bowed','brave','breakable',
    'brief','bright','brilliant','brisk','broken','bronze','brown','bruised','bubbly',
    'bulky','bumpy','buoyant','burdensome','burly','bustling','busy','buttery','buzzing',
    'calculating','calm','candid','canine','capital','carefree','careful','careless',
    'caring','cautious','cavernous','celebrated','charming','cheap','cheerful','cheery',
    'chief','chilly','chubby','circular','classic','clean','clear','clear-cut','clever',
    'close','closed','cloudy','clueless','clumsy','cluttered','coarse','cold','colorful',
    'colorless','colossal','comfortable','common','compassionate','competent','complete',
    'complex','complicated','composed','concerned','concrete','confused','conscious',
    'considerate','constant','content','conventional','cooked','cool','cooperative',
    'coordinated','corny','corrupt','costly','courageous','courteous','crafty','crazy',
    'creamy','creative','creepy','criminal','crisp','critical','crooked','crowded',
    'cruel','crushing','cuddly','cultivated','cultured','cumbersome','curly','curvy',
    'cute','cylindrical','damaged','damp','dangerous','dapper','daring','darling','dark',
    'dazzling','dead','deadly','deafening','dear','dearest','decent','decimal','decisive',
    'deep','defenseless','defensive','defiant','deficient','definite','definitive','delayed',
    'delectable','delicious','delightful','delirious','demanding','dense','dental',
    'dependable','dependent','descriptive','deserted','detailed','determined','devoted',
    'different','difficult','digital','diligent','dim','dimpled','dimwitted','direct',
    'disastrous','discrete','disfigured','disgusting','disloyal','dismal','distant',
    'downright','dreary','dirty','disguised','dishonest','dismal','distant','distinct',
    'distorted','dizzy','dopey','doting','double','downright','drab','drafty','dramatic',
    'dreary','droopy','dry','dual','dull','dutiful','each','eager','earnest','early',
    'easy','easy-going','ecstatic','edible','educated','elaborate','elastic','elated',
    'elderly','electric','elegant','elementary','elliptical','embarrassed','embellished',
    'eminent','emotional','empty','enchanted','enchanting','energetic','enlightened',
    'enormous','enraged','entire','envious','equal','equatorial','essential','esteemed',
    'ethical','euphoric','even','evergreen','everlasting','every','evil','exalted',
    'excellent','exemplary','exhausted','excitable','excited','exciting','exotic',
    'expensive','experienced','expert','extraneous','extroverted','extra-large','extra-small',
    'fabulous','failing','faint','fair','faithful','fake','false','familiar','famous',
    'fancy','fantastic','far','faraway','far-flung','far-off','fast','fat','fatal',
    'fatherly','favorable','favorite','fearful','fearless','feisty','feline','female',
    'feminine','few','fickle','filthy','fine','finished','firm','first','firsthand',
    'fitting','fixed','flaky','flamboyant','flashy','flat','flawed','flawless','flickering',
    'flimsy','flippant','flowery','fluffy','fluid','flustered','focused','fond','foolhardy',
    'foolish','forceful','forked','formal','forsaken','forthright','fortunate','fragrant',
    'frail','frank','frayed','free','French','fresh','frequent','friendly','frightened',
    'frightening','frigid','frilly','frizzy','frivolous','front','frosty','frozen',
    'frugal','fruitful','full','fumbling','functional','funny','fussy','fuzzy','gargantuan',
    'gaseous','general','generous','gentle','genuine','giant','giddy','gigantic','gifted',
    'giving','glamorous','glaring','glass','gleaming','gleeful','glistening','glittering',
    'gloomy','glorious','glossy','glum','golden','good','good-natured','gorgeous',
    'graceful','gracious','grand','grandiose','granular','grateful','grave','gray',
    'great','greedy','green','gregarious','grim','grimy','gripping','grizzled','gross',
    'grotesque','grouchy','grounded','growing','growling','grown','grubby','gruesome',
    'grumpy','guilty','gullible','gummy','hairy','half','handmade','handsome','handy',
    'happy','happy-go-lucky','hard','hard-to-find','harmful','harmless','harmonious',
    'harsh','hasty','hateful','haunting','healthy','heartfelt','hearty','heavenly',
    'heavy','hefty','helpful','helpless','hidden','hideous','high','high-level','hilarious',
    'hoarse','hollow','homely','honest','honorable','honored','hopeful','horrible',
    'hospitable','hot','huge','humble','humiliating','humming','humongous','hungry',
    'hurtful','husky','icky','icy','ideal','idealistic','identical','idle','idiotic',
    'idolized','ignorant','ill','illegal','ill-fated','ill-informed','illiterate',
    'illustrious','imaginary','imaginative','immaculate','immaterial','immediate',
    'immense','impassioned','impeccable','impartial','imperfect','imperturbable','impish',
    'impolite','important','impossible','impractical','impressionable','impressive',
    'improbable','impure','inborn','incomparable','incompatible','incomplete','inconsequential',
    'incredible','indelible','inexperienced','indolent','infamous','infantile','infatuated',
    'inferior','infinite','informal','innocent','insecure','insidious','insignificant',
    'insistent','instructive','insubstantial','intelligent','intent','intentional',
    'interesting','internal','international','intrepid','ironclad','irresponsible',
    'irritating','itchy','jaded','jagged','jam-packed','jaunty','jealous','jittery',
    'joint','jolly','jovial','joyful','joyous','jubilant','judicious','juicy','jumbo',
    'junior','jumpy','juvenile','kaleidoscopic','keen','key','kind','kindhearted','kindly',
    'klutzy','knobby','knotty','knowledgeable','knowing','known','kooky','kosher','lame',
    'lanky','large','last','lasting','late','lavish','lawful','lazy','leading','lean',
    'leafy','left','legal','legitimate','light','lighthearted','likable','likely','limited',
    'limp','limping','linear','lined','liquid','little','live','lively','livid','loathsome',
    'lone','lonely','long','long-term','loose','lopsided','lost','loud','lovable','lovely',
    'loving','low','loyal','lucky','lumbering','luminous','lumpy','lustrous','luxurious',
    'mad','made-up','magnificent','majestic','major','male','mammoth','married','marvelous',
    'masculine','massive','mature','meager','mealy','mean','measly','meaty','medical',
    'mediocre','medium','meek','mellow','melodic','memorable','menacing','merry','messy',
    'metallic','mild','milky','mindless','miniature','minor','minty','miserable','miserly',
    'misguided','misty','mixed','modern','modest','moist','monstrous','monthly','monumental',
    'moral','mortified','motherly','motionless','mountainous','muddy','muffled','multicolored',
    'mundane','murky','mushy','musty','muted','mysterious','naive','narrow','nasty','natural',
    'naughty','nautical','near','neat','necessary','needy','negative','neglected','negligible',
    'neighboring','nervous','new','next','nice','nifty','nimble','nippy','nocturnal','noisy',
    'nonstop','normal','notable','noted','noteworthy','novel','noxious','numb','nutritious',
    'nutty','obedient','obese','oblong','oily','oblong','obvious','occasional','odd',
    'oddball','offbeat','offensive','official','old','old-fashioned','only','open','optimal',
    'optimistic','opulent','orange','orderly','organic','ornate','ornery','ordinary',
    'original','other','our','outlying','outgoing','outlandish','outrageous','outstanding',
    'oval','overcooked','overdue','overjoyed','overlooked','palatable','pale','paltry',
    'parallel','parched','partial','passionate','past','pastel','peaceful','peppery',
    'perfect','perfumed','periodic','perky','personal','pertinent','pesky','pessimistic',
    'petty','phony','physical','piercing','pink','pitiful','plain','plaintive','plastic',
    'playful','pleasant','pleased','pleasing','plump','plush','polished','polite','political',
    'pointed','pointless','poised','poor','popular','portly','posh','positive','possible',
    'potable','powerful','powerless','practical','precious','present','prestigious',
    'pretty','precious','previous','pricey','prickly','primary','prime','pristine','private',
    'prize','probable','productive','profitable','profuse','proper','proud','prudent',
    'punctual','pungent','puny','pure','purple','pushy','putrid','puzzled','puzzling',
    'quaint','qualified','quarrelsome','quarterly','queasy','querulous','questionable',
    'quick','quick-witted','quiet','quintessential','quirky','quixotic','quizzical',
    'radiant','ragged','rapid','rare','rash','raw','recent','reckless','rectangular',
    'ready','real','realistic','reasonable','red','reflecting','regal','regular',
    'reliable','relieved','remarkable','remorseful','remote','repentant','required',
    'respectful','responsible','repulsive','revolving','rewarding','rich','rigid',
    'right','ringed','ripe','roasted','robust','rosy','rotating','rotten','rough',
    'round','rowdy','royal','rubbery','rundown','ruddy','rude','runny','rural','rusty',
    'sad','safe','salty','same','sandy','sane','sarcastic','sardonic','satisfied',
    'scaly','scarce','scared','scary','scented','scholarly','scientific','scornful',
    'scratchy','scrawny','second','secondary','second-hand','secret','self-assured',
    'self-reliant','selfish','sentimental','separate','serene','serious','serpentine',
    'several','severe','shabby','shadowy','shady','shallow','shameful','shameless',
    'sharp','shimmering','shiny','shocked','shocking','shoddy','short','short-term',
    'showy','shrill','shy','sick','silent','silky','silly','silver','similar','simple',
    'simplistic','sinful','single','sizzling','skeletal','skinny','sleepy','slight',
    'slim','slimy','slippery','slow','slushy','small','smart','smoggy','smooth','smug',
    'snappy','snarling','sneaky','sniveling','snoopy','sociable','soft','soggy','solid',
    'somber','some','spherical','sophisticated','sore','sorrowful','soulful','soupy',
    'sour','Spanish','sparkling','sparse','specific','spectacular','speedy','spicy',
    'spiffy','spirited','spiteful','splendid','spotless','spotted','spry','square',
    'squeaky','squiggly','stable','staid','stained','stale','standard','starchy','stark',
    'starry','steep','sticky','stiff','stimulating','stingy','stormy','straight','strange',
    'steel','strict','strident','striking','striped','strong','studious','stunning',
    'stupendous','stupid','sturdy','stylish','subdued','submissive','substantial','subtle',
    'suburban','sudden','sugary','sunny','super','superb','superficial','superior',
    'supportive','sure-footed','surprised','suspicious','svelte','sweaty','sweet','sweltering',
    'swift','sympathetic','tall','talkative','tame','tan','tangible','tart','tasty',
    'tattered','taut','tedious','teeming','tempting','tender','tense','tepid','terrible',
    'terrific','testy','thankful','that','these','thick','thin','third','thirsty','this',
    'thorough','thorny','those','thoughtful','threadbare','thrifty','thunderous','tidy',
    'tight','timely','tinted','tiny','tired','torn','total','tough','traumatic','treasured',
    'tremendous','tragic','trained','tremendous','triangular','tricky','trifling','trim',
    'trivial','troubled','true','trusting','trustworthy','trusty','truthful','tubby',
    'turbulent','twin','ugly','ultimate','unacceptable','unaware','uncomfortable',
    'uncommon','unconscious','understated','unequaled','uneven','unfinished','unfit',
    'unfolded','unfortunate','unhappy','unhealthy','uniform','unimportant','unique',
    'united','unkempt','unknown','unlawful','unlined','unlucky','unnatural','unpleasant',
    'unrealistic','unripe','unruly','unselfish','unsightly','unsteady','unsung','untidy',
    'untimely','untried','untrue','unused','unusual','unwelcome','unwieldy','unwilling',
    'unwitting','unwritten','upbeat','upright','upset','urban','usable','used','useful',
    'useless','utilized','utter','vacant','vague','vain','valid','valuable','vapid',
    'variable','vast','velvety','venerated','vengeful','verifiable','vibrant','vicious',
    'victorious','vigilant','vigorous','villainous','violet','violent','virtual',
    'virtuous','visible','vital','vivacious','vivid','voluminous','wan','warlike','warm',
    'warmhearted','warped','wary','wasteful','watchful','waterlogged','watery','wavy',
    'wealthy','weak','weary','webbed','wee','weekly','weepy','weighty','weird','welcome',
    'well-documented','well-groomed','well-informed','well-lit','well-made','well-off',
    'well-to-do','well-worn','wet','which','whimsical','whirlwind','whispered','white',
    'whole','whopping','wicked','wide','wide-eyed','wiggly','wild','willing','wilted',
    'winding','windy','winged','wiry','wise','witty','wobbly','woeful','wonderful',
    'wooden','woozy','wordy','worldly','worn','worried','worrisome','worse','worst',
    'worthless','worthwhile','worthy','wrathful','wretched','writhing','wrong','wry',
    'yawning','yearly','yellow','yellowish','young','youthful','yummy','zany','zealous',
    'zesty','zigzag']
+nouns = ['people','history','way','art','world','information','map','two','family',
+    'government','health','system','computer','meat','year','thanks','music','person',
+    'reading','method','data','food','understanding','theory','law','bird','literature',
+    'problem','software','control','knowledge','power','ability','economics','love',
+    'internet','television','science','library','nature','fact','product','idea',
+    'temperature','investment','area','society','activity','story','industry','media',
+    'thing','oven','community','definition','safety','quality','development','language',
+    'management','player','variety','video','week','security','country','exam','movie',
+    'organization','equipment','physics','analysis','policy','series','thought','basis',
+    'boyfriend','direction','strategy','technology','army','camera','freedom','paper',
+    'environment','child','instance','month','truth','marketing','university','writing',
+    'article','department','difference','goal','news','audience','fishing','growth',
+    'income','marriage','user','combination','failure','meaning','medicine','philosophy',
+    'teacher','communication','night','chemistry','disease','disk','energy','nation',
+    'road','role','soup','advertising','location','success','addition','apartment','education',
+    'math','moment','painting','politics','attention','decision','event','property',
+    'shopping','student','wood','competition','distribution','entertainment','office',
+    'population','president','unit','category','cigarette','context','introduction',
+    'opportunity','performance','driver','flight','length','magazine','newspaper',
+    'relationship','teaching','cell','dealer','finding','lake','member','message','phone',
+    'scene','appearance','association','concept','customer','death','discussion','housing',
+    'inflation','insurance','mood','woman','advice','blood','effort','expression','importance',
+    'opinion','payment','reality','responsibility','situation','skill','statement','wealth',
+    'application','city','county','depth','estate','foundation','grandmother','heart',
+    'perspective','photo','recipe','studio','topic','collection','depression','imagination',
+    'passion','percentage','resource','setting','ad','agency','college','connection',
+    'criticism','debt','description','memory','patience','secretary','solution','administration',
+    'aspect','attitude','director','personality','psychology','recommendation','response',
+    'selection','storage','version','alcohol','argument','complaint','contract','emphasis',
+    'highway','loss','membership','possession','preparation','steak','union','agreement',
+    'cancer','currency','employment','engineering','entry','interaction','mixture','preference',
+    'region','republic','tradition','virus','actor','classroom','delivery','device',
+    'difficulty','drama','election','engine','football','guidance','hotel','owner',
+    'priority','protection','suggestion','tension','variation','anxiety','atmosphere',
+    'awareness','bath','bread','candidate','climate','comparison','confusion','construction',
+    'elevator','emotion','employee','employer','guest','height','leadership','mall','manager',
+    'operation','recording','sample','transportation','charity','cousin','disaster','editor',
+    'efficiency','excitement','extent','feedback','guitar','homework','leader','mom','outcome',
+    'permission','presentation','promotion','reflection','refrigerator','resolution','revenue',
+    'session','singer','tennis','basket','bonus','cabinet','childhood','church','clothes','coffee',
+    'dinner','drawing','hair','hearing','initiative','judgment','lab','measurement','mode','mud',
+    'orange','poetry','police','possibility','procedure','queen','ratio','relation','restaurant',
+    'satisfaction','sector','signature','significance','song','tooth','town','vehicle','volume','wife',
+    'accident','airport','appointment','arrival','assumption','baseball','chapter','committee',
+    'conversation','database','enthusiasm','error','explanation','farmer','gate','girl','hall',
+    'historian','hospital','injury','instruction','maintenance','manufacturer','meal','perception','pie',
+    'poem','presence','proposal','reception','replacement','revolution','river','son','speech','tea',
+    'village','warning','winner','worker','writer','assistance','breath','buyer','chest','chocolate',
+    'conclusion','contribution','cookie','courage','dad','desk','drawer','establishment','examination',
+    'garbage','grocery','honey','impression','improvement','independence','insect','inspection',
+    'inspector','king','ladder','menu','penalty','piano','potato','profession','professor','quantity',
+    'reaction','requirement','salad','sister','supermarket','tongue','weakness','wedding','affair',
+    'ambition','analyst','apple','assignment','assistant','bathroom','bedroom','beer','birthday',
+    'celebration','championship','cheek','client','consequence','departure','diamond','dirt','ear',
+    'fortune','friendship','funeral','gene','girlfriend','hat','indication','intention','lady',
+    'midnight','negotiation','obligation','passenger','pizza','platform','poet','pollution',
+    'recognition','reputation','shirt','sir','speaker','stranger','surgery','sympathy','tale','throat',
+    'trainer','uncle','youth','time','work','film','water','money','example','while','business','study',
+    'game','life','form','air','day','place','number','part','field','fish','back','process','heat',
+    'hand','experience','job','book','end','point','type','home','economy','value','body','market',
+    'guide','interest','state','radio','course','company','price','size','card','list','mind','trade',
+    'line','care','group','risk','word','fat','force','key','light','training','name','school','top',
+    'amount','level','order','practice','research','sense','service','piece','web','boss','sport','fun',
+    'house','page','term','test','answer','sound','focus','matter','kind','soil','board','oil','picture',
+    'access','garden','range','rate','reason','future','site','demand','exercise','image','case','cause',
+    'coast','action','age','bad','boat','record','result','section','building','mouse','cash','class',
+    'nothing','period','plan','store','tax','side','subject','space','rule','stock','weather','chance',
+    'figure','man','model','source','beginning','earth','program','chicken','design','feature','head',
+    'material','purpose','question','rock','salt','act','birth','car','dog','object','scale','sun',
+    'note','profit','rent','speed','style','war','bank','craft','half','inside','outside','standard',
+    'bus','exchange','eye','fire','position','pressure','stress','advantage','benefit','box','frame',
+    'issue','step','cycle','face','item','metal','paint','review','room','screen','structure','view',
+    'account','ball','discipline','medium','share','balance','bit','black','bottom','choice','gift',
+    'impact','machine','shape','tool','wind','address','average','career','culture','morning','pot',
+    'sign','table','task','condition','contact','credit','egg','hope','ice','network','north','square',
+    'attempt','date','effect','link','post','star','voice','capital','challenge','friend','self','shot',
+    'brush','couple','debate','exit','front','function','lack','living','plant','plastic','spot',
+    'summer','taste','theme','track','wing','brain','button','click','desire','foot','gas','influence',
+    'notice','rain','wall','base','damage','distance','feeling','pair','savings','staff','sugar',
+    'target','text','animal','author','budget','discount','file','ground','lesson','minute','officer',
+    'phase','reference','register','sky','stage','stick','title','trouble','bowl','bridge','campaign',
+    'character','club','edge','evidence','fan','letter','lock','maximum','novel','option','pack','park',
+    'plenty','quarter','skin','sort','weight','baby','background','carry','dish','factor','fruit',
+    'glass','joint','master','muscle','red','strength','traffic','trip','vegetable','appeal','chart',
+    'gear','ideal','kitchen','land','log','mother','net','party','principle','relative','sale','season',
+    'signal','spirit','street','tree','wave','belt','bench','commission','copy','drop','minimum','path',
+    'progress','project','sea','south','status','stuff','ticket','tour','angle','blue','breakfast',
+    'confidence','daughter','degree','doctor','dot','dream','duty','essay','father','fee','finance',
+    'hour','juice','limit','luck','milk','mouth','peace','pipe','seat','stable','storm','substance',
+    'team','trick','afternoon','bat','beach','blank','catch','chain','consideration','cream','crew',
+    'detail','gold','interview','kid','mark','match','mission','pain','pleasure','score','screw','sex',
+    'shop','shower','suit','tone','window','agent','band','block','bone','calendar','cap','coat',
+    'contest','corner','court','cup','district','door','east','finger','garage','guarantee','hole',
+    'hook','implement','layer','lecture','lie','manner','meeting','nose','parking','partner','profile',
+    'respect','rice','routine','schedule','swimming','telephone','tip','winter','airline','bag','battle',
+    'bed','bill','bother','cake','code','curve','designer','dimension','dress','ease','emergency',
+    'evening','extension','farm','fight','gap','grade','holiday','horror','horse','host','husband',
+    'loan','mistake','mountain','nail','noise','occasion','package','patient','pause','phrase','proof',
+    'race','relief','sand','sentence','shoulder','smoke','stomach','string','tourist','towel','vacation',
+    'west','wheel','wine','arm','aside','associate','bet','blow','border','branch','breast','brother',
+    'buddy','bunch','chip','coach','cross','document','draft','dust','expert','floor','god','golf',
+    'habit','iron','judge','knife','landscape','league','mail','mess','native','opening','parent',
+    'pattern','pin','pool','pound','request','salary','shame','shelter','shoe','silver','tackle','tank',
+    'trust','assist','bake','bar','bell','bike','blame','boy','brick','chair','closet','clue','collar',
+    'comment','conference','devil','diet','fear','fuel','glove','jacket','lunch','monitor','mortgage',
+    'nurse','pace','panic','peak','plane','reward','row','sandwich','shock','spite','spray','surprise',
+    'till','transition','weekend','welcome','yard','alarm','bend','bicycle','bite','blind','bottle',
+    'cable','candle','clerk','cloud','concert','counter','flower','grandfather','harm','knee','lawyer',
+    'leather','load','mirror','neck','pension','plate','purple','ruin','ship','skirt','slice','snow',
+    'specialist','stroke','switch','trash','tune','zone','anger','award','bid','bitter','boot','bug',
+    'camp','candy','carpet','cat','champion','channel','clock','comfort','cow','crack','engineer',
+    'entrance','fault','grass','guy','hell','highlight','incident','island','joke','jury','leg','lip',
+    'mate','motor','nerve','passage','pen','pride','priest','prize','promise','resident','resort','ring',
+    'roof','rope','sail','scheme','script','sock','station','toe','tower','truck','witness','a','you',
+    'it','can','will','if','one','many','most','other','use','make','good','look','help','go','great',
+    'being','few','might','still','public','read','keep','start','give','human','local','general','she',
+    'specific','long','play','feel','high','tonight','put','common','set','change','simple','past','big',
+    'possible','particular','today','major','personal','current','national','cut','natural','physical',
+    'show','try','check','second','call','move','pay','let','increase','single','individual','turn',
+    'ask','buy','guard','hold','main','offer','potential','professional','international','travel','cook',
+    'alternative','following','special','working','whole','dance','excuse','cold','commercial','low',
+    'purchase','deal','primary','worth','fall','necessary','positive','produce','search','present',
+    'spend','talk','creative','tell','cost','drive','green','support','glad','remove','return','run',
+    'complex','due','effective','middle','regular','reserve','independent','leave','original','reach',
+    'rest','serve','watch','beautiful','charge','active','break','negative','safe','stay','visit',
+    'visual','affect','cover','report','rise','walk','white','beyond','junior','pick','unique',
+    'anything','classic','final','lift','mix','private','stop','teach','western','concern','familiar',
+    'fly','official','broad','comfortable','gain','maybe','rich','save','stand','young','fail','heavy',
+    'hello','lead','listen','valuable','worry','handle','leading','meet','release','sell','finish',
+    'normal','press','ride','secret','spread','spring','tough','wait','brown','deep','display','flow',
+    'hit','objective','shoot','touch','cancel','chemical','cry','dump','extreme','push','conflict','eat',
+    'fill','formal','jump','kick','opposite','pass','pitch','remote','total','treat','vast','abuse',
+    'beat','burn','deposit','print','raise','sleep','somewhere','advance','anywhere','consist','dark',
+    'double','draw','equal','fix','hire','internal','join','kill','sensitive','tap','win','attack',
+    'claim','constant','drag','drink','guess','minor','pull','raw','soft','solid','wear','weird',
+    'wonder','annual','count','dead','doubt','feed','forever','impress','nobody','repeat','round','sing',
+    'slide','strip','whereas','wish','combine','command','dig','divide','equivalent','hang','hunt',
+    'initial','march','mention','smell','spiritual','survey','tie','adult','brief','crazy','escape',
+    'gather','hate','prior','repair','rough','sad','scratch','sick','strike','employ','external','hurt',
+    'illegal','laugh','lay','mobile','nasty','ordinary','respond','royal','senior','split','strain',
+    'struggle','swim','train','upper','wash','yellow','convert','crash','dependent','fold','funny',
+    'grab','hide','miss','permit','quote','recover','resolve','roll','sink','slip','spare','suspect',
+    'sweet','swing','twist','upstairs','usual','abroad','brave','calm','concentrate','estimate','grand',
+    'male','mine','prompt','quiet','refuse','regret','reveal','rush','shake','shift','shine','steal',
+    'suck','surround','anybody','bear','brilliant','dare','dear','delay','drunk','female','hurry',
+    'inevitable','invite','kiss','neat','pop','punch','quit','reply','representative','resist','rip',
+    'rub','silly','smile','spell','stretch','stupid','tear','temporary','tomorrow','wake','wrap',
+    'yesterday']
+
def get_random_name(with_ext=True):
    """Return a random name like 'adjective_noun_12345', plus '.txt' if with_ext."""
    ext = '.txt' if with_ext else ''
    return "{}_{}_{}{}".format(random.choice(adjectives),
                               random.choice(nouns),
                               random.randint(0, 50000),
                               ext)
+
def get_random_file(max_filesize):
    """Return a random 'start:size:name' file token for a manifest stream.

    start and size are chosen so that start+size never exceeds max_filesize.
    """
    start = random.randint(0, (max_filesize - 1025))
    size = random.randint(0, (max_filesize - start))
    return "{}:{}:{}".format(start, size, get_random_name())
+
def get_stream(name, max_filesize, data_loc, args):
    """Build one manifest stream line: '<name> <block locator> <file tokens...>'.

    The number of file tokens is random, between args.min_files and
    args.max_files inclusive.
    """
    file_count = random.randint(args.min_files, args.max_files)
    files = [get_random_file(max_filesize) for _ in range(file_count)]
    return "{} {} {}".format(name, data_loc, ' '.join(files))
+
def create_substreams(depth, base_stream_name, max_filesize, data_loc, args, current_size=0):
    """Recursively build a list of manifest stream lines rooted at base_stream_name.

    Descends up to `depth` directory levels, creating a random number of
    subdirectories (args.min_subdirs..args.max_subdirs) at each level, and
    stops early once the accumulated stream text reaches 128 MiB.
    `current_size` carries the running byte total into recursive calls so
    descendants can also stop early.
    """
    current_stream = get_stream(base_stream_name, max_filesize, data_loc, args)
    current_size += len(current_stream)
    streams = [current_stream]

    if current_size >= (128 * 1024 * 1024):
        logger.debug("Maximum manifest size reached -- finishing early at {}".format(base_stream_name))
    elif depth == 0:
        logger.debug("Finished stream {}".format(base_stream_name))
    else:
        for _ in range(random.randint(args.min_subdirs, args.max_subdirs)):
            stream_name = base_stream_name+'/'+get_random_name(False)
            substreams = create_substreams(depth-1, stream_name, max_filesize,
                data_loc, args, current_size)
            current_size += sum([len(x) for x in substreams])
            # NOTE(review): when the cap is hit here, the just-built substreams
            # are dropped (break happens before extend) even though their size
            # was already counted -- looks like a deliberate hard cutoff;
            # confirm before changing.
            if current_size >= (128 * 1024 * 1024):
                break
            streams.extend(substreams)
    return streams
+
def parse_arguments(arguments):
    """Parse and validate command-line arguments.

    Enables DEBUG logging when --debug is given.  Exits with a usage error
    (via arg_parser.error, which raises SystemExit) when any min/max pair is
    inconsistent or a depth is negative.
    """
    args = arg_parser.parse_args(arguments)
    if args.debug:
        logger.setLevel(logging.DEBUG)
    if args.max_files < args.min_files:
        # Fixed wording: messages previously read "less or equal than".
        arg_parser.error("--min-files={} should be less than or equal to max-files={}".format(args.min_files, args.max_files))
    if args.min_depth < 0:
        arg_parser.error("--min-depth should be at least 0")
    if args.max_depth < 0 or args.max_depth < args.min_depth:
        # Fixed garbled message ("should be at >= 0 and ...").
        arg_parser.error("--max-depth should be >= 0 and >= min-depth={}".format(args.min_depth))
    if args.max_subdirs < args.min_subdirs:
        arg_parser.error("--min-subdirs={} should be less than or equal to max-subdirs={}".format(args.min_subdirs, args.max_subdirs))
    return args
+
def main(arguments=None):
    """Create one randomly structured test collection; return 0 on success.

    Uploads a single 1 MiB block of random printable data to Keep, then
    builds a manifest whose file tokens all point into that one block, and
    creates a collection from it.  The manifest is capped below 128 MiB.
    """
    args = parse_arguments(arguments)
    logger.info("Creating test collection with (min={}, max={}) files per directory and a tree depth of (min={}, max={}) and (min={}, max={}) subdirs in each depth level...".format(args.min_files, args.max_files, args.min_depth, args.max_depth, args.min_subdirs, args.max_subdirs))
    api = arvados.api('v1', timeout=5*60)
    max_filesize = 1024*1024
    # One shared data block; every generated file spec references it.
    data_block = ''.join([random.choice(string.printable) for i in range(max_filesize)])
    data_loc = arvados.KeepClient(api).put(data_block)
    streams = create_substreams(random.randint(args.min_depth, args.max_depth),
        '.', max_filesize, data_loc, args)
    manifest = ''
    for s in streams:
        # -2 leaves room for the final stream's trailing newline under 128 MiB.
        if len(manifest)+len(s) > (1024*1024*128)-2:
            logger.info("Skipping stream {} to avoid making a manifest bigger than 128MiB".format(s.split(' ')[0]))
            break
        manifest += s + '\n'
    # Pick the name before entering the try block so the error handler can
    # always reference it (previously a failure before assignment would have
    # raised NameError inside the except clause).
    coll_name = get_random_name(False)
    try:
        coll = api.collections().create(
            body={"collection": {
                "name": coll_name,
                "manifest_text": manifest
            },
        }).execute()
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit pass
        # through untouched; log at error level instead of info, then re-raise.
        logger.error("ERROR creating collection with name '{}' and manifest:\n'{}...'\nSize: {}".format(coll_name, manifest[0:1024], len(manifest)))
        raise
    logger.info("Created collection {} - manifest size: {}".format(coll["uuid"], len(manifest)))
    return 0
+
# Script entry point: exit status is main()'s return value (0 on success).
if __name__ == "__main__":
    sys.exit(main())