Merge branch '15159-export-trustallcontent' into main. Closes #15159
author     Stephen Smith <stephen@curii.com>
           Thu, 26 Aug 2021 16:52:20 +0000 (12:52 -0400)
committer  Stephen Smith <stephen@curii.com>
           Thu, 26 Aug 2021 16:52:20 +0000 (12:52 -0400)
Arvados-DCO-1.1-Signed-off-by: Stephen Smith <stephen@curii.com>

43 files changed:
apps/workbench/test/controllers/work_units_controller_test.rb
apps/workbench/test/integration/work_units_test.rb
doc/admin/upgrading.html.textile.liquid
doc/api/methods.html.textile.liquid
lib/controller/integration_test.go
lib/crunchrun/singularity.go
services/api/app/models/api_client_authorization.rb
services/api/db/migrate/20210816191509_drop_fts_index.rb [new file with mode: 0644]
services/api/db/structure.sql
services/api/lib/record_filters.rb
services/api/test/fixtures/jobs.yml
services/api/test/fixtures/pipeline_instances.yml
services/api/test/functional/arvados/v1/filters_test.rb
services/api/test/integration/collections_api_test.rb
services/api/test/integration/groups_test.rb
services/api/test/unit/arvados_model_test.rb
services/fuse/arvados_fuse/fusedir.py
services/keepstore/handler_test.go
services/keepstore/handlers.go
services/keepstore/volume.go
tools/arvbox/lib/arvbox/docker/Dockerfile.base
tools/arvbox/lib/arvbox/docker/service/workbench2/run-service
tools/salt-install/Vagrantfile
tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/arvados.sls
tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_api_configuration.sls
tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_controller_configuration.sls
tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_keepproxy_configuration.sls
tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_keepweb_configuration.sls
tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_passenger.sls
tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_webshell_configuration.sls
tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_websocket_configuration.sls
tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_workbench2_configuration.sls
tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_workbench_configuration.sls
tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/postgresql.sls
tools/salt-install/config_examples/single_host/multiple_hostnames/states/snakeoil_certs.sls
tools/salt-install/config_examples/single_host/single_hostname/pillars/nginx_passenger.sls
tools/salt-install/config_examples/single_host/single_hostname/states/snakeoil_certs.sls
tools/salt-install/local.params.example.multiple_hosts
tools/salt-install/local.params.example.single_host_multiple_hostnames
tools/salt-install/local.params.example.single_host_single_hostname
tools/salt-install/provision.sh
tools/salt-install/tests/run-test.sh
tools/test-collection-create/test-collection-create.py [new file with mode: 0644]

index 6f74955cd1c8d0940c979b70867a6cbbfda5aacb..0191c7f0df6f768959e7716e95abd68128e21bf9 100644 (file)
@@ -13,26 +13,26 @@ class WorkUnitsControllerTest < ActionController::TestCase
   [
     ['foo', 10, 25,
       ['/pipeline_instances/zzzzz-d1hrv-1xfj6xkicf2muk2',
-       '/pipeline_instances/zzzzz-d1hrv-jobspeccomponts',
+       '/pipeline_instances/zzzzz-d1hrv-1yfj61234abcdk4',
        '/jobs/zzzzz-8i9sb-grx15v5mjnsyxk7'],
       ['/pipeline_instances/zzzzz-d1hrv-1yfj61234abcdk3',
        '/jobs/zzzzz-8i9sb-n7omg50bvt0m1nf',
        '/container_requests/zzzzz-xvhdp-cr4completedcr2']],
     ['pipeline_with_tagged_collection_input', 1, 1,
       ['/pipeline_instances/zzzzz-d1hrv-1yfj61234abcdk3'],
-      ['/pipeline_instances/zzzzz-d1hrv-jobspeccomponts',
+      ['/pipeline_instances/zzzzz-d1hrv-1yfj61234abcdk4',
        '/jobs/zzzzz-8i9sb-pshmckwoma9plh7',
        '/jobs/zzzzz-8i9sb-n7omg50bvt0m1nf',
        '/container_requests/zzzzz-xvhdp-cr4completedcr2']],
     ['no_such_match', 0, 0,
       [],
-      ['/pipeline_instances/zzzzz-d1hrv-jobspeccomponts',
+      ['/pipeline_instances/zzzzz-d1hrv-1yfj61234abcdk4',
        '/jobs/zzzzz-8i9sb-pshmckwoma9plh7',
        '/jobs/zzzzz-8i9sb-n7omg50bvt0m1nf',
        '/container_requests/zzzzz-xvhdp-cr4completedcr2']],
   ].each do |search_filter, expected_min, expected_max, expected, not_expected|
     test "all_processes page for search filter '#{search_filter}'" do
-      work_units_index(filters: [['any','@@', search_filter]], show_children: true)
+      work_units_index(filters: [['any','ilike', "%#{search_filter}%"]], show_children: true)
       assert_response :success
 
       # Verify that expected number of processes are found
index 4f2ebbc554d624440cd4dc5251667c7c5ecadfba..36b29468ff8b1012d32232b1031ee8f2cf4f6ab3 100644 (file)
@@ -14,7 +14,7 @@ class WorkUnitsTest < ActionDispatch::IntegrationTest
 
   [[true, 25, 100,
     ['/pipeline_instances/zzzzz-d1hrv-1yfj61234abcdk3',
-     '/pipeline_instances/zzzzz-d1hrv-jobspeccomponts',
+     '/pipeline_instances/zzzzz-d1hrv-1yfj61234abcdk4',
      '/jobs/zzzzz-8i9sb-grx15v5mjnsyxk7',
      '/jobs/zzzzz-8i9sb-n7omg50bvt0m1nf',
      '/container_requests/zzzzz-xvhdp-cr4completedcr2',
@@ -23,7 +23,7 @@ class WorkUnitsTest < ActionDispatch::IntegrationTest
      '/container_requests/zzzzz-xvhdp-oneof60crs00001']],
    [false, 25, 100,
     ['/pipeline_instances/zzzzz-d1hrv-1yfj61234abcdk3',
-     '/pipeline_instances/zzzzz-d1hrv-jobspeccomponts',
+     '/pipeline_instances/zzzzz-d1hrv-1yfj61234abcdk4',
      '/container_requests/zzzzz-xvhdp-cr4completedcr2'],
     ['/pipeline_instances/zzzzz-d1hrv-scarxiyajtshq3l',
      '/container_requests/zzzzz-xvhdp-oneof60crs00001',
index 8435e2871f882fd6588b709711182443dcc8f2f3..9e7410260f8955ea35f1d6e4790e792feb1c670f 100644 (file)
@@ -39,6 +39,10 @@ h2(#main). development main (as of 2021-07-15)
 
 "Upgrading from 2.2.0":#v2_2_0
 
+h3. Removed deprecated '@@' search operator
+
+The '@@' full text search operator, previously deprecated, has been removed. To perform a string search across multiple columns, use the 'ilike' operator on 'any' column as described in the "available list method filter section":{{site.baseurl}}/api/methods.html#substringsearchfilter of the API documentation.
+
 h3. Storage classes must be defined explicitly
 
 If your configuration uses the StorageClasses attribute on any Keep volumes, you must add a new @StorageClasses@ section that lists all of your storage classes. Refer to the updated documentation about "configuring storage classes":{{site.baseurl}}/admin/storage-classes.html for details.
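
A minimal sketch of the migration described in the upgrade note above, using the Arvados Python SDK; the search term is a placeholder and the SDK is assumed to be configured via the usual ARVADOS_API_HOST / ARVADOS_API_TOKEN environment variables:

    # Replacing a removed '@@' full text search filter with an 'ilike'
    # substring filter on 'any' column (note the '%' wildcards, matching
    # the change made to the workbench tests in this commit).
    import arvados

    api = arvados.api('v1')

    # Before (now rejected with 422):
    #   filters=[['any', '@@', 'sample123']]
    matches = api.collections().list(
        filters=[['any', 'ilike', '%sample123%']],
    ).execute()
    for c in matches['items']:
        print(c['uuid'], c['name'])
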
index c6e4ba00a74d8f1dcc440dfd83c125c17c9d6c1b..670a9e0da3d96ed16f8de9e053ae5a746cf0aa31 100644 (file)
@@ -96,7 +96,7 @@ table(table table-bordered table-condensed).
 |1|operator|string|Comparison operator|@>@, @>=@, @like@, @not in@|
 |2|operand|string, array, or null|Value to compare with the resource attribute|@"d00220fb%"@, @"1234"@, @["foo","bar"]@, @nil@|
 
-The following operators are available.[1]
+The following operators are available.
 
 table(table table-bordered table-condensed).
 |_. Operator|_. Operand type|_. Description|_. Example|
@@ -167,5 +167,3 @@ table(table table-bordered table-condensed).
 |_. Argument |_. Type |_. Description |_. Location |
 {background:#ccffcc}.|uuid|string|The UUID of the resource in question.|path||
 |{resource_type}|object||query||
-
-fn1^. NOTE: The filter operator for full-text search (@@) which previously worked (but was undocumented) is deprecated and will be removed in a future release.
index 26f0dbb0d1388da1886cea726fc644648b4d57e3..6851442054e1f49e8cde8c87dcced6d9eea0918a 100644 (file)
@@ -20,6 +20,7 @@ import (
        "path/filepath"
        "strconv"
        "strings"
+       "sync"
 
        "git.arvados.org/arvados.git/lib/boot"
        "git.arvados.org/arvados.git/lib/config"
@@ -187,6 +188,49 @@ func (s *IntegrationSuite) TestGetCollectionByPDH(c *check.C) {
        c.Check(coll.PortableDataHash, check.Equals, pdh)
 }
 
+// Tests bug #18004
+func (s *IntegrationSuite) TestRemoteUserAndTokenCacheRace(c *check.C) {
+       conn1 := s.testClusters["z1111"].Conn()
+       rootctx1, _, _ := s.testClusters["z1111"].RootClients()
+       rootctx2, _, _ := s.testClusters["z2222"].RootClients()
+       conn2 := s.testClusters["z2222"].Conn()
+       userctx1, _, _, _ := s.testClusters["z1111"].UserClients(rootctx1, c, conn1, "user2@example.com", true)
+
+       var wg1, wg2 sync.WaitGroup
+       creqs := 100
+
+       // Make concurrent requests to z2222 with a local token to make sure more
+       // than one worker is listening.
+       wg1.Add(1)
+       for i := 0; i < creqs; i++ {
+               wg2.Add(1)
+               go func() {
+                       defer wg2.Done()
+                       wg1.Wait()
+                       _, err := conn2.UserGetCurrent(rootctx2, arvados.GetOptions{})
+                       c.Check(err, check.IsNil, check.Commentf("warm up phase failed"))
+               }()
+       }
+       wg1.Done()
+       wg2.Wait()
+
+       // Real test pass -- use a different remote token than the one used in
+       // the warm-up phase.
+       wg1.Add(1)
+       for i := 0; i < creqs; i++ {
+               wg2.Add(1)
+               go func() {
+                       defer wg2.Done()
+                       wg1.Wait()
+                       // Retrieve the remote collection from cluster z2222.
+                       _, err := conn2.UserGetCurrent(userctx1, arvados.GetOptions{})
+                       c.Check(err, check.IsNil, check.Commentf("testing phase failed"))
+               }()
+       }
+       wg1.Done()
+       wg2.Wait()
+}
+
 func (s *IntegrationSuite) TestS3WithFederatedToken(c *check.C) {
        if _, err := exec.LookPath("s3cmd"); err != nil {
                c.Skip("s3cmd not in PATH")
@@ -502,7 +546,7 @@ func (s *IntegrationSuite) TestRequestIDHeader(c *check.C) {
 }
 
 // We test the direct access to the database
-// normally an integration test would not have a database access, but  in this case we need
+// normally an integration test would not have database access, but in this case we need
 // to test tokens that are secret, so there is no API response that will give them back
 func (s *IntegrationSuite) dbConn(c *check.C, clusterID string) (*sql.DB, *sql.Conn) {
        ctx := context.Background()
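
The two WaitGroups in TestRemoteUserAndTokenCacheRace above act as a starting gun: every goroutine blocks until wg1 is released, so all requests hit the controller at once. A rough Python sketch of the same gating pattern, where do_request is a hypothetical stand-in for the federated API call under test:

    # "Starting gun" pattern: spawn N workers, block them on one event,
    # then release them simultaneously so the requests race each other.
    import threading

    def do_request(i):
        pass  # placeholder for the concurrent API call under test

    def run_concurrently(n=100):
        gate = threading.Event()              # plays the role of wg1
        workers = []
        for i in range(n):
            t = threading.Thread(target=lambda i=i: (gate.wait(), do_request(i)))
            t.start()
            workers.append(t)
        gate.set()                            # all workers proceed at once
        for t in workers:                     # plays the role of wg2.Wait()
            t.join()

    run_concurrently()
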
index 741f542454e470ede35cc6f682c64c8a9b1bbf09..61fecad0a13c06664890a9cf2dfffb8346b7a47e 100644 (file)
@@ -101,7 +101,7 @@ func (e *singularityExecutor) checkImageCache(dockerImageID string, container ar
        if len(cl.Items) == 1 {
                imageCollection = cl.Items[0]
        } else {
-               collectionName := collectionName + " " + time.Now().UTC().Format(time.RFC3339)
+               collectionName := "converting " + collectionName
                exp := time.Now().Add(24 * 7 * 2 * time.Hour)
                err = containerClient.RequestAndDecode(&imageCollection,
                        arvados.EndpointCollectionCreate.Method,
@@ -112,6 +112,7 @@ func (e *singularityExecutor) checkImageCache(dockerImageID string, container ar
                                        "name":       collectionName,
                                        "trash_at":   exp.UTC().Format(time.RFC3339),
                                },
+                               "ensure_unique_name": true,
                        })
                if err != nil {
                        return nil, fmt.Errorf("error creating '%v' collection: %s", collectionName, err)
@@ -141,6 +142,12 @@ func (e *singularityExecutor) LoadImage(dockerImageID string, imageTarballPath s
        }
 
        if _, err := os.Stat(imageFilename); os.IsNotExist(err) {
+               // Make sure the docker image is readable, and error
+               // out if not.
+               if _, err := os.Stat(imageTarballPath); err != nil {
+                       return err
+               }
+
                e.logf("building singularity image")
                // "singularity build" does not accept a
                // docker-archive://... filename containing a ":" character,
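
The ensure_unique_name flag added above tells the API server to append a suffix instead of failing when a collection with the same name already exists in the owner project. A hedged example of the same flag from the Python SDK; the project UUID and image name are placeholders:

    # Creating a cache collection with ensure_unique_name, as the
    # singularity executor now does when converting a Docker image.
    import arvados

    api = arvados.api('v1')
    coll = api.collections().create(
        body={'collection': {
            'owner_uuid': 'zzzzz-j7d0g-xxxxxxxxxxxxxxx',  # placeholder
            'name': 'converting sha256:abc123',           # placeholder
        }},
        ensure_unique_name=True,
    ).execute()
    print(coll['uuid'], coll['name'])
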
index 52f2cee064905fd6a81e4e9e60a774dfc80bab55..7c7ed759c60058b5915ad1d56505dba6b56d84dd 100644 (file)
@@ -319,7 +319,17 @@ class ApiClientAuthorization < ArvadosModel
         user.last_name = "from cluster #{remote_user_prefix}"
       end
 
-      user.save!
+      begin
+        user.save!
+      rescue ActiveRecord::RecordInvalid, ActiveRecord::RecordNotUnique
+        Rails.logger.debug("remote user #{remote_user['uuid']} already exists, retrying...")
+        # Some other request won the race: retry fetching the user record.
+        user = User.find_by_uuid(remote_user['uuid'])
+        if !user
+          Rails.logger.warn("cannot find or create remote user #{remote_user['uuid']}")
+          return nil
+        end
+      end
 
       if user.is_invited && !remote_user['is_invited']
         # Remote user is not "invited" state, they should be unsetup, which
@@ -364,12 +374,24 @@ class ApiClientAuthorization < ArvadosModel
       exp = [db_current_time + Rails.configuration.Login.RemoteTokenRefresh,
              remote_token.andand['expires_at']].compact.min
       scopes = remote_token.andand['scopes'] || ['all']
-      auth = ApiClientAuthorization.find_or_create_by(uuid: token_uuid) do |auth|
-        auth.user = user
-        auth.api_token = stored_secret
-        auth.api_client_id = 0
-        auth.scopes = scopes
-        auth.expires_at = exp
+      begin
+        retries ||= 0
+        auth = ApiClientAuthorization.find_or_create_by(uuid: token_uuid) do |auth|
+          auth.user = user
+          auth.api_token = stored_secret
+          auth.api_client_id = 0
+          auth.scopes = scopes
+          auth.expires_at = exp
+        end
+      rescue ActiveRecord::RecordNotUnique
+        Rails.logger.debug("cached remote token #{token_uuid} already exists, retrying...")
+        # Some other request won the race: retry just once before erroring out
+        if (retries += 1) <= 1
+          retry
+        else
+          Rails.logger.warn("cannot find or create cached remote token #{token_uuid}")
+          return nil
+        end
       end
       auth.update_attributes!(user: user,
                               api_token: stored_secret,
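
Both rescue blocks above implement the same idea: when two requests race to create the same row, the loser catches the uniqueness error and falls back to re-fetching (or retries once). A self-contained sketch of that pattern using sqlite3, whose IntegrityError plays the role of ActiveRecord::RecordNotUnique:

    # Find-or-create with a bounded retry on a uniqueness violation.
    import sqlite3

    db = sqlite3.connect(':memory:')
    db.execute('CREATE TABLE users (uuid TEXT UNIQUE, name TEXT)')

    def find_or_create_user(uuid, name, retries=1):
        row = db.execute('SELECT uuid, name FROM users WHERE uuid = ?',
                         (uuid,)).fetchone()
        if row:
            return row
        try:
            db.execute('INSERT INTO users (uuid, name) VALUES (?, ?)',
                       (uuid, name))
            return (uuid, name)
        except sqlite3.IntegrityError:
            # Some other request won the race: retry by re-fetching.
            if retries > 0:
                return find_or_create_user(uuid, name, retries - 1)
            return None

    print(find_or_create_user('zzzzz-tpzed-000000000000000', 'remote user'))
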
diff --git a/services/api/db/migrate/20210816191509_drop_fts_index.rb b/services/api/db/migrate/20210816191509_drop_fts_index.rb
new file mode 100644 (file)
index 0000000..4ee1f55
--- /dev/null
@@ -0,0 +1,31 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+class DropFtsIndex < ActiveRecord::Migration[5.2]
+  def fts_indexes
+    {
+      "collections" => "collections_full_text_search_idx",
+      "container_requests" => "container_requests_full_text_search_idx",
+      "groups" => "groups_full_text_search_idx",
+      "jobs" => "jobs_full_text_search_idx",
+      "pipeline_instances" => "pipeline_instances_full_text_search_idx",
+      "pipeline_templates" => "pipeline_templates_full_text_search_idx",
+      "workflows" => "workflows_full_text_search_idx",
+    }
+  end
+
+  def up
+    fts_indexes.keys.each do |t|
+      i = fts_indexes[t]
+      execute "DROP INDEX IF EXISTS #{i}"
+    end
+  end
+
+  def down
+    fts_indexes.keys.each do |t|
+      i = fts_indexes[t]
+      execute "CREATE INDEX #{i} ON #{t} USING gin(#{t.classify.constantize.full_text_tsvector})"
+    end
+  end
+end
index 2bca887212a331143065d117816b81dc383f9b91..2f7748335694310b09de911104a446ce54885093 100644 (file)
@@ -238,29 +238,6 @@ SET default_tablespace = '';
 
 SET default_with_oids = false;
 
---
--- Name: groups; Type: TABLE; Schema: public; Owner: -
---
-
-CREATE TABLE public.groups (
-    id integer NOT NULL,
-    uuid character varying(255),
-    owner_uuid character varying(255),
-    created_at timestamp without time zone NOT NULL,
-    modified_by_client_uuid character varying(255),
-    modified_by_user_uuid character varying(255),
-    modified_at timestamp without time zone,
-    name character varying(255) NOT NULL,
-    description character varying(524288),
-    updated_at timestamp without time zone NOT NULL,
-    group_class character varying(255),
-    trash_at timestamp without time zone,
-    is_trashed boolean DEFAULT false NOT NULL,
-    delete_at timestamp without time zone,
-    properties jsonb DEFAULT '{}'::jsonb
-);
-
-
 --
 -- Name: api_client_authorizations; Type: TABLE; Schema: public; Owner: -
 --
@@ -571,6 +548,29 @@ CREATE SEQUENCE public.containers_id_seq
 ALTER SEQUENCE public.containers_id_seq OWNED BY public.containers.id;
 
 
+--
+-- Name: groups; Type: TABLE; Schema: public; Owner: -
+--
+
+CREATE TABLE public.groups (
+    id integer NOT NULL,
+    uuid character varying(255),
+    owner_uuid character varying(255),
+    created_at timestamp without time zone NOT NULL,
+    modified_by_client_uuid character varying(255),
+    modified_by_user_uuid character varying(255),
+    modified_at timestamp without time zone,
+    name character varying(255) NOT NULL,
+    description character varying(524288),
+    updated_at timestamp without time zone NOT NULL,
+    group_class character varying(255),
+    trash_at timestamp without time zone,
+    is_trashed boolean DEFAULT false NOT NULL,
+    delete_at timestamp without time zone,
+    properties jsonb DEFAULT '{}'::jsonb
+);
+
+
 --
 -- Name: groups_id_seq; Type: SEQUENCE; Schema: public; Owner: -
 --
@@ -1722,13 +1722,6 @@ CREATE INDEX authorized_keys_search_index ON public.authorized_keys USING btree
 CREATE INDEX collection_index_on_properties ON public.collections USING gin (properties);
 
 
---
--- Name: collections_full_text_search_idx; Type: INDEX; Schema: public; Owner: -
---
-
-CREATE INDEX collections_full_text_search_idx ON public.collections USING gin (to_tsvector('english'::regconfig, substr((((((((((((((((((COALESCE(owner_uuid, ''::character varying))::text || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(portable_data_hash, ''::character varying))::text) || ' '::text) || (COALESCE(uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || (COALESCE(description, ''::character varying))::text) || ' '::text) || COALESCE((properties)::text, ''::text)) || ' '::text) || COALESCE(file_names, ''::text)), 0, 1000000)));
-
-
 --
 -- Name: collections_search_index; Type: INDEX; Schema: public; Owner: -
 --
@@ -1743,13 +1736,6 @@ CREATE INDEX collections_search_index ON public.collections USING btree (owner_u
 CREATE INDEX collections_trgm_text_search_idx ON public.collections USING gin (((((((((((((((((((COALESCE(owner_uuid, ''::character varying))::text || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(portable_data_hash, ''::character varying))::text) || ' '::text) || (COALESCE(uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || (COALESCE(description, ''::character varying))::text) || ' '::text) || COALESCE((properties)::text, ''::text)) || ' '::text) || COALESCE(file_names, ''::text))) public.gin_trgm_ops);
 
 
---
--- Name: container_requests_full_text_search_idx; Type: INDEX; Schema: public; Owner: -
---
-
-CREATE INDEX container_requests_full_text_search_idx ON public.container_requests USING gin (to_tsvector('english'::regconfig, substr((((((((((((((((((((((((((((((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(description, ''::text)) || ' '::text) || COALESCE((properties)::text, ''::text)) || ' '::text) || (COALESCE(state, ''::character varying))::text) || ' '::text) || (COALESCE(requesting_container_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(container_uuid, ''::character varying))::text) || ' '::text) || COALESCE(runtime_constraints, ''::text)) || ' '::text) || (COALESCE(container_image, ''::character varying))::text) || ' '::text) || COALESCE(environment, ''::text)) || ' '::text) || (COALESCE(cwd, ''::character varying))::text) || ' '::text) || COALESCE(command, ''::text)) || ' '::text) || (COALESCE(output_path, ''::character varying))::text) || ' '::text) || COALESCE(filters, ''::text)) || ' '::text) || COALESCE(scheduling_parameters, ''::text)) || ' '::text) || (COALESCE(output_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(log_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(output_name, ''::character varying))::text), 0, 1000000)));
-
-
 --
 -- Name: container_requests_index_on_properties; Type: INDEX; Schema: public; Owner: -
 --
@@ -1785,13 +1771,6 @@ CREATE INDEX containers_search_index ON public.containers USING btree (uuid, own
 CREATE INDEX group_index_on_properties ON public.groups USING gin (properties);
 
 
---
--- Name: groups_full_text_search_idx; Type: INDEX; Schema: public; Owner: -
---
-
-CREATE INDEX groups_full_text_search_idx ON public.groups USING gin (to_tsvector('english'::regconfig, substr((((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || (COALESCE(description, ''::character varying))::text) || ' '::text) || (COALESCE(group_class, ''::character varying))::text) || ' '::text) || COALESCE((properties)::text, ''::text)), 0, 1000000)));
-
-
 --
 -- Name: groups_search_index; Type: INDEX; Schema: public; Owner: -
 --
@@ -2779,13 +2758,6 @@ CREATE UNIQUE INDEX index_workflows_on_uuid ON public.workflows USING btree (uui
 CREATE INDEX job_tasks_search_index ON public.job_tasks USING btree (uuid, owner_uuid, modified_by_client_uuid, modified_by_user_uuid, job_uuid, created_by_job_task_uuid);
 
 
---
--- Name: jobs_full_text_search_idx; Type: INDEX; Schema: public; Owner: -
---
-
-CREATE INDEX jobs_full_text_search_idx ON public.jobs USING gin (to_tsvector('english'::regconfig, substr((((((((((((((((((((((((((((((((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(submit_id, ''::character varying))::text) || ' '::text) || (COALESCE(script, ''::character varying))::text) || ' '::text) || (COALESCE(script_version, ''::character varying))::text) || ' '::text) || COALESCE(script_parameters, ''::text)) || ' '::text) || (COALESCE(cancelled_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(cancelled_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(output, ''::character varying))::text) || ' '::text) || (COALESCE(is_locked_by_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(log, ''::character varying))::text) || ' '::text) || COALESCE(tasks_summary, ''::text)) || ' '::text) || COALESCE(runtime_constraints, ''::text)) || ' '::text) || (COALESCE(repository, ''::character varying))::text) || ' '::text) || (COALESCE(supplied_script_version, ''::character varying))::text) || ' '::text) || (COALESCE(docker_image_locator, ''::character varying))::text) || ' '::text) || (COALESCE(description, ''::character varying))::text) || ' '::text) || (COALESCE(state, ''::character varying))::text) || ' '::text) || (COALESCE(arvados_sdk_version, ''::character varying))::text) || ' '::text) || COALESCE(components, ''::text)), 0, 1000000)));
-
-
 --
 -- Name: jobs_search_index; Type: INDEX; Schema: public; Owner: -
 --
@@ -2877,13 +2849,6 @@ CREATE INDEX permission_target ON public.materialized_permissions USING btree (t
 CREATE UNIQUE INDEX permission_user_target ON public.materialized_permissions USING btree (user_uuid, target_uuid);
 
 
---
--- Name: pipeline_instances_full_text_search_idx; Type: INDEX; Schema: public; Owner: -
---
-
-CREATE INDEX pipeline_instances_full_text_search_idx ON public.pipeline_instances USING gin (to_tsvector('english'::regconfig, substr((((((((((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(pipeline_template_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(components, ''::text)) || ' '::text) || COALESCE(properties, ''::text)) || ' '::text) || (COALESCE(state, ''::character varying))::text) || ' '::text) || COALESCE(components_summary, ''::text)) || ' '::text) || (COALESCE(description, ''::character varying))::text), 0, 1000000)));
-
-
 --
 -- Name: pipeline_instances_search_index; Type: INDEX; Schema: public; Owner: -
 --
@@ -2905,13 +2870,6 @@ CREATE INDEX pipeline_instances_trgm_text_search_idx ON public.pipeline_instance
 CREATE UNIQUE INDEX pipeline_template_owner_uuid_name_unique ON public.pipeline_templates USING btree (owner_uuid, name);
 
 
---
--- Name: pipeline_templates_full_text_search_idx; Type: INDEX; Schema: public; Owner: -
---
-
-CREATE INDEX pipeline_templates_full_text_search_idx ON public.pipeline_templates USING gin (to_tsvector('english'::regconfig, substr((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(components, ''::text)) || ' '::text) || (COALESCE(description, ''::character varying))::text), 0, 1000000)));
-
-
 --
 -- Name: pipeline_templates_search_index; Type: INDEX; Schema: public; Owner: -
 --
@@ -2968,13 +2926,6 @@ CREATE INDEX users_search_index ON public.users USING btree (uuid, owner_uuid, m
 CREATE INDEX virtual_machines_search_index ON public.virtual_machines USING btree (uuid, owner_uuid, modified_by_client_uuid, modified_by_user_uuid, hostname);
 
 
---
--- Name: workflows_full_text_search_idx; Type: INDEX; Schema: public; Owner: -
---
-
-CREATE INDEX workflows_full_text_search_idx ON public.workflows USING gin (to_tsvector('english'::regconfig, substr((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(description, ''::text)), 0, 1000000)));
-
-
 --
 -- Name: workflows_search_idx; Type: INDEX; Schema: public; Owner: -
 --
@@ -3194,6 +3145,7 @@ INSERT INTO "schema_migrations" (version) VALUES
 ('20201202174753'),
 ('20210108033940'),
 ('20210126183521'),
-('20210621204455');
+('20210621204455'),
+('20210816191509');
 
 
index 5688ca6140f17fcef94ed112481cddb06e75c668..f8898d63c90de2169fc8d18b53d40f68171ae945 100644 (file)
@@ -31,7 +31,10 @@ module RecordFilters
     model_table_name = model_class.table_name
     filters.each do |filter|
       attrs_in, operator, operand = filter
-      if attrs_in == 'any' && operator != '@@'
+      if operator == '@@'
+        raise ArgumentError.new("Full text search operator is no longer supported")
+      end
+      if attrs_in == 'any'
         attrs = model_class.searchable_columns(operator)
       elsif attrs_in.is_a? Array
         attrs = attrs_in
@@ -54,22 +57,6 @@ module RecordFilters
         attrs = []
       end
 
-      if operator == '@@'
-        # Full-text search
-        if attrs_in != 'any'
-          raise ArgumentError.new("Full text search on individual columns is not supported")
-        end
-        if operand.is_a? Array
-          raise ArgumentError.new("Full text search not supported for array operands")
-        end
-
-        # Skip the generic per-column operator loop below
-        attrs = []
-        # Use to_tsquery since plainto_tsquery does not support prefix
-        # search. And, split operand and join the words with ' & '
-        cond_out << model_class.full_text_tsvector+" @@ to_tsquery(?)"
-        param_out << operand.split.join(' & ')
-      end
       attrs.each do |attr|
         subproperty = attr.split(".", 2)
 
index 9b067aa263d2baede05c8a325560117a7d9df109..ab76417902214162506707d3e642f93539ffe7ed 100644 (file)
@@ -521,7 +521,7 @@ running_job_in_publicly_accessible_project:
   uuid: zzzzz-8i9sb-n7omg50bvt0m1nf
   owner_uuid: zzzzz-j7d0g-zhxawtyetzwc5f0
   modified_by_user_uuid: zzzzz-tpzed-xurymjxw79nv3jz
-  repository: active/foo
+  repository: active/bar
   script: running_job_script
   script_version: 4fe459abe02d9b365932b8f5dc419439ab4e2577
   state: Running
index 0865503281fe247f0fd027d4054d846a9370e9cf..9621b3effc1c74f0b832c021b3c9d2b99ef11586 100644 (file)
@@ -111,12 +111,9 @@ has_job:
 components_is_jobspec:
   # Helps test that clients cope with funny-shaped components.
   # For an example, see #3321.
-  uuid: zzzzz-d1hrv-jobspeccomponts
-  created_at: <%= 30.minute.ago.to_s(:db) %>
+  uuid: zzzzz-d1hrv-1yfj61234abcdk4
+  created_at: <%= 2.minute.ago.to_s(:db) %>
   owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
-  created_at: 2014-04-14 12:35:04 -0400
-  updated_at: 2014-04-14 12:35:04 -0400
-  modified_at: 2014-04-14 12:35:04 -0400
   modified_by_client_uuid: zzzzz-ozdt8-brczlopd8u8d0jr
   modified_by_user_uuid: zzzzz-tpzed-xurymjxw79nv3jz
   state: RunningOnServer
index 26270b1c3c9c9b4da0ec4c03f6a8d6fd861fbe70..bcb18078674ffd27bb124772b4478ebecfff9a76 100644 (file)
@@ -29,34 +29,14 @@ class Arvados::V1::FiltersTest < ActionController::TestCase
                  json_response['errors'].join(' '))
   end
 
-  test 'error message for full text search on a specific column' do
+  test 'error message for unsupported full text search' do
     @controller = Arvados::V1::CollectionsController.new
     authorize_with :active
     get :index, params: {
       filters: [['uuid', '@@', 'abcdef']],
     }
     assert_response 422
-    assert_match(/not supported/, json_response['errors'].join(' '))
-  end
-
-  test 'difficult characters in full text search' do
-    @controller = Arvados::V1::CollectionsController.new
-    authorize_with :active
-    get :index, params: {
-      filters: [['any', '@@', 'a|b"c']],
-    }
-    assert_response :success
-    # (Doesn't matter so much which results are returned.)
-  end
-
-  test 'array operand in full text search' do
-    @controller = Arvados::V1::CollectionsController.new
-    authorize_with :active
-    get :index, params: {
-      filters: [['any', '@@', ['abc', 'def']]],
-    }
-    assert_response 422
-    assert_match(/not supported/, json_response['errors'].join(' '))
+    assert_match(/no longer supported/, json_response['errors'].join(' '))
   end
 
   test 'api responses provide timestamps with nanoseconds' do
@@ -100,58 +80,6 @@ class Arvados::V1::FiltersTest < ActionController::TestCase
     end
   end
 
-  test "full text search with count='none'" do
-    @controller = Arvados::V1::GroupsController.new
-    authorize_with :admin
-
-    get :contents, params: {
-      format: :json,
-      count: 'none',
-      limit: 1000,
-      filters: [['any', '@@', Rails.configuration.ClusterID]],
-    }
-
-    assert_response :success
-
-    all_objects = Hash.new(0)
-    json_response['items'].map{|o| o['kind']}.each{|t| all_objects[t] += 1}
-
-    assert_equal true, all_objects['arvados#group']>0
-    assert_equal true, all_objects['arvados#job']>0
-    assert_equal true, all_objects['arvados#pipelineInstance']>0
-    assert_equal true, all_objects['arvados#pipelineTemplate']>0
-
-    # Perform test again mimicking a second page request with:
-    # last_object_class = PipelineInstance
-    #   and hence groups and jobs should not be included in the response
-    # offset = 5, which means first 5 pipeline instances were already received in page 1
-    #   and hence the remaining pipeline instances and all other object types should be included in the response
-
-    @test_counter = 0  # Reset executed action counter
-
-    @controller = Arvados::V1::GroupsController.new
-
-    get :contents, params: {
-      format: :json,
-      count: 'none',
-      limit: 1000,
-      offset: '5',
-      last_object_class: 'PipelineInstance',
-      filters: [['any', '@@', Rails.configuration.ClusterID]],
-    }
-
-    assert_response :success
-
-    second_page = Hash.new(0)
-    json_response['items'].map{|o| o['kind']}.each{|t| second_page[t] += 1}
-
-    assert_equal false, second_page.include?('arvados#group')
-    assert_equal false, second_page.include?('arvados#job')
-    assert_equal true, second_page['arvados#pipelineInstance']>0
-    assert_equal all_objects['arvados#pipelineInstance'], second_page['arvados#pipelineInstance']+5
-    assert_equal true, second_page['arvados#pipelineTemplate']>0
-  end
-
   [['prop1', '=', 'value1', [:collection_with_prop1_value1], [:collection_with_prop1_value2, :collection_with_prop2_1]],
    ['prop1', '!=', 'value1', [:collection_with_prop1_value2, :collection_with_prop2_1], [:collection_with_prop1_value1]],
    ['prop1', 'exists', true, [:collection_with_prop1_value1, :collection_with_prop1_value2, :collection_with_prop1_value3, :collection_with_prop1_other1], [:collection_with_prop2_1]],
index 73cbad64303391e82ef593d7a9cffc080ae6084f..070e964e538c6d0f23992b5d1426be7f88f7146d 100644 (file)
@@ -373,75 +373,6 @@ class CollectionsApiTest < ActionDispatch::IntegrationTest
     end
   end
 
-  test "search collection using full text search" do
-    # create collection to be searched for
-    signed_manifest = Collection.sign_manifest(". 85877ca2d7e05498dd3d109baf2df106+95+A3a4e26a366ee7e4ed3e476ccf05354761be2e4ae@545a9920 0:95:file_in_subdir1\n./subdir2/subdir3 2bbc341c702df4d8f42ec31f16c10120+64+A315d7e7bad2ce937e711fc454fae2d1194d14d64@545a9920 0:32:file1_in_subdir3.txt 32:32:file2_in_subdir3.txt\n./subdir2/subdir3/subdir4 2bbc341c702df4d8f42ec31f16c10120+64+A315d7e7bad2ce937e711fc454fae2d1194d14d64@545a9920 0:32:file3_in_subdir4.txt 32:32:file4_in_subdir4.txt\n", api_token(:active))
-    post "/arvados/v1/collections",
-      params: {
-        format: :json,
-        collection: {description: 'specific collection description', manifest_text: signed_manifest}.to_json,
-      },
-      headers: auth(:active)
-    assert_response :success
-    assert_equal true, json_response['manifest_text'].include?('file4_in_subdir4.txt')
-
-    # search using the filename
-    search_using_full_text_search 'subdir2', 0
-    search_using_full_text_search 'subdir2:*', 1
-    search_using_full_text_search 'subdir2/subdir3/subdir4', 1
-    search_using_full_text_search 'file4:*', 1
-    search_using_full_text_search 'file4_in_subdir4.txt', 1
-    search_using_full_text_search 'subdir2 file4:*', 0      # first word is incomplete
-    search_using_full_text_search 'subdir2/subdir3/subdir4 file4:*', 1
-    search_using_full_text_search 'subdir2/subdir3/subdir4 file4_in_subdir4.txt', 1
-    search_using_full_text_search 'ile4', 0                 # not a prefix match
-  end
-
-  def search_using_full_text_search search_filter, expected_items
-    get '/arvados/v1/collections',
-      params: {:filters => [['any', '@@', search_filter]].to_json},
-      headers: auth(:active)
-    assert_response :success
-    response_items = json_response['items']
-    assert_not_nil response_items
-    if expected_items == 0
-      assert_empty response_items
-    else
-      refute_empty response_items
-      first_item = response_items.first
-      assert_not_nil first_item
-    end
-  end
-
-  # search for the filename in the file_names column and expect error
-  test "full text search not supported for individual columns" do
-    get '/arvados/v1/collections',
-      params: {:filters => [['name', '@@', 'General']].to_json},
-      headers: auth(:active)
-    assert_response 422
-  end
-
-  [
-    'quick fox',
-    'quick_brown fox',
-    'brown_ fox',
-    'fox dogs',
-  ].each do |search_filter|
-    test "full text search ignores special characters and finds with filter #{search_filter}" do
-      # description: The quick_brown_fox jumps over the lazy_dog
-      # full text search treats '_' as space apparently
-      get '/arvados/v1/collections',
-        params: {:filters => [['any', '@@', search_filter]].to_json},
-        headers: auth(:active)
-      assert_response 200
-      response_items = json_response['items']
-      assert_not_nil response_items
-      first_item = response_items.first
-      refute_empty first_item
-      assert_equal first_item['description'], 'The quick_brown_fox jumps over the lazy_dog'
-    end
-  end
-
   test "create and get collection with properties" do
     # create collection to be searched for
     signed_manifest = Collection.sign_manifest(". bad42fa702ae3ea7d888fef11b46f450+44 0:44:my_test_file.txt\n", api_token(:active))
index aa67166f7e613a7b71f1ce8b798cf3b23b060e4a..e76f2b54068ad729fe94f87a3d2150846674db0b 100644 (file)
@@ -64,46 +64,6 @@ class GroupsTest < ActionDispatch::IntegrationTest
     end
   end
 
-  [
-    ['Collection_', true],            # collections and pipelines templates
-    ['hash', true],                   # pipeline templates
-    ['fa7aeb5140e2848d39b', false],   # script_parameter of pipeline instances
-    ['fa7aeb5140e2848d39b:*', true],  # script_parameter of pipeline instances
-    ['project pipeline', true],       # finds "Completed pipeline in A Project"
-    ['project pipeli:*', true],       # finds "Completed pipeline in A Project"
-    ['proje pipeli:*', false],        # first word is incomplete, so no prefix match
-    ['no-such-thing', false],         # script_parameter of pipeline instances
-  ].each do |search_filter, expect_results|
-    test "full text search of group-owned objects for #{search_filter}" do
-      get "/arvados/v1/groups/contents",
-        params: {
-          id: groups(:aproject).uuid,
-          limit: 5,
-          :filters => [['any', '@@', search_filter]].to_json
-        },
-        headers: auth(:active)
-      assert_response :success
-      if expect_results
-        refute_empty json_response['items']
-        json_response['items'].each do |item|
-          assert item['uuid']
-          assert_equal groups(:aproject).uuid, item['owner_uuid']
-        end
-      else
-        assert_empty json_response['items']
-      end
-    end
-  end
-
-  test "full text search is not supported for individual columns" do
-    get "/arvados/v1/groups/contents",
-      params: {
-        :filters => [['name', '@@', 'Private']].to_json
-      },
-      headers: auth(:active)
-    assert_response 422
-  end
-
   test "group contents with include trash collections" do
     get "/arvados/v1/groups/contents",
       params: {
index 64f78071350a6736994986eff3267c541e72b4f6..1e2e08059ef92c75827bcea9baa5d95edc2945c4 100644 (file)
@@ -155,51 +155,6 @@ class ArvadosModelTest < ActiveSupport::TestCase
     end
   end
 
-  test "full text search index exists on models" do
-    indexes = {}
-    conn = ActiveRecord::Base.connection
-    conn.exec_query("SELECT i.relname as indname,
-      i.relowner as indowner,
-      idx.indrelid::regclass::text as table,
-      am.amname as indam,
-      idx.indkey,
-      ARRAY(
-            SELECT pg_get_indexdef(idx.indexrelid, k + 1, true)
-                   FROM generate_subscripts(idx.indkey, 1) as k
-                   ORDER BY k
-                   ) as keys,
-      idx.indexprs IS NOT NULL as indexprs,
-      idx.indpred IS NOT NULL as indpred
-      FROM   pg_index as idx
-      JOIN   pg_class as i
-      ON     i.oid = idx.indexrelid
-      JOIN   pg_am as am
-      ON     i.relam = am.oid
-      JOIN   pg_namespace as ns
-      ON     ns.oid = i.relnamespace
-      AND    ns.nspname = ANY(current_schemas(false))").each do |idx|
-      if idx['keys'].match(/to_tsvector/)
-        indexes[idx['table']] ||= []
-        indexes[idx['table']] << idx
-      end
-    end
-    fts_tables =  ["collections", "container_requests", "groups", "jobs",
-                   "pipeline_instances", "pipeline_templates", "workflows"]
-    fts_tables.each do |table|
-      table_class = table.classify.constantize
-      if table_class.respond_to?('full_text_searchable_columns')
-        expect = table_class.full_text_searchable_columns
-        ok = false
-        indexes[table].andand.each do |idx|
-          if expect == idx['keys'].scan(/COALESCE\(([A-Za-z_]+)/).flatten
-            ok = true
-          end
-        end
-        assert ok, "#{table} has no full-text index\nexpect: #{expect.inspect}\nfound: #{indexes[table].inspect}"
-      end
-    end
-  end
-
   [
     %w[collections collections_trgm_text_search_idx],
     %w[container_requests container_requests_trgm_text_search_idx],
index 78cbd0d8cfd06f1c638549151c56e74a32025237..2b963d9a68659c342de818af52789f0d96031ef3 100644 (file)
@@ -298,20 +298,52 @@ class CollectionDirectoryBase(Directory):
     def on_event(self, event, collection, name, item):
         if collection == self.collection:
             name = self.sanitize_filename(name)
-            _logger.debug("collection notify %s %s %s %s", event, collection, name, item)
-            with llfuse.lock:
-                if event == arvados.collection.ADD:
-                    self.new_entry(name, item, self.mtime())
-                elif event == arvados.collection.DEL:
-                    ent = self._entries[name]
-                    del self._entries[name]
-                    self.inodes.invalidate_entry(self, name)
-                    self.inodes.del_entry(ent)
-                elif event == arvados.collection.MOD:
-                    if hasattr(item, "fuse_entry") and item.fuse_entry is not None:
-                        self.inodes.invalidate_inode(item.fuse_entry)
-                    elif name in self._entries:
-                        self.inodes.invalidate_inode(self._entries[name])
+
+            #
+            # It's possible for another thread to have llfuse.lock and
+            # be waiting on collection.lock.  Meanwhile, we released
+            # llfuse.lock earlier in the stack, but are still holding
+            # on to the collection lock, and now we need to re-acquire
+            # llfuse.lock.  If we don't release the collection lock,
+            # we'll deadlock where we're holding the collection lock
+            # waiting for llfuse.lock and the other thread is holding
+            # llfuse.lock and waiting for the collection lock.
+            #
+            # The correct locking order here is to take llfuse.lock
+            # first, then the collection lock.
+            #
+            # Since collection.lock is an RLock, it might be locked
+            # multiple times, so we need to release it multiple times,
+            # keep a count, then re-lock it the correct number of
+            # times.
+            #
+            lockcount = 0
+            try:
+                while True:
+                    self.collection.lock.release()
+                    lockcount += 1
+            except RuntimeError:
+                pass
+
+            try:
+                with llfuse.lock:
+                    with self.collection.lock:
+                        if event == arvados.collection.ADD:
+                            self.new_entry(name, item, self.mtime())
+                        elif event == arvados.collection.DEL:
+                            ent = self._entries[name]
+                            del self._entries[name]
+                            self.inodes.invalidate_entry(self, name)
+                            self.inodes.del_entry(ent)
+                        elif event == arvados.collection.MOD:
+                            if hasattr(item, "fuse_entry") and item.fuse_entry is not None:
+                                self.inodes.invalidate_inode(item.fuse_entry)
+                            elif name in self._entries:
+                                self.inodes.invalidate_inode(self._entries[name])
+            finally:
+                while lockcount > 0:
+                    self.collection.lock.acquire()
+                    lockcount -= 1
 
     def populate(self, mtime):
         self._mtime = mtime
@@ -587,10 +619,26 @@ class TmpCollectionDirectory(CollectionDirectoryBase):
     def on_event(self, *args, **kwargs):
         super(TmpCollectionDirectory, self).on_event(*args, **kwargs)
         if self.collection_record_file:
-            with llfuse.lock:
-                self.collection_record_file.invalidate()
-            self.inodes.invalidate_inode(self.collection_record_file)
-            _logger.debug("%s invalidated collection record", self)
+
+            # See discussion in CollectionDirectoryBase.on_event
+            lockcount = 0
+            try:
+                while True:
+                    self.collection.lock.release()
+                    lockcount += 1
+            except RuntimeError:
+                pass
+
+            try:
+                with llfuse.lock:
+                    with self.collection.lock:
+                        self.collection_record_file.invalidate()
+                        self.inodes.invalidate_inode(self.collection_record_file)
+                        _logger.debug("%s invalidated collection record", self)
+            finally:
+                while lockcount > 0:
+                    self.collection.lock.acquire()
+                    lockcount -= 1
 
     def collection_record(self):
         with llfuse.lock_released:
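
The lock-ordering fix above relies on two properties of Python's threading.RLock: release() raises RuntimeError once the lock is no longer held, and a re-entrant holder must release once per acquire. A standalone sketch of the fully-release-then-restore pattern:

    # Fully release a (possibly multiply-held) re-entrant lock, do work
    # that needs a different lock ordering, then restore the hold count.
    import threading

    rlock = threading.RLock()

    def with_lock_fully_released(work):
        lockcount = 0
        try:
            while True:
                rlock.release()        # raises RuntimeError when not held
                lockcount += 1
        except RuntimeError:
            pass
        try:
            work()                     # safe to take other locks first here
        finally:
            for _ in range(lockcount):
                rlock.acquire()        # restore the original hold count

    with rlock:
        with rlock:                    # held twice, as an RLock allows
            with_lock_fully_released(lambda: print('reordered locking'))
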
index 897447dd11c7a95a5b113d867fb0de28cbed6844..16dcd2aaf6ee5d57e9bb60176a643a9116df8f9e 100644 (file)
@@ -23,6 +23,7 @@ import (
        "os"
        "sort"
        "strings"
+       "sync/atomic"
        "time"
 
        "git.arvados.org/arvados.git/lib/config"
@@ -367,6 +368,94 @@ func (s *HandlerSuite) TestReadsOrderedByStorageClassPriority(c *check.C) {
        }
 }
 
+func (s *HandlerSuite) TestPutWithNoWritableVolumes(c *check.C) {
+       s.cluster.Volumes = map[string]arvados.Volume{
+               "zzzzz-nyw5e-111111111111111": {
+                       Driver:         "mock",
+                       Replication:    1,
+                       ReadOnly:       true,
+                       StorageClasses: map[string]bool{"class1": true}},
+       }
+       c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
+       resp := IssueRequest(s.handler,
+               &RequestTester{
+                       method:         "PUT",
+                       uri:            "/" + TestHash,
+                       requestBody:    TestBlock,
+                       storageClasses: "class1",
+               })
+       c.Check(resp.Code, check.Equals, FullError.HTTPCode)
+       c.Check(s.handler.volmgr.mountMap["zzzzz-nyw5e-111111111111111"].Volume.(*MockVolume).CallCount("Put"), check.Equals, 0)
+}
+
+func (s *HandlerSuite) TestConcurrentWritesToMultipleStorageClasses(c *check.C) {
+       s.cluster.Volumes = map[string]arvados.Volume{
+               "zzzzz-nyw5e-111111111111111": {
+                       Driver:         "mock",
+                       Replication:    1,
+                       StorageClasses: map[string]bool{"class1": true}},
+               "zzzzz-nyw5e-121212121212121": {
+                       Driver:         "mock",
+                       Replication:    1,
+                       StorageClasses: map[string]bool{"class1": true, "class2": true}},
+               "zzzzz-nyw5e-222222222222222": {
+                       Driver:         "mock",
+                       Replication:    1,
+                       StorageClasses: map[string]bool{"class2": true}},
+       }
+
+       for _, trial := range []struct {
+               setCounter uint32 // value to stuff vm.counter, to control offset
+               classes    string // desired classes
+               put111     int    // expected number of "put" ops on 11111... after 2x put reqs
+               put121     int    // expected number of "put" ops on 12121...
+               put222     int    // expected number of "put" ops on 22222...
+               cmp111     int    // expected number of "compare" ops on 11111... after 2x put reqs
+               cmp121     int    // expected number of "compare" ops on 12121...
+               cmp222     int    // expected number of "compare" ops on 22222...
+       }{
+               {0, "class1",
+                       1, 0, 0,
+                       2, 1, 0}, // first put compares on all vols with class1; second put succeeds after checking 111
+               {0, "class2",
+                       0, 1, 0,
+                       0, 2, 1}, // first put compares on all vols with class2; second put succeeds after checking 121
+               {0, "class1,class2",
+                       1, 1, 0,
+                       2, 2, 1}, // first put compares on all vols; second put succeeds after checking 111 and 121
+               {1, "class1,class2",
+                       0, 1, 0, // vm.counter offset is 1 so the first volume attempted is 121
+                       2, 2, 1}, // first put compares on all vols; second put succeeds after checking 111 and 121
+               {0, "class1,class2,class404",
+                       1, 1, 0,
+                       2, 2, 1}, // first put compares on all vols; second put doesn't compare on 222 because it already satisfied class2 on 121
+       } {
+               c.Logf("%+v", trial)
+               s.cluster.StorageClasses = map[string]arvados.StorageClassConfig{
+                       "class1": {},
+                       "class2": {},
+                       "class3": {},
+               }
+               c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
+               atomic.StoreUint32(&s.handler.volmgr.counter, trial.setCounter)
+               for i := 0; i < 2; i++ {
+                       IssueRequest(s.handler,
+                               &RequestTester{
+                                       method:         "PUT",
+                                       uri:            "/" + TestHash,
+                                       requestBody:    TestBlock,
+                                       storageClasses: trial.classes,
+                               })
+               }
+               c.Check(s.handler.volmgr.mountMap["zzzzz-nyw5e-111111111111111"].Volume.(*MockVolume).CallCount("Put"), check.Equals, trial.put111)
+               c.Check(s.handler.volmgr.mountMap["zzzzz-nyw5e-121212121212121"].Volume.(*MockVolume).CallCount("Put"), check.Equals, trial.put121)
+               c.Check(s.handler.volmgr.mountMap["zzzzz-nyw5e-222222222222222"].Volume.(*MockVolume).CallCount("Put"), check.Equals, trial.put222)
+               c.Check(s.handler.volmgr.mountMap["zzzzz-nyw5e-111111111111111"].Volume.(*MockVolume).CallCount("Compare"), check.Equals, trial.cmp111)
+               c.Check(s.handler.volmgr.mountMap["zzzzz-nyw5e-121212121212121"].Volume.(*MockVolume).CallCount("Compare"), check.Equals, trial.cmp121)
+               c.Check(s.handler.volmgr.mountMap["zzzzz-nyw5e-222222222222222"].Volume.(*MockVolume).CallCount("Compare"), check.Equals, trial.cmp222)
+       }
+}
+
 // Test TOUCH requests.
 func (s *HandlerSuite) TestTouchHandler(c *check.C) {
        c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
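
The trials above exercise the new putProgress bookkeeping in handlers.go: classNeeded records the classes originally requested, classTodo shrinks as writes land, and Sub rolls a pending write back if it fails. A compact Python sketch of that bookkeeping, assuming each mount is just a replication count plus a set of storage classes:

    # putProgress-style tracking: done only when every needed class is
    # satisfied and at least one replica has been written.
    class PutProgress:
        def __init__(self, classes):
            self.class_needed = set(classes)   # immutable request
            self.class_todo = set(classes)     # still unsatisfied
            self.class_done = {}               # replication per class
            self.total_replication = 0

        def add(self, replication, classes):
            self.total_replication += replication
            for c in classes:
                self.class_done[c] = self.class_done.get(c, 0) + replication
                self.class_todo.discard(c)

        def sub(self, replication, classes):
            self.total_replication -= replication
            for c in classes:
                self.class_done[c] -= replication
                if c in self.class_needed:
                    self.class_todo.add(c)

        def done(self):
            return not self.class_todo and self.total_replication > 0

    pr = PutProgress(['class1', 'class2'])
    pr.add(1, {'class1', 'class2'})   # e.g. the 12121... mount
    print(pr.done())                  # True
    pr.sub(1, {'class1', 'class2'})   # pending write failed; roll back
    print(pr.done())                  # False
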
index 2b469a13eb993e0827bac8ae1ebe4db46bc8c4df..910033ebb1d8408c90a4bde441d7edc8d99b109a 100644 (file)
@@ -18,6 +18,7 @@ import (
        "strconv"
        "strings"
        "sync"
+       "sync/atomic"
        "time"
 
        "git.arvados.org/arvados.git/sdk/go/arvados"
@@ -741,6 +742,7 @@ func GetBlock(ctx context.Context, volmgr *RRVolumeManager, hash string, buf []b
 }
 
 type putProgress struct {
+       classNeeded      map[string]bool
        classTodo        map[string]bool
        mountUsed        map[*VolumeMount]bool
        totalReplication int
@@ -769,7 +771,7 @@ func (pr putProgress) ClassReplication() string {
 
 func (pr *putProgress) Add(mnt *VolumeMount) {
        if pr.mountUsed[mnt] {
-               logrus.Warnf("BUG? superfluous extra write to mount %s", mnt)
+               logrus.Warnf("BUG? superfluous extra write to mount %s", mnt.UUID)
                return
        }
        pr.mountUsed[mnt] = true
@@ -780,6 +782,21 @@ func (pr *putProgress) Add(mnt *VolumeMount) {
        }
 }
 
+func (pr *putProgress) Sub(mnt *VolumeMount) {
+       if !pr.mountUsed[mnt] {
+               logrus.Warnf("BUG? Sub called with no prior matching Add: %s", mnt.UUID)
+               return
+       }
+       pr.mountUsed[mnt] = false
+       pr.totalReplication -= mnt.Replication
+       for class := range mnt.StorageClasses {
+               pr.classDone[class] -= mnt.Replication
+               if pr.classNeeded[class] {
+                       pr.classTodo[class] = true
+               }
+       }
+}
+
 func (pr *putProgress) Done() bool {
        return len(pr.classTodo) == 0 && pr.totalReplication > 0
 }
@@ -800,47 +817,65 @@ func (pr *putProgress) Want(mnt *VolumeMount) bool {
        return false
 }
 
-func newPutResult(classes []string) putProgress {
+func (pr *putProgress) Copy() *putProgress {
+       cp := putProgress{
+               classNeeded:      pr.classNeeded,
+               classTodo:        make(map[string]bool, len(pr.classTodo)),
+               classDone:        make(map[string]int, len(pr.classDone)),
+               mountUsed:        make(map[*VolumeMount]bool, len(pr.mountUsed)),
+               totalReplication: pr.totalReplication,
+       }
+       for k, v := range pr.classTodo {
+               cp.classTodo[k] = v
+       }
+       for k, v := range pr.classDone {
+               cp.classDone[k] = v
+       }
+       for k, v := range pr.mountUsed {
+               cp.mountUsed[k] = v
+       }
+       return &cp
+}
+
+func newPutProgress(classes []string) putProgress {
        pr := putProgress{
-               classTodo: make(map[string]bool, len(classes)),
-               classDone: map[string]int{},
-               mountUsed: map[*VolumeMount]bool{},
+               classNeeded: make(map[string]bool, len(classes)),
+               classTodo:   make(map[string]bool, len(classes)),
+               classDone:   map[string]int{},
+               mountUsed:   map[*VolumeMount]bool{},
        }
        for _, c := range classes {
                if c != "" {
+                       pr.classNeeded[c] = true
                        pr.classTodo[c] = true
                }
        }
        return pr
 }
 
-// PutBlock Stores the BLOCK (identified by the content id HASH) in Keep.
-//
-// PutBlock(ctx, block, hash)
-//   Stores the BLOCK (identified by the content id HASH) in Keep.
-//
-//   The MD5 checksum of the block must be identical to the content id HASH.
-//   If not, an error is returned.
+// PutBlock stores the given block on one or more volumes.
 //
-//   PutBlock stores the BLOCK on the first Keep volume with free space.
-//   A failure code is returned to the user only if all volumes fail.
+// The MD5 checksum of the block must match the given hash.
 //
-//   On success, PutBlock returns nil.
-//   On failure, it returns a KeepError with one of the following codes:
+// The block is written to each writable volume (ordered by priority
+// and then UUID, see volume.go) until at least one replica has been
+// stored in each of the requested storage classes.
 //
-//   500 Collision
-//          A different block with the same hash already exists on this
-//          Keep server.
-//   422 MD5Fail
-//          The MD5 hash of the BLOCK does not match the argument HASH.
-//   503 Full
-//          There was not enough space left in any Keep volume to store
-//          the object.
-//   500 Fail
-//          The object could not be stored for some other reason (e.g.
-//          all writes failed). The text of the error message should
-//          provide as much detail as possible.
+// The returned error, if any, is a KeepError with one of the
+// following codes:
 //
+// 500 Collision
+//        A different block with the same hash already exists on this
+//        Keep server.
+// 422 MD5Fail
+//        The MD5 hash of the block does not match the given hash.
+// 503 Full
+//        There was not enough space left in any Keep volume to store
+//        the object.
+// 500 Fail
+//        The object could not be stored for some other reason (e.g.
+//        all writes failed). The text of the error message should
+//        provide as much detail as possible.
 func PutBlock(ctx context.Context, volmgr *RRVolumeManager, block []byte, hash string, wantStorageClasses []string) (putProgress, error) {
        log := ctxlog.FromContext(ctx)
 
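To see the putProgress bookkeeping above in isolation, here is a minimal standalone sketch: trimmed stand-in types and a simplified add, not the real keepstore API. A put is done once every requested storage class has at least one replica and total replication is positive.

    package main

    import "fmt"

    // mount is a stand-in for keepstore's VolumeMount, reduced to the
    // fields the bookkeeping needs.
    type mount struct {
            UUID           string
            Replication    int
            StorageClasses map[string]bool
    }

    type progress struct {
            classNeeded      map[string]bool // classes the caller asked for
            classTodo        map[string]bool // classes still missing a replica
            classDone        map[string]int  // replicas stored so far, per class
            totalReplication int
    }

    func newProgress(classes []string) *progress {
            pr := &progress{
                    classNeeded: map[string]bool{},
                    classTodo:   map[string]bool{},
                    classDone:   map[string]int{},
            }
            for _, c := range classes {
                    if c != "" {
                            pr.classNeeded[c] = true
                            pr.classTodo[c] = true
                    }
            }
            return pr
    }

    // add records a successful write to mnt (simplified: one write
    // fully satisfies each class the mount offers).
    func (pr *progress) add(mnt *mount) {
            pr.totalReplication += mnt.Replication
            for class := range mnt.StorageClasses {
                    pr.classDone[class] += mnt.Replication
                    delete(pr.classTodo, class)
            }
    }

    func (pr *progress) done() bool {
            return len(pr.classTodo) == 0 && pr.totalReplication > 0
    }

    func main() {
            pr := newProgress([]string{"default", "archive"})
            ssd := &mount{UUID: "ssd-0", Replication: 1, StorageClasses: map[string]bool{"default": true}}
            tape := &mount{UUID: "tape-0", Replication: 2, StorageClasses: map[string]bool{"archive": true}}
            pr.add(ssd)
            fmt.Println(pr.done()) // false: "archive" still to do
            pr.add(tape)
            fmt.Println(pr.done()) // true: all requested classes satisfied
    }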
@@ -851,72 +886,88 @@ func PutBlock(ctx context.Context, volmgr *RRVolumeManager, block []byte, hash s
                return putProgress{}, RequestHashError
        }
 
-       result := newPutResult(wantStorageClasses)
+       result := newPutProgress(wantStorageClasses)
 
        // If we already have this data, it's intact on disk, and we
        // can update its timestamp, return success. If we have
        // different data with the same hash, return failure.
-       if err := CompareAndTouch(ctx, volmgr, hash, block, &result); err != nil {
+       if err := CompareAndTouch(ctx, volmgr, hash, block, &result); err != nil || result.Done() {
                return result, err
        }
        if ctx.Err() != nil {
                return result, ErrClientDisconnect
        }
 
-       // Choose a Keep volume to write to.
-       // If this volume fails, try all of the volumes in order.
-       if mnt := volmgr.NextWritable(); mnt == nil || !result.Want(mnt) {
-               // fall through to "try all volumes" below
-       } else if err := mnt.Put(ctx, hash, block); err != nil {
-               log.WithError(err).Errorf("%s: Put(%s) failed", mnt.Volume, hash)
-       } else {
-               result.Add(mnt)
-               if result.Done() {
-                       return result, nil
-               }
-       }
-       if ctx.Err() != nil {
-               return putProgress{}, ErrClientDisconnect
-       }
-
-       writables := volmgr.AllWritable()
+       writables := volmgr.NextWritable()
        if len(writables) == 0 {
                log.Error("no writable volumes")
-               return putProgress{}, FullError
+               return result, FullError
        }
 
-       allFull := true
+       var wg sync.WaitGroup
+       var mtx sync.Mutex
+       cond := sync.Cond{L: &mtx}
+       // pending predicts what result will be if all pending writes
+       // succeed.
+       pending := result.Copy()
+       var allFull atomic.Value
+       allFull.Store(true)
+
+       // We hold the lock for the duration of the "each volume" loop
+       // below, except when it is released during cond.Wait().
+       mtx.Lock()
+
        for _, mnt := range writables {
+               // Wait until our decision to use this mount does not
+               // depend on the outcome of pending writes.
+               for result.Want(mnt) && !pending.Want(mnt) {
+                       cond.Wait()
+               }
                if !result.Want(mnt) {
                        continue
                }
-               err := mnt.Put(ctx, hash, block)
-               if ctx.Err() != nil {
-                       return result, ErrClientDisconnect
-               }
-               switch err {
-               case nil:
-                       result.Add(mnt)
-                       if result.Done() {
-                               return result, nil
+               mnt := mnt
+               pending.Add(mnt)
+               wg.Add(1)
+               go func() {
+                       log.Debugf("PutBlock: start write to %s", mnt.UUID)
+                       defer wg.Done()
+                       err := mnt.Put(ctx, hash, block)
+
+                       mtx.Lock()
+                       if err != nil {
+                               log.Debugf("PutBlock: write to %s failed", mnt.UUID)
+                               pending.Sub(mnt)
+                       } else {
+                               log.Debugf("PutBlock: write to %s succeeded", mnt.UUID)
+                               result.Add(mnt)
                        }
-                       continue
-               case FullError:
-                       continue
-               default:
-                       // The volume is not full but the
-                       // write did not succeed.  Report the
-                       // error and continue trying.
-                       allFull = false
-                       log.WithError(err).Errorf("%s: Put(%s) failed", mnt.Volume, hash)
-               }
+                       cond.Broadcast()
+                       mtx.Unlock()
+
+                       if err != nil && err != FullError && ctx.Err() == nil {
+                               // The volume is not full but the
+                               // write did not succeed.  Report the
+                               // error and continue trying.
+                               allFull.Store(false)
+                               log.WithError(err).Errorf("%s: Put(%s) failed", mnt.Volume, hash)
+                       }
+               }()
+       }
+       mtx.Unlock()
+       wg.Wait()
+       if ctx.Err() != nil {
+               return result, ErrClientDisconnect
+       }
+       if result.Done() {
+               return result, nil
        }
 
        if result.totalReplication > 0 {
                // Some, but not all, of the storage classes were
                // satisfied. This qualifies as success.
                return result, nil
-       } else if allFull {
+       } else if allFull.Load().(bool) {
                log.Error("all volumes with qualifying storage classes are full")
                return putProgress{}, FullError
        } else {
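The goroutine-per-mount loop above coordinates through a sync.Cond: pending is a copy of result that assumes every in-flight write succeeds, so the loop only blocks when its decision about the next mount depends on writes still in flight. Below is a simplified, self-contained sketch of that wait pattern, with replication reduced to a single integer target; all names are illustrative, not the keepstore API.

    package main

    import (
            "fmt"
            "math/rand"
            "sync"
            "time"
    )

    func main() {
            const wantReplicas = 2
            volumes := []string{"vol-a", "vol-b", "vol-c", "vol-d"}

            var wg sync.WaitGroup
            var mtx sync.Mutex
            cond := sync.Cond{L: &mtx}
            done := 0    // replicas actually stored
            pending := 0 // in-flight writes that may still succeed

            mtx.Lock()
            for _, vol := range volumes {
                    // Wait until starting (or skipping) this volume no longer
                    // depends on the outcome of in-flight writes.
                    for done < wantReplicas && done+pending >= wantReplicas {
                            cond.Wait()
                    }
                    if done >= wantReplicas {
                            break // enough replicas; don't start more writes
                    }
                    vol := vol
                    pending++
                    wg.Add(1)
                    go func() {
                            defer wg.Done()
                            time.Sleep(time.Duration(rand.Intn(50)) * time.Millisecond)
                            ok := rand.Intn(3) > 0 // simulated write outcome

                            mtx.Lock()
                            pending--
                            if ok {
                                    done++
                                    fmt.Println("stored on", vol)
                            } else {
                                    fmt.Println("failed on", vol)
                            }
                            cond.Broadcast()
                            mtx.Unlock()
                    }()
            }
            mtx.Unlock()
            wg.Wait()
            fmt.Printf("replicas stored: %d (wanted %d)\n", done, wantReplicas)
    }

As in PutBlock itself, the lock is held for the whole scheduling loop except while cond.Wait() releases it, so a write that finishes can wake the loop and let it decide whether another volume is still needed.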
index 9bfc6ca3e5191d2953ceac75f915a07cab19c69f..3f7c9cb79b4b24b71c3c441e49235fd657d77e69 100644 (file)
@@ -344,11 +344,11 @@ func makeRRVolumeManager(logger logrus.FieldLogger, cluster *arvados.Cluster, my
                        vm.writables = append(vm.writables, mnt)
                }
        }
-       // pri(i): return highest priority of any storage class
-       // offered by vm.readables[i]
-       pri := func(i int) int {
+       // pri(mnt): return highest priority of any storage class
+       // offered by mnt
+       pri := func(mnt *VolumeMount) int {
                any, best := false, 0
-               for class := range vm.readables[i].KeepMount.StorageClasses {
+               for class := range mnt.KeepMount.StorageClasses {
                        if p := cluster.StorageClasses[class].Priority; !any || best < p {
                                best = p
                                any = true
@@ -356,14 +356,20 @@ func makeRRVolumeManager(logger logrus.FieldLogger, cluster *arvados.Cluster, my
                }
                return best
        }
-       // sort vm.readables, first by highest priority of any offered
+       // less(a,b): sort first by highest priority of any offered
        // storage class (highest->lowest), then by volume UUID
-       sort.Slice(vm.readables, func(i, j int) bool {
-               if pi, pj := pri(i), pri(j); pi != pj {
-                       return pi > pj
+       less := func(a, b *VolumeMount) bool {
+               if pa, pb := pri(a), pri(b); pa != pb {
+                       return pa > pb
                } else {
-                       return vm.readables[i].KeepMount.UUID < vm.readables[j].KeepMount.UUID
+                       return a.KeepMount.UUID < b.KeepMount.UUID
                }
+       }
+       sort.Slice(vm.readables, func(i, j int) bool {
+               return less(vm.readables[i], vm.readables[j])
+       })
+       sort.Slice(vm.writables, func(i, j int) bool {
+               return less(vm.writables[i], vm.writables[j])
        })
        return vm, nil
 }
@@ -384,18 +390,22 @@ func (vm *RRVolumeManager) AllReadable() []*VolumeMount {
        return vm.readables
 }
 
-// AllWritable returns an array of all writable volumes
+// AllWritable returns writable volumes, sorted by priority/uuid. Used
+// by CompareAndTouch to ensure higher-priority volumes are checked
+// first.
 func (vm *RRVolumeManager) AllWritable() []*VolumeMount {
        return vm.writables
 }
 
-// NextWritable returns the next writable
-func (vm *RRVolumeManager) NextWritable() *VolumeMount {
+// NextWritable returns writable volumes, rotated by vm.counter so
+// each volume gets a turn to be first. Used by PutBlock to distribute
+// new data across available volumes.
+func (vm *RRVolumeManager) NextWritable() []*VolumeMount {
        if len(vm.writables) == 0 {
                return nil
        }
-       i := atomic.AddUint32(&vm.counter, 1)
-       return vm.writables[i%uint32(len(vm.writables))]
+       offset := (int(atomic.AddUint32(&vm.counter, 1)) - 1) % len(vm.writables)
+       return append(append([]*VolumeMount(nil), vm.writables[offset:]...), vm.writables[:offset]...)
 }
 
 // VolumeStats returns an ioStats for the given volume.
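The rotation in NextWritable can be sketched on its own; the names below are illustrative, not the keepstore API. Each call rotates the same priority-sorted list by an atomic counter, so consecutive PutBlock calls start their writes on different volumes:

    package main

    import (
            "fmt"
            "sync/atomic"
    )

    var counter uint32

    // rotated returns vols rotated so successive calls start at
    // successive offsets, spreading first-write load across volumes.
    func rotated(vols []string) []string {
            if len(vols) == 0 {
                    return nil
            }
            offset := (int(atomic.AddUint32(&counter, 1)) - 1) % len(vols)
            return append(append([]string(nil), vols[offset:]...), vols[:offset]...)
    }

    func main() {
            vols := []string{"a", "b", "c"}
            for i := 0; i < 4; i++ {
                    fmt.Println(rotated(vols)) // [a b c], [b c a], [c a b], [a b c]
            }
    }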
index 79f0d3f4f6c2f0a21ddc5ab3d1e711831c1be896..c112972c4303103a6fee1fc920fa309022b340ee 100644 (file)
@@ -73,7 +73,7 @@ ENV DEBIAN_FRONTEND noninteractive
 #  gnupg2 runit python3-pip python3-setuptools python3-yaml shellinabox netcat less
 RUN apt-get update && \
     apt-get -yq --no-install-recommends -o Acquire::Retries=6 install \
-    gnupg2 runit python3-pip python3-setuptools python3-yaml shellinabox netcat less && \
+    gnupg2 runit python3-pip python3-setuptools python3-yaml shellinabox netcat less vim-tiny && \
     apt-get clean
 
 ENV GOPATH /var/lib/gopath
index fb3eaaeee875e147f761cef7dbb8f317be7aaa31..a112cb93fe07cadbcfb814606497df0b0e9328f8 100755 (executable)
@@ -59,5 +59,6 @@ fi
 export VERSION=$(./version-at-commit.sh)
 export BROWSER=none
 export CI=true
+export HTTPS=false
 node --version
 exec node node_modules/react-scripts/scripts/start.js
index 3019a9fb1cb50ac5595c0b76228489f2b1e9e4d4..a3463bfc5c5f796b414adb68747b16ff975427eb 100644 (file)
@@ -35,7 +35,7 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
                                cp -vr /vagrant/tests /home/vagrant/tests;
                                sed 's#cluster_fixme_or_this_wont_work#harpo#g;
                                     s#domain_fixme_or_this_wont_work#local#g;
-                                    s/#\ BRANCH=\"master\"/\ BRANCH=\"master\"/g;
+                                    s/#\ BRANCH=\"main\"/\ BRANCH=\"main\"/g;
                                     s#CONTROLLER_EXT_SSL_PORT=443#CONTROLLER_EXT_SSL_PORT=8443#g' \
                                     /vagrant/local.params.example.single_host_multiple_hostnames > /tmp/local.params.single_host_multiple_hostnames"
      arv.vm.provision "shell",
@@ -78,7 +78,7 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
                                cp -vr /vagrant/tests /home/vagrant/tests;
                                sed 's#HOSTNAME_EXT=\"\"#HOSTNAME_EXT=\"zeppo.local\"#g;
                                     s#cluster_fixme_or_this_wont_work#zeppo#g;
-                                    s/#\ BRANCH=\"master\"/\ BRANCH=\"master\"/g;
+                                    s/#\ BRANCH=\"main\"/\ BRANCH=\"main\"/g;
                                     s#domain_fixme_or_this_wont_work#local#g;' \
                                     /vagrant/local.params.example.single_host_single_hostname > /tmp/local.params.single_host_single_hostname"
      arv.vm.provision "shell",
index 23e007650480ab28414b5bbbd4251cd655e75f3b..ccf6bac7895e1e873a05c04200f77eeecdea702d 100644 (file)
@@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+# vim: ft=yaml
 ---
 # Copyright (C) The Arvados Authors. All rights reserved.
 #
@@ -26,6 +28,7 @@ arvados:
   ## manage OS packages with some other tool and you don't want us messing up
   ## with your setup.
   ruby:
+
     ## We set these to `true` here for testing purposes.
     ## They both default to `false`.
     manage_ruby: true
@@ -67,8 +70,15 @@ arvados:
       host: 127.0.0.1
       password: "__DATABASE_PASSWORD__"
       user: __CLUSTER___arvados
-      encoding: en_US.utf8
-      client_encoding: UTF8
+      extra_conn_params:
+        client_encoding: UTF8
+      # CentOS 7 does not enable SSL by default, so we disable
+      # it here for testing the formula only.
+      # You should not do this in production; configure
+      # Postgres certificates correctly instead.
+      {%- if grains.os_family in ('RedHat',) %}
+        sslmode: disable
+      {%- endif %}
 
     tls:
       # certificate: ''
@@ -76,6 +86,13 @@ arvados:
       # required to test with arvados-snakeoil certs
       insecure: true
 
+    resources:
+      virtual_machines:
+        shell:
+          name: webshell
+          backend: 127.0.1.1
+          port: 4200
+
     ### TOKENS
     tokens:
       system_root: __SYSTEM_ROOT_TOKEN__
index b2f12c77399bdd9df8c48f7d3ac9f9004670f1aa..54087f6d6d0fe43ae9c1a12e71ac2604935a2635 100644 (file)
@@ -3,17 +3,23 @@
 #
 # SPDX-License-Identifier: AGPL-3.0
 
+{%- if grains.os_family in ('RedHat',) %}
+  {%- set group = 'nginx' %}
+{%- else %}
+  {%- set group = 'www-data' %}
+{%- endif %}
+
 ### ARVADOS
 arvados:
   config:
-    group: www-data
+    group: {{ group }}
 
 ### NGINX
 nginx:
   ### SITES
   servers:
     managed:
-      arvados_api:
+      arvados_api.conf:
         enabled: true
         overwrite: true
         config:
index 3adf0580a43647e2919b37c796b39b79d89001e4..195e9af82e5f3b84187c6467eb229ae4284e5d0c 100644 (file)
@@ -20,7 +20,7 @@ nginx:
   servers:
     managed:
       ### DEFAULT
-      arvados_controller_default:
+      arvados_controller_default.conf:
         enabled: true
         overwrite: true
         config:
@@ -33,9 +33,11 @@ nginx:
             - location /:
               - return: '301 https://$host$request_uri'
 
-      arvados_controller_ssl:
+      arvados_controller_ssl.conf:
         enabled: true
         overwrite: true
+        requires:
+          file: nginx_snippet_arvados-snakeoil.conf
         config:
           - server:
             - server_name: __CLUSTER__.__DOMAIN__
@@ -52,7 +54,8 @@ nginx:
               - proxy_set_header: 'X-Real-IP $remote_addr'
               - proxy_set_header: 'X-Forwarded-For $proxy_add_x_forwarded_for'
               - proxy_set_header: 'X-External-Client $external_client'
-            - include: 'snippets/arvados-snakeoil.conf'
+            - include: snippets/ssl_hardening_default.conf
+            - include: snippets/arvados-snakeoil.conf
             - access_log: /var/log/nginx/__CLUSTER__.__DOMAIN__.access.log combined
             - error_log: /var/log/nginx/__CLUSTER__.__DOMAIN__.error.log
             - client_max_body_size: 128m
index 2d8922df9a8c727768bd4d57d69c6adab5c0fef3..91179d4a867271a2bfc4c6f1b6d4338e2fb2ee60 100644 (file)
@@ -16,7 +16,7 @@ nginx:
   servers:
     managed:
       ### DEFAULT
-      arvados_keepproxy_default:
+      arvados_keepproxy_default.conf:
         enabled: true
         overwrite: true
         config:
@@ -29,9 +29,11 @@ nginx:
             - location /:
               - return: '301 https://$host$request_uri'
 
-      arvados_keepproxy_ssl:
+      arvados_keepproxy_ssl.conf:
         enabled: true
         overwrite: true
+        requires:
+          file: nginx_snippet_arvados-snakeoil.conf
         config:
           - server:
             - server_name: keep.__CLUSTER__.__DOMAIN__
@@ -52,6 +54,7 @@ nginx:
             - client_max_body_size: 64M
             - proxy_http_version: '1.1'
             - proxy_request_buffering: 'off'
-            - include: 'snippets/arvados-snakeoil.conf'
+            - include: snippets/ssl_hardening_default.conf
+            - include: snippets/arvados-snakeoil.conf
             - access_log: /var/log/nginx/keepproxy.__CLUSTER__.__DOMAIN__.access.log combined
             - error_log: /var/log/nginx/keepproxy.__CLUSTER__.__DOMAIN__.error.log
index d180a3bad42e974d7d1796673e5f04df5a94e3ae..9ea16bfb546eea46e0f420eb6fbd9fa330dbaa52 100644 (file)
@@ -16,7 +16,7 @@ nginx:
   servers:
     managed:
       ### DEFAULT
-      arvados_collections_download_default:
+      arvados_collections_download_default.conf:
         enabled: true
         overwrite: true
         config:
@@ -30,9 +30,11 @@ nginx:
               - return: '301 https://$host$request_uri'
 
       ### COLLECTIONS / DOWNLOAD
-      arvados_collections_download_ssl:
+      arvados_collections_download_ssl.conf:
         enabled: true
         overwrite: true
+        requires:
+          file: nginx_snippet_arvados-snakeoil.conf
         config:
           - server:
             - server_name: collections.__CLUSTER__.__DOMAIN__ download.__CLUSTER__.__DOMAIN__
@@ -52,6 +54,7 @@ nginx:
             - client_max_body_size: 0
             - proxy_http_version: '1.1'
             - proxy_request_buffering: 'off'
-            - include: 'snippets/arvados-snakeoil.conf'
+            - include: snippets/ssl_hardening_default.conf
+            - include: snippets/arvados-snakeoil.conf
             - access_log: /var/log/nginx/collections.__CLUSTER__.__DOMAIN__.access.log combined
             - error_log: /var/log/nginx/collections.__CLUSTER__.__DOMAIN__.error.log
index 6ce75faa70c3d135076ffcf05d0b6dd2fcc76eef..a4d3c34f260e3cb5905830c40e19388f31561415 100644 (file)
@@ -3,19 +3,69 @@
 #
 # SPDX-License-Identifier: AGPL-3.0
 
+{%- set passenger_pkg = 'nginx-mod-http-passenger'
+                          if grains.osfinger in ('CentOS Linux-7',) else
+                        'libnginx-mod-http-passenger' %}
+{%- set passenger_mod = '/usr/lib64/nginx/modules/ngx_http_passenger_module.so'
+                          if grains.osfinger in ('CentOS Linux-7',) else
+                        '/usr/lib/nginx/modules/ngx_http_passenger_module.so' %}
+{%- set passenger_ruby = '/usr/local/rvm/rubies/ruby-2.7.2/bin/ruby'
+                           if grains.osfinger in ('CentOS Linux-7', 'Ubuntu-18.04',) else
+                         '/usr/bin/ruby' %}
+
 ### NGINX
 nginx:
   install_from_phusionpassenger: true
   lookup:
-    passenger_package: libnginx-mod-http-passenger
-    passenger_config_file: /etc/nginx/conf.d/mod-http-passenger.conf
+    passenger_package: {{ passenger_pkg }}
+  ### PASSENGER
+  passenger:
+    passenger_ruby: {{ passenger_ruby }}
 
   ### SERVER
   server:
     config:
-      include: 'modules-enabled/*.conf'
+      # This is required to get the passenger module loaded.
+      # On Debian it can be done with:
+      # include: 'modules-enabled/*.conf'
+      load_module: {{ passenger_mod }}
+
       worker_processes: 4
 
+  ### SNIPPETS
+  snippets:
+    # Based on https://ssl-config.mozilla.org/#server=nginx&version=1.14.2&config=intermediate&openssl=1.1.1d&guideline=5.4
+    ssl_hardening_default.conf:
+      - ssl_session_timeout: 1d
+      - ssl_session_cache: 'shared:arvadosSSL:10m'
+      - ssl_session_tickets: 'off'
+
+      # intermediate configuration
+      - ssl_protocols: TLSv1.2 TLSv1.3
+      - ssl_ciphers: ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384
+      - ssl_prefer_server_ciphers: 'off'
+
+      # HSTS (ngx_http_headers_module is required) (63072000 seconds)
+      - add_header: 'Strict-Transport-Security "max-age=63072000" always'
+
+      # OCSP stapling
+      # FIXME! Stapling does not work with self-signed certificates, so disabling for tests
+      # - ssl_stapling: 'on'
+      # - ssl_stapling_verify: 'on'
+
+      # verify chain of trust of OCSP response using Root CA and Intermediate certs
+      # - ssl_trusted_certificate /path/to/root_CA_cert_plus_intermediates
+
+      # curl https://ssl-config.mozilla.org/ffdhe2048.txt > /path/to/dhparam
+      # - ssl_dhparam: /path/to/dhparam
+
+      # replace with the IP address of your resolver
+      # - resolver: 127.0.0.1
+
+    arvados-snakeoil.conf:
+      - ssl_certificate: /etc/ssl/private/arvados-snakeoil-cert.pem
+      - ssl_certificate_key: /etc/ssl/private/arvados-snakeoil-cert.key
+
   ### SITES
   servers:
     managed:
index e75f0443434285785b2b5444f83524f6d94058a5..9b73ab4a09e7282774e11e3c36ac02afb30983d7 100644 (file)
@@ -3,6 +3,20 @@
 #
 # SPDX-License-Identifier: AGPL-3.0
 
+# This parameter is used here to generate a list of upstreams and vhosts.
+# The dict is defined here for convenience and should really be managed some
+# other way, but the possible orchestration approaches are outside the scope
+# of this formula and its examples.
+# These upstreams should match those defined in `arvados:cluster:resources:virtual_machines`
+{% set webshell_virtual_machines = {
+  'shell': {
+    'name': 'webshell',
+    'backend': '127.0.1.1',
+    'port': 4200,
+  }
+}
+%}
+
 ### NGINX
 nginx:
   ### SERVER
@@ -11,13 +25,20 @@ nginx:
 
       ### STREAMS
       http:
-        upstream webshell_upstream:
-          - server: 'shell.internal:4200 fail_timeout=10s'
+        {%- for vm, params in webshell_virtual_machines.items() %}
+          {%- set vm_name = params.name | default(vm) %}
+          {%- set vm_backend = params.backend | default(vm_name) %}
+          {%- set vm_port = params.port | default(4200) %}
+
+        upstream {{ vm_name }}_upstream:
+          - server: '{{ vm_backend }}:{{ vm_port }} fail_timeout=10s'
+
+        {%- endfor %}
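+        # With the example dict above, the loop renders the equivalent of
+        # (values taken from webshell_virtual_machines):
+        #
+        #   upstream webshell_upstream:
+        #     - server: '127.0.1.1:4200 fail_timeout=10s'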
 
   ### SITES
   servers:
     managed:
-      arvados_webshell_default:
+      arvados_webshell_default.conf:
         enabled: true
         overwrite: true
         config:
@@ -30,17 +51,21 @@ nginx:
             - location /:
               - return: '301 https://$host$request_uri'
 
-      arvados_webshell_ssl:
+      arvados_webshell_ssl.conf:
         enabled: true
         overwrite: true
+        requires:
+          file: nginx_snippet_arvados-snakeoil.conf
         config:
           - server:
             - server_name: webshell.__CLUSTER__.__DOMAIN__
             - listen:
               - __CONTROLLER_EXT_SSL_PORT__ http2 ssl
             - index: index.html index.htm
-            - location /shell.__CLUSTER__.__DOMAIN__:
-              - proxy_pass: 'http://webshell_upstream'
+            {%- for vm, params in webshell_virtual_machines.items() %}
+              {%- set vm_name = params.name | default(vm) %}
+            - location /{{ vm_name }}:
+              - proxy_pass: 'http://{{ vm_name }}_upstream'
               - proxy_read_timeout: 90
               - proxy_connect_timeout: 90
               - proxy_set_header: 'Host $http_host'
@@ -67,8 +92,9 @@ nginx:
                 - add_header: "'Access-Control-Allow-Origin' '*'"
                 - add_header: "'Access-Control-Allow-Methods' 'GET, POST, OPTIONS'"
                 - add_header: "'Access-Control-Allow-Headers' 'DNT,X-CustomHeader,Keep-Alive,User-Agent,X-Requested-With,If-Modified-Since,Cache-Control,Content-Type'"
-
-            - include: 'snippets/arvados-snakeoil.conf'
+            {%- endfor %}
+            - include: snippets/ssl_hardening_default.conf
+            - include: snippets/arvados-snakeoil.conf
             - access_log: /var/log/nginx/webshell.__CLUSTER__.__DOMAIN__.access.log combined
             - error_log: /var/log/nginx/webshell.__CLUSTER__.__DOMAIN__.error.log
 
index 3a354ac293de96d93faac2c9013750ac825287aa..bcd0457c9e18f2012e7d19254814fb24f0dfbe93 100644 (file)
@@ -16,7 +16,7 @@ nginx:
   servers:
     managed:
       ### DEFAULT
-      arvados_websocket_default:
+      arvados_websocket_default.conf:
         enabled: true
         overwrite: true
         config:
@@ -29,9 +29,11 @@ nginx:
             - location /:
               - return: '301 https://$host$request_uri'
 
-      arvados_websocket_ssl:
+      arvados_websocket_ssl.conf:
         enabled: true
         overwrite: true
+        requires:
+          file: nginx_snippet_arvados-snakeoil.conf
         config:
           - server:
             - server_name: ws.__CLUSTER__.__DOMAIN__
@@ -53,6 +55,7 @@ nginx:
             - client_max_body_size: 64M
             - proxy_http_version: '1.1'
             - proxy_request_buffering: 'off'
-            - include: 'snippets/arvados-snakeoil.conf'
+            - include: snippets/ssl_hardening_default.conf
+            - include: snippets/arvados-snakeoil.conf
             - access_log: /var/log/nginx/ws.__CLUSTER__.__DOMAIN__.access.log combined
             - error_log: /var/log/nginx/ws.__CLUSTER__.__DOMAIN__.error.log
index 8fdd553991ed86be5d83adb056e12f6348a9bdee..44bd16fe3e9e94cf1a75f7f8edbd13a3b11fd848 100644 (file)
@@ -1,12 +1,18 @@
 ---
 # Copyright (C) The Arvados Authors. All rights reserved.
 #
-# SPDX-License-Identifier: AGPL-3.0
+# SPDX-License-Identifier: Apache-2.0
+
+{%- if grains.os_family in ('RedHat',) %}
+  {%- set group = 'nginx' %}
+{%- else %}
+  {%- set group = 'www-data' %}
+{%- endif %}
 
 ### ARVADOS
 arvados:
   config:
-    group: www-data
+    group: {{ group }}
 
 ### NGINX
 nginx:
@@ -14,7 +20,7 @@ nginx:
   servers:
     managed:
       ### DEFAULT
-      arvados_workbench2_default:
+      arvados_workbench2_default.conf:
         enabled: true
         overwrite: true
         config:
@@ -27,9 +33,11 @@ nginx:
             - location /:
               - return: '301 https://$host$request_uri'
 
-      arvados_workbench2_ssl:
+      arvados_workbench2_ssl.conf:
         enabled: true
         overwrite: true
+        requires:
+          file: nginx_snippet_arvados-snakeoil.conf
         config:
           - server:
             - server_name: workbench2.__CLUSTER__.__DOMAIN__
@@ -43,6 +51,7 @@ nginx:
                 - return: 503
             - location /config.json:
               - return: {{ "200 '" ~ '{"API_HOST":"__CLUSTER__.__DOMAIN__:__CONTROLLER_EXT_SSL_PORT__"}' ~ "'" }}
-            - include: 'snippets/arvados-snakeoil.conf'
+            - include: snippets/ssl_hardening_default.conf
+            - include: snippets/arvados-snakeoil.conf
             - access_log: /var/log/nginx/workbench2.__CLUSTER__.__DOMAIN__.access.log combined
             - error_log: /var/log/nginx/workbench2.__CLUSTER__.__DOMAIN__.error.log
index 649af10b6d8b5c497b5cde653df2aef2e86e0f6a..6b7ab969f964606bd88d50ec4a3f66cb63f517d6 100644 (file)
@@ -3,10 +3,16 @@
 #
 # SPDX-License-Identifier: AGPL-3.0
 
+{%- if grains.os_family in ('RedHat',) %}
+  {%- set group = 'nginx' %}
+{%- else %}
+  {%- set group = 'www-data' %}
+{%- endif %}
+
 ### ARVADOS
 arvados:
   config:
-    group: www-data
+    group: {{ group }}
 
 ### NGINX
 nginx:
@@ -23,7 +29,7 @@ nginx:
   servers:
     managed:
       ### DEFAULT
-      arvados_workbench_default:
+      arvados_workbench_default.conf:
         enabled: true
         overwrite: true
         config:
@@ -36,9 +42,11 @@ nginx:
             - location /:
               - return: '301 https://$host$request_uri'
 
-      arvados_workbench_ssl:
+      arvados_workbench_ssl.conf:
         enabled: true
         overwrite: true
+        requires:
+          file: nginx_snippet_arvados-snakeoil.conf
         config:
           - server:
             - server_name: workbench.__CLUSTER__.__DOMAIN__
@@ -54,11 +62,12 @@ nginx:
               - proxy_set_header: 'Host $http_host'
               - proxy_set_header: 'X-Real-IP $remote_addr'
               - proxy_set_header: 'X-Forwarded-For $proxy_add_x_forwarded_for'
-            - include: 'snippets/arvados-snakeoil.conf'
+            - include: snippets/ssl_hardening_default.conf
+            - include: snippets/arvados-snakeoil.conf
             - access_log: /var/log/nginx/workbench.__CLUSTER__.__DOMAIN__.access.log combined
             - error_log: /var/log/nginx/workbench.__CLUSTER__.__DOMAIN__.error.log
 
-      arvados_workbench_upstream:
+      arvados_workbench_upstream.conf:
         enabled: true
         overwrite: true
         config:
index 71e712cad3c278d3ac5bbedc2d3b36bd2fe59993..fda1545a05bcf8048b47f2838a058ccd7c542ffb 100644 (file)
@@ -5,11 +5,29 @@
 
 ### POSTGRESQL
 postgres:
-  use_upstream_repo: false
+  # CentOS 7's postgres package is too old, so we need to force using upstream's.
+  # This is not required for the Debian family, which already ships with PG 11+.
+  {%- if salt['grains.get']('os_family') == 'RedHat' %}
+  use_upstream_repo: true
+  version: '12'
+
+  pkgs_deps:
+    - libicu
+    - libxslt
+    - systemd-sysv
+
+  pkgs_extra:
+    - postgresql12-contrib
+
+  {%- else %}
   pkgs_extra:
     - postgresql-contrib
+  {%- endif %}
   postgresconf: |-
     listen_addresses = '*'  # listen on all interfaces
+    #ssl = on
+    #ssl_cert_file = '/etc/ssl/certs/arvados-snakeoil-cert.pem'
+    #ssl_key_file = '/etc/ssl/private/arvados-snakeoil-cert.key'
   acls:
     - ['local', 'all', 'postgres', 'peer']
     - ['local', 'all', 'all', 'peer']
index fb1473def250dea3405890a54de90070d248fae0..91617e4fa4765e5e3365a4269937ac6987a94d17 100644 (file)
@@ -1,15 +1,22 @@
 # Copyright (C) The Arvados Authors. All rights reserved.
 #
-# SPDX-License-Identifier: AGPL-3.0
+# SPDX-License-Identifier: Apache-2.0
 
 {%- set curr_tpldir = tpldir %}
 {%- set tpldir = 'arvados' %}
 {%- from "arvados/map.jinja" import arvados with context %}
 {%- set tpldir = curr_tpldir %}
 
-{%- set arvados_ca_cert_file = '/etc/ssl/certs/arvados-snakeoil-ca.pem' %}
+include:
+  - nginx.passenger
+  - nginx.config
+  - nginx.service
+
+# Debian uses different dirs for certs and keys, but being a Snake Oil example,
+# we'll keep it simple here.
+{%- set arvados_ca_cert_file = '/etc/ssl/private/arvados-snakeoil-ca.pem' %}
 {%- set arvados_ca_key_file = '/etc/ssl/private/arvados-snakeoil-ca.key' %}
-{%- set arvados_cert_file = '/etc/ssl/certs/arvados-snakeoil-cert.pem' %}
+{%- set arvados_cert_file = '/etc/ssl/private/arvados-snakeoil-cert.pem' %}
 {%- set arvados_csr_file = '/etc/ssl/private/arvados-snakeoil-cert.csr' %}
 {%- set arvados_key_file = '/etc/ssl/private/arvados-snakeoil-cert.key' %}
 
@@ -30,7 +37,7 @@ arvados_test_salt_states_examples_single_host_snakeoil_certs_dependencies_pkg_in
       - ca-certificates
 
 arvados_test_salt_states_examples_single_host_snakeoil_certs_arvados_snake_oil_ca_cmd_run:
-  # Taken from https://github.com/arvados/arvados/blob/main/tools/arvbox/lib/arvbox/docker/service/certificate/run
+  # Taken from https://github.com/arvados/arvados/blob/master/tools/arvbox/lib/arvbox/docker/service/certificate/run
   cmd.run:
     - name: |
        # These dirs are not too CentOS-ish, but this is a helper script
@@ -121,6 +128,9 @@ arvados_test_salt_states_examples_single_host_snakeoil_certs_arvados_snake_oil_c
     - require:
       - pkg: arvados_test_salt_states_examples_single_host_snakeoil_certs_dependencies_pkg_installed
       - cmd: arvados_test_salt_states_examples_single_host_snakeoil_certs_arvados_snake_oil_ca_cmd_run
+    # We need this before we can add the nginx snippet
+    - require_in:
+      - file: nginx_snippet_arvados-snakeoil.conf
 
 {%- if grains.get('os_family') == 'Debian' %}
 arvados_test_salt_states_examples_single_host_snakeoil_certs_ssl_cert_pkg_installed:
@@ -130,29 +140,13 @@ arvados_test_salt_states_examples_single_host_snakeoil_certs_ssl_cert_pkg_instal
       - sls: postgres
 
 arvados_test_salt_states_examples_single_host_snakeoil_certs_certs_permissions_cmd_run:
-  cmd.run:
-    - name: |
-        chown root:ssl-cert {{ arvados_key_file }}
+  file.managed:
+    - name: {{ arvados_key_file }}
+    - owner: root
+    - group: ssl-cert
     - require:
       - cmd: arvados_test_salt_states_examples_single_host_snakeoil_certs_arvados_snake_oil_cert_cmd_run
       - pkg: arvados_test_salt_states_examples_single_host_snakeoil_certs_ssl_cert_pkg_installed
-{%- endif %}
-
-arvados_test_salt_states_examples_single_host_snakeoil_certs_nginx_snakeoil_file_managed:
-  file.managed:
-    - name: /etc/nginx/snippets/arvados-snakeoil.conf
-    - contents: |
-        ssl_certificate {{ arvados_cert_file }};
-        ssl_certificate_key {{ arvados_key_file }};
-    - watch_in:
-      - service: nginx_service
-    - require:
-      - pkg: passenger_install
-      - cmd: arvados_test_salt_states_examples_single_host_snakeoil_certs_certs_permissions_cmd_run
     - require_in:
-      - file: nginx_config
-      - service: nginx_service
-    - watch_in:
-      - service: nginx_service
-
-
+      - file: nginx_snippet_arvados-snakeoil.conf
+{%- endif %}
index 6ce75faa70c3d135076ffcf05d0b6dd2fcc76eef..a4d3c34f260e3cb5905830c40e19388f31561415 100644 (file)
@@ -3,19 +3,69 @@
 #
 # SPDX-License-Identifier: AGPL-3.0
 
+{%- set passenger_pkg = 'nginx-mod-http-passenger'
+                          if grains.osfinger in ('CentOS Linux-7',) else
+                        'libnginx-mod-http-passenger' %}
+{%- set passenger_mod = '/usr/lib64/nginx/modules/ngx_http_passenger_module.so'
+                          if grains.osfinger in ('CentOS Linux-7',) else
+                        '/usr/lib/nginx/modules/ngx_http_passenger_module.so' %}
+{%- set passenger_ruby = '/usr/local/rvm/rubies/ruby-2.7.2/bin/ruby'
+                           if grains.osfinger in ('CentOS Linux-7', 'Ubuntu-18.04',) else
+                         '/usr/bin/ruby' %}
+
 ### NGINX
 nginx:
   install_from_phusionpassenger: true
   lookup:
-    passenger_package: libnginx-mod-http-passenger
-    passenger_config_file: /etc/nginx/conf.d/mod-http-passenger.conf
+    passenger_package: {{ passenger_pkg }}
+  ### PASSENGER
+  passenger:
+    passenger_ruby: {{ passenger_ruby }}
 
   ### SERVER
   server:
     config:
-      include: 'modules-enabled/*.conf'
+      # This is required to get the passenger module loaded.
+      # On Debian it can be done with:
+      # include: 'modules-enabled/*.conf'
+      load_module: {{ passenger_mod }}
+
       worker_processes: 4
 
+  ### SNIPPETS
+  snippets:
+    # Based on https://ssl-config.mozilla.org/#server=nginx&version=1.14.2&config=intermediate&openssl=1.1.1d&guideline=5.4
+    ssl_hardening_default.conf:
+      - ssl_session_timeout: 1d
+      - ssl_session_cache: 'shared:arvadosSSL:10m'
+      - ssl_session_tickets: 'off'
+
+      # intermediate configuration
+      - ssl_protocols: TLSv1.2 TLSv1.3
+      - ssl_ciphers: ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384
+      - ssl_prefer_server_ciphers: 'off'
+
+      # HSTS (ngx_http_headers_module is required) (63072000 seconds)
+      - add_header: 'Strict-Transport-Security "max-age=63072000" always'
+
+      # OCSP stapling
+      # FIXME! Stapling does not work with self-signed certificates, so disabling for tests
+      # - ssl_stapling: 'on'
+      # - ssl_stapling_verify: 'on'
+
+      # verify chain of trust of OCSP response using Root CA and Intermediate certs
+      # - ssl_trusted_certificate /path/to/root_CA_cert_plus_intermediates
+
+      # curl https://ssl-config.mozilla.org/ffdhe2048.txt > /path/to/dhparam
+      # - ssl_dhparam: /path/to/dhparam
+
+      # replace with the IP address of your resolver
+      # - resolver: 127.0.0.1
+
+    arvados-snakeoil.conf:
+      - ssl_certificate: /etc/ssl/private/arvados-snakeoil-cert.pem
+      - ssl_certificate_key: /etc/ssl/private/arvados-snakeoil-cert.key
+
   ### SITES
   servers:
     managed:
index 130fb5e937affe145b06c9f75b0ec2f6540003c8..b6929fb887ba6827a0979872ccee415a01d22c94 100644 (file)
@@ -1,15 +1,22 @@
 # Copyright (C) The Arvados Authors. All rights reserved.
 #
-# SPDX-License-Identifier: AGPL-3.0
+# SPDX-License-Identifier: Apache-2.0
 
 {%- set curr_tpldir = tpldir %}
 {%- set tpldir = 'arvados' %}
 {%- from "arvados/map.jinja" import arvados with context %}
 {%- set tpldir = curr_tpldir %}
 
-{%- set arvados_ca_cert_file = '/etc/ssl/certs/arvados-snakeoil-ca.pem' %}
+include:
+  - nginx.passenger
+  - nginx.config
+  - nginx.service
+
+# Debian uses different dirs for certs and keys, but being a Snake Oil example,
+# we'll keep it simple here.
+{%- set arvados_ca_cert_file = '/etc/ssl/private/arvados-snakeoil-ca.pem' %}
 {%- set arvados_ca_key_file = '/etc/ssl/private/arvados-snakeoil-ca.key' %}
-{%- set arvados_cert_file = '/etc/ssl/certs/arvados-snakeoil-cert.pem' %}
+{%- set arvados_cert_file = '/etc/ssl/private/arvados-snakeoil-cert.pem' %}
 {%- set arvados_csr_file = '/etc/ssl/private/arvados-snakeoil-cert.csr' %}
 {%- set arvados_key_file = '/etc/ssl/private/arvados-snakeoil-cert.key' %}
 
@@ -30,7 +37,7 @@ arvados_test_salt_states_examples_single_host_snakeoil_certs_dependencies_pkg_in
       - ca-certificates
 
 arvados_test_salt_states_examples_single_host_snakeoil_certs_arvados_snake_oil_ca_cmd_run:
-  # Taken from https://github.com/arvados/arvados/blob/main/tools/arvbox/lib/arvbox/docker/service/certificate/run
+  # Taken from https://github.com/arvados/arvados/blob/master/tools/arvbox/lib/arvbox/docker/service/certificate/run
   cmd.run:
     - name: |
        # These dirs are not too CentOS-ish, but this is a helper script
@@ -124,6 +131,9 @@ arvados_test_salt_states_examples_single_host_snakeoil_certs_arvados_snake_oil_c
     - require:
       - pkg: arvados_test_salt_states_examples_single_host_snakeoil_certs_dependencies_pkg_installed
       - cmd: arvados_test_salt_states_examples_single_host_snakeoil_certs_arvados_snake_oil_ca_cmd_run
+    # We need this before we can add the nginx snippet
+    - require_in:
+      - file: nginx_snippet_arvados-snakeoil.conf
 
 {%- if grains.get('os_family') == 'Debian' %}
 arvados_test_salt_states_examples_single_host_snakeoil_certs_ssl_cert_pkg_installed:
@@ -133,26 +143,13 @@ arvados_test_salt_states_examples_single_host_snakeoil_certs_ssl_cert_pkg_instal
       - sls: postgres
 
 arvados_test_salt_states_examples_single_host_snakeoil_certs_certs_permissions_cmd_run:
-  cmd.run:
-    - name: |
-        chown root:ssl-cert {{ arvados_key_file }}
+  file.managed:
+    - name: {{ arvados_key_file }}
+    - owner: root
+    - group: ssl-cert
     - require:
       - cmd: arvados_test_salt_states_examples_single_host_snakeoil_certs_arvados_snake_oil_cert_cmd_run
       - pkg: arvados_test_salt_states_examples_single_host_snakeoil_certs_ssl_cert_pkg_installed
-{%- endif %}
-
-arvados_test_salt_states_examples_single_host_snakeoil_certs_nginx_snakeoil_file_managed:
-  file.managed:
-    - name: /etc/nginx/snippets/arvados-snakeoil.conf
-    - contents: |
-        ssl_certificate {{ arvados_cert_file }};
-        ssl_certificate_key {{ arvados_key_file }};
-    - require:
-      - pkg: nginx_install
     - require_in:
-      - file: nginx_config
-      - service: nginx_service
-    - watch_in:
-      - service: nginx_service
-
-
+      - file: nginx_snippet_arvados-snakeoil.conf
+{%- endif %}
index 17b7b888846fca194a04f60af829dd5ee271a4e5..283c631ec5853d34b63ca5db28e1ebd003225579 100644 (file)
@@ -100,6 +100,6 @@ RELEASE="production"
 # ARVADOS_TAG="2.2.0"
 # POSTGRES_TAG="v0.41.6"
 # NGINX_TAG="temp-fix-missing-statements-in-pillar"
-# DOCKER_TAG="v1.0.0"
+# DOCKER_TAG="v2.0.7"
 # LOCALE_TAG="v0.3.4"
 # LETSENCRYPT_TAG="v2.1.0"
index ae54e7437a83db83b7373eaa6ef87d70aa31e8b5..e23634e8c4d6d2a9ec50593bdea3e328618dffd6 100644 (file)
@@ -72,6 +72,6 @@ RELEASE="production"
 # ARVADOS_TAG="2.2.0"
 # POSTGRES_TAG="v0.41.6"
 # NGINX_TAG="temp-fix-missing-statements-in-pillar"
-# DOCKER_TAG="v1.0.0"
+# DOCKER_TAG="v2.0.7"
 # LOCALE_TAG="v0.3.4"
 # LETSENCRYPT_TAG="v2.1.0"
index a35bd45bffc258d7c3a8dd4b59eb564bfc13c4b8..ae9804863f4a47dc179ed71efed6d038eb57010c 100644 (file)
@@ -81,6 +81,6 @@ RELEASE="production"
 # ARVADOS_TAG="2.2.0"
 # POSTGRES_TAG="v0.41.6"
 # NGINX_TAG="temp-fix-missing-statements-in-pillar"
-# DOCKER_TAG="v1.0.0"
+# DOCKER_TAG="v2.0.7"
 # LOCALE_TAG="v0.3.4"
 # LETSENCRYPT_TAG="v2.1.0"
index 7ac120e5fd89179f75fcf13608679edfaa2b45e5..b840d86c6f360d3440328bd676dd66656739be5b 100755 (executable)
@@ -1,4 +1,4 @@
-#!/usr/bin/env bash
+#!/bin/bash
 
 # Copyright (C) The Arvados Authors. All rights reserved.
 #
@@ -11,6 +11,7 @@
 # vagrant up
 
 set -o pipefail
+set -x
 
 # capture the directory that the script is running from
 SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
@@ -177,7 +178,7 @@ VERSION="latest"
 # Other formula versions we depend on
 POSTGRES_TAG="v0.41.6"
 NGINX_TAG="temp-fix-missing-statements-in-pillar"
-DOCKER_TAG="v1.0.0"
+DOCKER_TAG="v2.0.7"
 LOCALE_TAG="v0.3.4"
 LETSENCRYPT_TAG="v2.1.0"
 
@@ -232,8 +233,23 @@ fi
 if [ "${DUMP_CONFIG}" = "yes" ]; then
   echo "The provision installer will just dump a config under ${DUMP_SALT_CONFIG_DIR} and exit"
 else
-  apt-get update
-  apt-get install -y curl git jq
+  # Install a few dependency packages
+  # First, let's figure out the OS we're working on
+  OS_ID=$(grep ^ID= /etc/os-release |cut -f 2 -d=  |cut -f 2 -d \")
+  echo "Detected distro: ${OS_ID}"
+
+  case ${OS_ID} in
+    "centos")
+      echo "WARNING! Disabling SELinux, see https://dev.arvados.org/issues/18019"
+      sed -i 's/SELINUX=enforcing/SELINUX=permissive/' /etc/sysconfig/selinux
+      setenforce permissive
+      yum install -y curl git jq
+      ;;
+    "debian"|"ubuntu")
+      DEBIAN_FRONTEND=noninteractive apt update
+      DEBIAN_FRONTEND=noninteractive apt install -y curl git jq
+      ;;
+  esac
 
   if which salt-call; then
     echo "Salt already installed"
@@ -246,6 +262,8 @@ else
 
   # Set salt to masterless mode
   cat > /etc/salt/minion << EOFSM
+failhard: "True"
+
 file_client: local
 file_roots:
   base:
@@ -607,5 +625,10 @@ fi
 # Test that the installation finished correctly
 if [ "x${TEST}" = "xyes" ]; then
   cd ${T_DIR}
-  ./run-test.sh
+  # If we use RVM, we need to run this with it, or most ruby commands will fail
+  RVM_EXEC=""
+  if [ -x /usr/local/rvm/bin/rvm-exec ]; then
+    RVM_EXEC="/usr/local/rvm/bin/rvm-exec"
+  fi
+  ${RVM_EXEC} ./run-test.sh
 fi
index 53c51a2c5a097d2e8b45446ea26a7e2a26800f2d..020efa94e8f61303e06da5d087ecd712f9f1991f 100755 (executable)
@@ -55,13 +55,17 @@ echo "Activating user '__INITIAL_USER__'"
 arv user update --uuid "${user_uuid}" --user '{"is_active": true}'
 
 echo "Getting the user API TOKEN"
-user_api_token=$(arv api_client_authorization list --filters "[[\"owner_uuid\", \"=\", \"${user_uuid}\"],[\"kind\", \"==\", \"arvados#apiClientAuthorization\"]]" --limit=1 |jq -r .items[].api_token)
+user_api_token=$(arv api_client_authorization list | jq -r ".items[] | select( .owner_uuid == \"${user_uuid}\" ).api_token" | head -1)
 
 if [ "x${user_api_token}" = "x" ]; then
+  echo "No existing token found for user '__INITIAL_USER__' (user_uuid: '${user_uuid}'). Creating token"
   user_api_token=$(arv api_client_authorization create --api-client-authorization "{\"owner_uuid\": \"${user_uuid}\"}" | jq -r .api_token)
 fi
 
+echo "API TOKEN FOR user '__INITIAL_USER__': '${user_api_token}'."
+
 # Change to the user's token and run the workflow
+echo "Switching to user '__INITIAL_USER__'"
 export ARVADOS_API_TOKEN="${user_api_token}"
 
 echo "Running test CWL workflow"
diff --git a/tools/test-collection-create/test-collection-create.py b/tools/test-collection-create/test-collection-create.py
new file mode 100644 (file)
index 0000000..9a02745
--- /dev/null
@@ -0,0 +1,443 @@
+#!/usr/bin/env python3
+#
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: CC-BY-SA-3.0
+
+import argparse
+import logging
+import random
+import string
+import sys
+
+import arvados
+import arvados.collection
+
+logger = logging.getLogger('arvados.test_collection_create')
+logger.setLevel(logging.INFO)
+
+opts = argparse.ArgumentParser(add_help=False)
+opts.add_argument('--min-files', type=int, default=30000, help="""
+Minimum number of files in each directory. Default: 30000.
+""")
+opts.add_argument('--max-files', type=int, default=30000, help="""
+Maximum number of files in each directory. Default: 30000.
+""")
+opts.add_argument('--min-depth', type=int, default=0, help="""
+Minimum depth for the created tree structure. Default: 0.
+""")
+opts.add_argument('--max-depth', type=int, default=0, help="""
+Maximum depth for the created tree structure. Default: 0.
+""")
+opts.add_argument('--min-subdirs', type=int, default=1, help="""
+Minimum number of subdirectories created at every depth level. Default: 1.
+""")
+opts.add_argument('--max-subdirs', type=int, default=10, help="""
+Maximum number of subdirectories created at every depth level. Default: 10.
+""")
+opts.add_argument('--debug', action='store_true', default=False, help="""
+Sets logging level to DEBUG.
+""")
+
+arg_parser = argparse.ArgumentParser(
+    description='Create a collection with garbage data for testing purposes.',
+    parents=[opts])
+
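+# A hypothetical invocation (flag values are examples only; see the
+# option definitions above):
+#
+#   test-collection-create.py --min-files 100 --max-files 500 \
+#       --min-depth 1 --max-depth 3 --min-subdirs 2 --max-subdirs 5 --debug
+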
+adjectives = ['abandoned','able','absolute','adorable','adventurous','academic',
+    'acceptable','acclaimed','accomplished','accurate','aching','acidic','acrobatic',
+    'active','actual','adept','admirable','admired','adolescent','adorable','adored',
+    'advanced','afraid','affectionate','aged','aggravating','aggressive','agile',
+    'agitated','agonizing','agreeable','ajar','alarmed','alarming','alert','alienated',
+    'alive','all','altruistic','amazing','ambitious','ample','amused','amusing','anchored',
+    'ancient','angelic','angry','anguished','animated','annual','another','antique',
+    'anxious','any','apprehensive','appropriate','apt','arctic','arid','aromatic','artistic',
+    'ashamed','assured','astonishing','athletic','attached','attentive','attractive',
+    'austere','authentic','authorized','automatic','avaricious','average','aware','awesome',
+    'awful','awkward','babyish','bad','back','baggy','bare','barren','basic','beautiful',
+    'belated','beloved','beneficial','better','best','bewitched','big','big-hearted',
+    'biodegradable','bite-sized','bitter','black','black-and-white','bland','blank',
+    'blaring','bleak','blind','blissful','blond','blue','blushing','bogus','boiling',
+    'bold','bony','boring','bossy','both','bouncy','bountiful','bowed','brave','breakable',
+    'brief','bright','brilliant','brisk','broken','bronze','brown','bruised','bubbly',
+    'bulky','bumpy','buoyant','burdensome','burly','bustling','busy','buttery','buzzing',
+    'calculating','calm','candid','canine','capital','carefree','careful','careless',
+    'caring','cautious','cavernous','celebrated','charming','cheap','cheerful','cheery',
+    'chief','chilly','chubby','circular','classic','clean','clear','clear-cut','clever',
+    'close','closed','cloudy','clueless','clumsy','cluttered','coarse','cold','colorful',
+    'colorless','colossal','comfortable','common','compassionate','competent','complete',
+    'complex','complicated','composed','concerned','concrete','confused','conscious',
+    'considerate','constant','content','conventional','cooked','cool','cooperative',
+    'coordinated','corny','corrupt','costly','courageous','courteous','crafty','crazy',
+    'creamy','creative','creepy','criminal','crisp','critical','crooked','crowded',
+    'cruel','crushing','cuddly','cultivated','cultured','cumbersome','curly','curvy',
+    'cute','cylindrical','damaged','damp','dangerous','dapper','daring','darling','dark',
+    'dazzling','dead','deadly','deafening','dear','dearest','decent','decimal','decisive',
+    'deep','defenseless','defensive','defiant','deficient','definite','definitive','delayed',
+    'delectable','delicious','delightful','delirious','demanding','dense','dental',
+    'dependable','dependent','descriptive','deserted','detailed','determined','devoted',
+    'different','difficult','digital','diligent','dim','dimpled','dimwitted','direct',
+    'disastrous','discrete','disfigured','disgusting','disloyal','dismal','distant',
+    'downright','dreary','dirty','disguised','dishonest','dismal','distant','distinct',
+    'distorted','dizzy','dopey','doting','double','downright','drab','drafty','dramatic',
+    'dreary','droopy','dry','dual','dull','dutiful','each','eager','earnest','early',
+    'easy','easy-going','ecstatic','edible','educated','elaborate','elastic','elated',
+    'elderly','electric','elegant','elementary','elliptical','embarrassed','embellished',
+    'eminent','emotional','empty','enchanted','enchanting','energetic','enlightened',
+    'enormous','enraged','entire','envious','equal','equatorial','essential','esteemed',
+    'ethical','euphoric','even','evergreen','everlasting','every','evil','exalted',
+    'excellent','exemplary','exhausted','excitable','excited','exciting','exotic',
+    'expensive','experienced','expert','extraneous','extroverted','extra-large','extra-small',
+    'fabulous','failing','faint','fair','faithful','fake','false','familiar','famous',
+    'fancy','fantastic','far','faraway','far-flung','far-off','fast','fat','fatal',
+    'fatherly','favorable','favorite','fearful','fearless','feisty','feline','female',
+    'feminine','few','fickle','filthy','fine','finished','firm','first','firsthand',
+    'fitting','fixed','flaky','flamboyant','flashy','flat','flawed','flawless','flickering',
+    'flimsy','flippant','flowery','fluffy','fluid','flustered','focused','fond','foolhardy',
+    'foolish','forceful','forked','formal','forsaken','forthright','fortunate','fragrant',
+    'frail','frank','frayed','free','French','fresh','frequent','friendly','frightened',
+    'frightening','frigid','frilly','frizzy','frivolous','front','frosty','frozen',
+    'frugal','fruitful','full','fumbling','functional','funny','fussy','fuzzy','gargantuan',
+    'gaseous','general','generous','gentle','genuine','giant','giddy','gigantic','gifted',
+    'giving','glamorous','glaring','glass','gleaming','gleeful','glistening','glittering',
+    'gloomy','glorious','glossy','glum','golden','good','good-natured','gorgeous',
+    'graceful','gracious','grand','grandiose','granular','grateful','grave','gray',
+    'great','greedy','green','gregarious','grim','grimy','gripping','grizzled','gross',
+    'grotesque','grouchy','grounded','growing','growling','grown','grubby','gruesome',
+    'grumpy','guilty','gullible','gummy','hairy','half','handmade','handsome','handy',
+    'happy','happy-go-lucky','hard','hard-to-find','harmful','harmless','harmonious',
+    'harsh','hasty','hateful','haunting','healthy','heartfelt','hearty','heavenly',
+    'heavy','hefty','helpful','helpless','hidden','hideous','high','high-level','hilarious',
+    'hoarse','hollow','homely','honest','honorable','honored','hopeful','horrible',
+    'hospitable','hot','huge','humble','humiliating','humming','humongous','hungry',
+    'hurtful','husky','icky','icy','ideal','idealistic','identical','idle','idiotic',
+    'idolized','ignorant','ill','illegal','ill-fated','ill-informed','illiterate',
+    'illustrious','imaginary','imaginative','immaculate','immaterial','immediate',
+    'immense','impassioned','impeccable','impartial','imperfect','imperturbable','impish',
+    'impolite','important','impossible','impractical','impressionable','impressive',
+    'improbable','impure','inborn','incomparable','incompatible','incomplete','inconsequential',
+    'incredible','indelible','inexperienced','indolent','infamous','infantile','infatuated',
+    'inferior','infinite','informal','innocent','insecure','insidious','insignificant',
+    'insistent','instructive','insubstantial','intelligent','intent','intentional',
+    'interesting','internal','international','intrepid','ironclad','irresponsible',
+    'irritating','itchy','jaded','jagged','jam-packed','jaunty','jealous','jittery',
+    'joint','jolly','jovial','joyful','joyous','jubilant','judicious','juicy','jumbo',
+    'junior','jumpy','juvenile','kaleidoscopic','keen','key','kind','kindhearted','kindly',
+    'klutzy','knobby','knotty','knowledgeable','knowing','known','kooky','kosher','lame',
+    'lanky','large','last','lasting','late','lavish','lawful','lazy','leading','lean',
+    'leafy','left','legal','legitimate','light','lighthearted','likable','likely','limited',
+    'limp','limping','linear','lined','liquid','little','live','lively','livid','loathsome',
+    'lone','lonely','long','long-term','loose','lopsided','lost','loud','lovable','lovely',
+    'loving','low','loyal','lucky','lumbering','luminous','lumpy','lustrous','luxurious',
+    'mad','made-up','magnificent','majestic','major','male','mammoth','married','marvelous',
+    'masculine','massive','mature','meager','mealy','mean','measly','meaty','medical',
+    'mediocre','medium','meek','mellow','melodic','memorable','menacing','merry','messy',
+    'metallic','mild','milky','mindless','miniature','minor','minty','miserable','miserly',
+    'misguided','misty','mixed','modern','modest','moist','monstrous','monthly','monumental',
+    'moral','mortified','motherly','motionless','mountainous','muddy','muffled','multicolored',
+    'mundane','murky','mushy','musty','muted','mysterious','naive','narrow','nasty','natural',
+    'naughty','nautical','near','neat','necessary','needy','negative','neglected','negligible',
+    'neighboring','nervous','new','next','nice','nifty','nimble','nippy','nocturnal','noisy',
+    'nonstop','normal','notable','noted','noteworthy','novel','noxious','numb','nutritious',
+    'nutty','obedient','obese','oblong','oily','oblong','obvious','occasional','odd',
+    'oddball','offbeat','offensive','official','old','old-fashioned','only','open','optimal',
+    'optimistic','opulent','orange','orderly','organic','ornate','ornery','ordinary',
+    'original','other','our','outlying','outgoing','outlandish','outrageous','outstanding',
+    'oval','overcooked','overdue','overjoyed','overlooked','palatable','pale','paltry',
+    'parallel','parched','partial','passionate','past','pastel','peaceful','peppery',
+    'perfect','perfumed','periodic','perky','personal','pertinent','pesky','pessimistic',
+    'petty','phony','physical','piercing','pink','pitiful','plain','plaintive','plastic',
+    'playful','pleasant','pleased','pleasing','plump','plush','polished','polite','political',
+    'pointed','pointless','poised','poor','popular','portly','posh','positive','possible',
+    'potable','powerful','powerless','practical','precious','present','prestigious',
+    'pretty','precious','previous','pricey','prickly','primary','prime','pristine','private',
+    'prize','probable','productive','profitable','profuse','proper','proud','prudent',
+    'punctual','pungent','puny','pure','purple','pushy','putrid','puzzled','puzzling',
+    'quaint','qualified','quarrelsome','quarterly','queasy','querulous','questionable',
+    'quick','quick-witted','quiet','quintessential','quirky','quixotic','quizzical',
+    'radiant','ragged','rapid','rare','rash','raw','recent','reckless','rectangular',
+    'ready','real','realistic','reasonable','red','reflecting','regal','regular',
+    'reliable','relieved','remarkable','remorseful','remote','repentant','required',
+    'respectful','responsible','repulsive','revolving','rewarding','rich','rigid',
+    'right','ringed','ripe','roasted','robust','rosy','rotating','rotten','rough',
+    'round','rowdy','royal','rubbery','rundown','ruddy','rude','runny','rural','rusty',
+    'sad','safe','salty','same','sandy','sane','sarcastic','sardonic','satisfied',
+    'scaly','scarce','scared','scary','scented','scholarly','scientific','scornful',
+    'scratchy','scrawny','second','secondary','second-hand','secret','self-assured',
+    'self-reliant','selfish','sentimental','separate','serene','serious','serpentine',
+    'several','severe','shabby','shadowy','shady','shallow','shameful','shameless',
+    'sharp','shimmering','shiny','shocked','shocking','shoddy','short','short-term',
+    'showy','shrill','shy','sick','silent','silky','silly','silver','similar','simple',
+    'simplistic','sinful','single','sizzling','skeletal','skinny','sleepy','slight',
+    'slim','slimy','slippery','slow','slushy','small','smart','smoggy','smooth','smug',
+    'snappy','snarling','sneaky','sniveling','snoopy','sociable','soft','soggy','solid',
+    'somber','some','spherical','sophisticated','sore','sorrowful','soulful','soupy',
+    'sour','Spanish','sparkling','sparse','specific','spectacular','speedy','spicy',
+    'spiffy','spirited','spiteful','splendid','spotless','spotted','spry','square',
+    'squeaky','squiggly','stable','staid','stained','stale','standard','starchy','stark',
+    'starry','steep','sticky','stiff','stimulating','stingy','stormy','straight','strange',
+    'steel','strict','strident','striking','striped','strong','studious','stunning',
+    'stupendous','stupid','sturdy','stylish','subdued','submissive','substantial','subtle',
+    'suburban','sudden','sugary','sunny','super','superb','superficial','superior',
+    'supportive','sure-footed','surprised','suspicious','svelte','sweaty','sweet','sweltering',
+    'swift','sympathetic','tall','talkative','tame','tan','tangible','tart','tasty',
+    'tattered','taut','tedious','teeming','tempting','tender','tense','tepid','terrible',
+    'terrific','testy','thankful','that','these','thick','thin','third','thirsty','this',
+    'thorough','thorny','those','thoughtful','threadbare','thrifty','thunderous','tidy',
+    'tight','timely','tinted','tiny','tired','torn','total','tough','traumatic','treasured',
+    'tremendous','tragic','trained','triangular','tricky','trifling','trim',
+    'trivial','troubled','true','trusting','trustworthy','trusty','truthful','tubby',
+    'turbulent','twin','ugly','ultimate','unacceptable','unaware','uncomfortable',
+    'uncommon','unconscious','understated','unequaled','uneven','unfinished','unfit',
+    'unfolded','unfortunate','unhappy','unhealthy','uniform','unimportant','unique',
+    'united','unkempt','unknown','unlawful','unlined','unlucky','unnatural','unpleasant',
+    'unrealistic','unripe','unruly','unselfish','unsightly','unsteady','unsung','untidy',
+    'untimely','untried','untrue','unused','unusual','unwelcome','unwieldy','unwilling',
+    'unwitting','unwritten','upbeat','upright','upset','urban','usable','used','useful',
+    'useless','utilized','utter','vacant','vague','vain','valid','valuable','vapid',
+    'variable','vast','velvety','venerated','vengeful','verifiable','vibrant','vicious',
+    'victorious','vigilant','vigorous','villainous','violet','violent','virtual',
+    'virtuous','visible','vital','vivacious','vivid','voluminous','wan','warlike','warm',
+    'warmhearted','warped','wary','wasteful','watchful','waterlogged','watery','wavy',
+    'wealthy','weak','weary','webbed','wee','weekly','weepy','weighty','weird','welcome',
+    'well-documented','well-groomed','well-informed','well-lit','well-made','well-off',
+    'well-to-do','well-worn','wet','which','whimsical','whirlwind','whispered','white',
+    'whole','whopping','wicked','wide','wide-eyed','wiggly','wild','willing','wilted',
+    'winding','windy','winged','wiry','wise','witty','wobbly','woeful','wonderful',
+    'wooden','woozy','wordy','worldly','worn','worried','worrisome','worse','worst',
+    'worthless','worthwhile','worthy','wrathful','wretched','writhing','wrong','wry',
+    'yawning','yearly','yellow','yellowish','young','youthful','yummy','zany','zealous',
+    'zesty','zigzag']
+nouns = ['people','history','way','art','world','information','map','two','family',
+    'government','health','system','computer','meat','year','thanks','music','person',
+    'reading','method','data','food','understanding','theory','law','bird','literature',
+    'problem','software','control','knowledge','power','ability','economics','love',
+    'internet','television','science','library','nature','fact','product','idea',
+    'temperature','investment','area','society','activity','story','industry','media',
+    'thing','oven','community','definition','safety','quality','development','language',
+    'management','player','variety','video','week','security','country','exam','movie',
+    'organization','equipment','physics','analysis','policy','series','thought','basis',
+    'boyfriend','direction','strategy','technology','army','camera','freedom','paper',
+    'environment','child','instance','month','truth','marketing','university','writing',
+    'article','department','difference','goal','news','audience','fishing','growth',
+    'income','marriage','user','combination','failure','meaning','medicine','philosophy',
+    'teacher','communication','night','chemistry','disease','disk','energy','nation',
+    'road','role','soup','advertising','location','success','addition','apartment','education',
+    'math','moment','painting','politics','attention','decision','event','property',
+    'shopping','student','wood','competition','distribution','entertainment','office',
+    'population','president','unit','category','cigarette','context','introduction',
+    'opportunity','performance','driver','flight','length','magazine','newspaper',
+    'relationship','teaching','cell','dealer','finding','lake','member','message','phone',
+    'scene','appearance','association','concept','customer','death','discussion','housing',
+    'inflation','insurance','mood','woman','advice','blood','effort','expression','importance',
+    'opinion','payment','reality','responsibility','situation','skill','statement','wealth',
+    'application','city','county','depth','estate','foundation','grandmother','heart',
+    'perspective','photo','recipe','studio','topic','collection','depression','imagination',
+    'passion','percentage','resource','setting','ad','agency','college','connection',
+    'criticism','debt','description','memory','patience','secretary','solution','administration',
+    'aspect','attitude','director','personality','psychology','recommendation','response',
+    'selection','storage','version','alcohol','argument','complaint','contract','emphasis',
+    'highway','loss','membership','possession','preparation','steak','union','agreement',
+    'cancer','currency','employment','engineering','entry','interaction','mixture','preference',
+    'region','republic','tradition','virus','actor','classroom','delivery','device',
+    'difficulty','drama','election','engine','football','guidance','hotel','owner',
+    'priority','protection','suggestion','tension','variation','anxiety','atmosphere',
+    'awareness','bath','bread','candidate','climate','comparison','confusion','construction',
+    'elevator','emotion','employee','employer','guest','height','leadership','mall','manager',
+    'operation','recording','sample','transportation','charity','cousin','disaster','editor',
+    'efficiency','excitement','extent','feedback','guitar','homework','leader','mom','outcome',
+    'permission','presentation','promotion','reflection','refrigerator','resolution','revenue',
+    'session','singer','tennis','basket','bonus','cabinet','childhood','church','clothes','coffee',
+    'dinner','drawing','hair','hearing','initiative','judgment','lab','measurement','mode','mud',
+    'orange','poetry','police','possibility','procedure','queen','ratio','relation','restaurant',
+    'satisfaction','sector','signature','significance','song','tooth','town','vehicle','volume','wife',
+    'accident','airport','appointment','arrival','assumption','baseball','chapter','committee',
+    'conversation','database','enthusiasm','error','explanation','farmer','gate','girl','hall',
+    'historian','hospital','injury','instruction','maintenance','manufacturer','meal','perception','pie',
+    'poem','presence','proposal','reception','replacement','revolution','river','son','speech','tea',
+    'village','warning','winner','worker','writer','assistance','breath','buyer','chest','chocolate',
+    'conclusion','contribution','cookie','courage','dad','desk','drawer','establishment','examination',
+    'garbage','grocery','honey','impression','improvement','independence','insect','inspection',
+    'inspector','king','ladder','menu','penalty','piano','potato','profession','professor','quantity',
+    'reaction','requirement','salad','sister','supermarket','tongue','weakness','wedding','affair',
+    'ambition','analyst','apple','assignment','assistant','bathroom','bedroom','beer','birthday',
+    'celebration','championship','cheek','client','consequence','departure','diamond','dirt','ear',
+    'fortune','friendship','funeral','gene','girlfriend','hat','indication','intention','lady',
+    'midnight','negotiation','obligation','passenger','pizza','platform','poet','pollution',
+    'recognition','reputation','shirt','sir','speaker','stranger','surgery','sympathy','tale','throat',
+    'trainer','uncle','youth','time','work','film','water','money','example','while','business','study',
+    'game','life','form','air','day','place','number','part','field','fish','back','process','heat',
+    'hand','experience','job','book','end','point','type','home','economy','value','body','market',
+    'guide','interest','state','radio','course','company','price','size','card','list','mind','trade',
+    'line','care','group','risk','word','fat','force','key','light','training','name','school','top',
+    'amount','level','order','practice','research','sense','service','piece','web','boss','sport','fun',
+    'house','page','term','test','answer','sound','focus','matter','kind','soil','board','oil','picture',
+    'access','garden','range','rate','reason','future','site','demand','exercise','image','case','cause',
+    'coast','action','age','bad','boat','record','result','section','building','mouse','cash','class',
+    'nothing','period','plan','store','tax','side','subject','space','rule','stock','weather','chance',
+    'figure','man','model','source','beginning','earth','program','chicken','design','feature','head',
+    'material','purpose','question','rock','salt','act','birth','car','dog','object','scale','sun',
+    'note','profit','rent','speed','style','war','bank','craft','half','inside','outside','standard',
+    'bus','exchange','eye','fire','position','pressure','stress','advantage','benefit','box','frame',
+    'issue','step','cycle','face','item','metal','paint','review','room','screen','structure','view',
+    'account','ball','discipline','medium','share','balance','bit','black','bottom','choice','gift',
+    'impact','machine','shape','tool','wind','address','average','career','culture','morning','pot',
+    'sign','table','task','condition','contact','credit','egg','hope','ice','network','north','square',
+    'attempt','date','effect','link','post','star','voice','capital','challenge','friend','self','shot',
+    'brush','couple','debate','exit','front','function','lack','living','plant','plastic','spot',
+    'summer','taste','theme','track','wing','brain','button','click','desire','foot','gas','influence',
+    'notice','rain','wall','base','damage','distance','feeling','pair','savings','staff','sugar',
+    'target','text','animal','author','budget','discount','file','ground','lesson','minute','officer',
+    'phase','reference','register','sky','stage','stick','title','trouble','bowl','bridge','campaign',
+    'character','club','edge','evidence','fan','letter','lock','maximum','novel','option','pack','park',
+    'plenty','quarter','skin','sort','weight','baby','background','carry','dish','factor','fruit',
+    'glass','joint','master','muscle','red','strength','traffic','trip','vegetable','appeal','chart',
+    'gear','ideal','kitchen','land','log','mother','net','party','principle','relative','sale','season',
+    'signal','spirit','street','tree','wave','belt','bench','commission','copy','drop','minimum','path',
+    'progress','project','sea','south','status','stuff','ticket','tour','angle','blue','breakfast',
+    'confidence','daughter','degree','doctor','dot','dream','duty','essay','father','fee','finance',
+    'hour','juice','limit','luck','milk','mouth','peace','pipe','seat','stable','storm','substance',
+    'team','trick','afternoon','bat','beach','blank','catch','chain','consideration','cream','crew',
+    'detail','gold','interview','kid','mark','match','mission','pain','pleasure','score','screw','sex',
+    'shop','shower','suit','tone','window','agent','band','block','bone','calendar','cap','coat',
+    'contest','corner','court','cup','district','door','east','finger','garage','guarantee','hole',
+    'hook','implement','layer','lecture','lie','manner','meeting','nose','parking','partner','profile',
+    'respect','rice','routine','schedule','swimming','telephone','tip','winter','airline','bag','battle',
+    'bed','bill','bother','cake','code','curve','designer','dimension','dress','ease','emergency',
+    'evening','extension','farm','fight','gap','grade','holiday','horror','horse','host','husband',
+    'loan','mistake','mountain','nail','noise','occasion','package','patient','pause','phrase','proof',
+    'race','relief','sand','sentence','shoulder','smoke','stomach','string','tourist','towel','vacation',
+    'west','wheel','wine','arm','aside','associate','bet','blow','border','branch','breast','brother',
+    'buddy','bunch','chip','coach','cross','document','draft','dust','expert','floor','god','golf',
+    'habit','iron','judge','knife','landscape','league','mail','mess','native','opening','parent',
+    'pattern','pin','pool','pound','request','salary','shame','shelter','shoe','silver','tackle','tank',
+    'trust','assist','bake','bar','bell','bike','blame','boy','brick','chair','closet','clue','collar',
+    'comment','conference','devil','diet','fear','fuel','glove','jacket','lunch','monitor','mortgage',
+    'nurse','pace','panic','peak','plane','reward','row','sandwich','shock','spite','spray','surprise',
+    'till','transition','weekend','welcome','yard','alarm','bend','bicycle','bite','blind','bottle',
+    'cable','candle','clerk','cloud','concert','counter','flower','grandfather','harm','knee','lawyer',
+    'leather','load','mirror','neck','pension','plate','purple','ruin','ship','skirt','slice','snow',
+    'specialist','stroke','switch','trash','tune','zone','anger','award','bid','bitter','boot','bug',
+    'camp','candy','carpet','cat','champion','channel','clock','comfort','cow','crack','engineer',
+    'entrance','fault','grass','guy','hell','highlight','incident','island','joke','jury','leg','lip',
+    'mate','motor','nerve','passage','pen','pride','priest','prize','promise','resident','resort','ring',
+    'roof','rope','sail','scheme','script','sock','station','toe','tower','truck','witness','a','you',
+    'it','can','will','if','one','many','most','other','use','make','good','look','help','go','great',
+    'being','few','might','still','public','read','keep','start','give','human','local','general','she',
+    'specific','long','play','feel','high','tonight','put','common','set','change','simple','past','big',
+    'possible','particular','today','major','personal','current','national','cut','natural','physical',
+    'show','try','check','second','call','move','pay','let','increase','single','individual','turn',
+    'ask','buy','guard','hold','main','offer','potential','professional','international','travel','cook',
+    'alternative','following','special','working','whole','dance','excuse','cold','commercial','low',
+    'purchase','deal','primary','worth','fall','necessary','positive','produce','search','present',
+    'spend','talk','creative','tell','cost','drive','green','support','glad','remove','return','run',
+    'complex','due','effective','middle','regular','reserve','independent','leave','original','reach',
+    'rest','serve','watch','beautiful','charge','active','break','negative','safe','stay','visit',
+    'visual','affect','cover','report','rise','walk','white','beyond','junior','pick','unique',
+    'anything','classic','final','lift','mix','private','stop','teach','western','concern','familiar',
+    'fly','official','broad','comfortable','gain','maybe','rich','save','stand','young','fail','heavy',
+    'hello','lead','listen','valuable','worry','handle','leading','meet','release','sell','finish',
+    'normal','press','ride','secret','spread','spring','tough','wait','brown','deep','display','flow',
+    'hit','objective','shoot','touch','cancel','chemical','cry','dump','extreme','push','conflict','eat',
+    'fill','formal','jump','kick','opposite','pass','pitch','remote','total','treat','vast','abuse',
+    'beat','burn','deposit','print','raise','sleep','somewhere','advance','anywhere','consist','dark',
+    'double','draw','equal','fix','hire','internal','join','kill','sensitive','tap','win','attack',
+    'claim','constant','drag','drink','guess','minor','pull','raw','soft','solid','wear','weird',
+    'wonder','annual','count','dead','doubt','feed','forever','impress','nobody','repeat','round','sing',
+    'slide','strip','whereas','wish','combine','command','dig','divide','equivalent','hang','hunt',
+    'initial','march','mention','smell','spiritual','survey','tie','adult','brief','crazy','escape',
+    'gather','hate','prior','repair','rough','sad','scratch','sick','strike','employ','external','hurt',
+    'illegal','laugh','lay','mobile','nasty','ordinary','respond','royal','senior','split','strain',
+    'struggle','swim','train','upper','wash','yellow','convert','crash','dependent','fold','funny',
+    'grab','hide','miss','permit','quote','recover','resolve','roll','sink','slip','spare','suspect',
+    'sweet','swing','twist','upstairs','usual','abroad','brave','calm','concentrate','estimate','grand',
+    'male','mine','prompt','quiet','refuse','regret','reveal','rush','shake','shift','shine','steal',
+    'suck','surround','anybody','bear','brilliant','dare','dear','delay','drunk','female','hurry',
+    'inevitable','invite','kiss','neat','pop','punch','quit','reply','representative','resist','rip',
+    'rub','silly','smile','spell','stretch','stupid','tear','temporary','tomorrow','wake','wrap',
+    'yesterday']
+
+def get_random_name(with_ext=True):
+    return "{}_{}_{}{}".format(
+        random.choice(adjectives),
+        random.choice(nouns),
+        random.randint(0, 50000),
+        '.txt' if with_ext else '')
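+
+# For illustration only (actual values are random): get_random_name() might
+# return something like 'quirky_inflation_31337.txt', and
+# get_random_name(False) something like 'soggy_refrigerator_4'.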
+
+def get_random_file(max_filesize):
+    # Pick a random window into the shared data block; capping the start
+    # offset at (max_filesize - 1025) leaves at least 1 KiB of headroom, and
+    # the size is then chosen so the window never runs past the block.
+    file_start = random.randint(0, (max_filesize - 1025))
+    file_size = random.randint(0, (max_filesize - file_start))
+    file_name = get_random_name()
+    return "{}:{}:{}".format(file_start, file_size, file_name)
+
+def get_stream(name, max_filesize, data_loc, args):
+    files = []
+    for _ in range(random.randint(args.min_files, args.max_files)):
+        files.append(get_random_file(max_filesize))
+    stream = "{} {} {}".format(name, data_loc, ' '.join(files))
+    return stream
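+
+# A resulting stream line might look like the following (the locator shown
+# here is made up; the real one comes from KeepClient.put() in main()):
+#
+#   . acbd18db4cc2f85cedef654fccc4a4d8+1048576 0:512:a.txt 512:100:b.txt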
+
+def create_substreams(depth, base_stream_name, max_filesize, data_loc, args, current_size=0):
+    current_stream = get_stream(base_stream_name, max_filesize, data_loc, args)
+    current_size += len(current_stream)
+    streams = [current_stream]
+
+    if current_size >= (128 * 1024 * 1024):
+        logger.debug("Maximum manifest size reached -- finishing early at {}".format(base_stream_name))
+    elif depth == 0:
+        logger.debug("Finished stream {}".format(base_stream_name))
+    else:
+        for _ in range(random.randint(args.min_subdirs, args.max_subdirs)):
+            stream_name = base_stream_name+'/'+get_random_name(False)
+            substreams = create_substreams(depth-1, stream_name, max_filesize,
+                data_loc, args, current_size)
+            current_size += sum([len(x) for x in substreams])
+            if current_size >= (128 * 1024 * 1024):
+                break
+            streams.extend(substreams)
+    return streams
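+
+# Note: the 128 MiB cutoff above is presumably meant to keep the eventual
+# manifest within the API server's default maximum request size. It measures
+# accumulated stream text, so it approximates (rather than exactly bounds)
+# the final manifest size.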
+
+def parse_arguments(arguments):
+    args = arg_parser.parse_args(arguments)
+    if args.debug:
+        logger.setLevel(logging.DEBUG)
+    if args.max_files < args.min_files:
+        arg_parser.error("--min-files={} should be less or equal than max-files={}".format(args.min_files, args.max_files))
+    if args.min_depth < 0:
+        arg_parser.error("--min-depth should be at least 0")
+    if args.max_depth < 0 or args.max_depth < args.min_depth:
+        arg_parser.error("--max-depth should be at >= 0 and >= min-depth={}".format(args.min_depth))
+    if args.max_subdirs < args.min_subdirs:
+        arg_parser.error("--min-subdirs={} should be less or equal than max-subdirs={}".format(args.min_subdirs, args.max_subdirs))
+    return args
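+
+# Example invocation (flag names match the checks above; the values are
+# arbitrary, and arg_parser is assumed to be defined earlier in this file):
+#
+#   python test-collection-create.py --debug \
+#       --min-files 2 --max-files 10 \
+#       --min-depth 0 --max-depth 3 \
+#       --min-subdirs 1 --max-subdirs 4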
+
+def main(arguments=None):
+    args = parse_arguments(arguments)
+    logger.info("Creating test collection with (min={}, max={}) files per directory and a tree depth of (min={}, max={}) and (min={}, max={}) subdirs in each depth level...".format(args.min_files, args.max_files, args.min_depth, args.max_depth, args.min_subdirs, args.max_subdirs))
+    api = arvados.api('v1', timeout=5*60)
+    max_filesize = 1024*1024
+    # A single shared 1 MiB data block backs every file in the collection;
+    # each file token just references a random window into it.
+    data_block = ''.join([random.choice(string.printable) for _ in range(max_filesize)])
+    data_loc = arvados.KeepClient(api).put(data_block)
+    streams = create_substreams(random.randint(args.min_depth, args.max_depth),
+        '.', max_filesize, data_loc, args)
+    manifest = ''
+    for s in streams:
+        # Leave room for newlines so the final manifest stays under 128 MiB.
+        if len(manifest)+len(s) > (1024*1024*128)-2:
+            logger.info("Skipping stream {} to avoid making a manifest bigger than 128 MiB".format(s.split(' ')[0]))
+            break
+        manifest += s + '\n'
+    coll_name = get_random_name(False)
+    try:
+        coll = api.collections().create(
+            body={"collection": {
+                "name": coll_name,
+                "manifest_text": manifest,
+            }},
+        ).execute()
+    except Exception:
+        logger.error("Error creating collection with name '{}' and manifest:\n'{}...'\nSize: {}".format(coll_name, manifest[0:1024], len(manifest)))
+        raise
+    logger.info("Created collection {} - manifest size: {}".format(coll["uuid"], len(manifest)))
+    return 0
+
+if __name__ == "__main__":
+    sys.exit(main())
\ No newline at end of file