From: Stephen Smith Date: Thu, 26 Aug 2021 16:52:20 +0000 (-0400) Subject: Merge branch '15159-export-trustallcontent' into main. Closes #15159 X-Git-Tag: 2.3.0~98 X-Git-Url: https://git.arvados.org/arvados.git/commitdiff_plain/e1df29a3d682b28400ca35e490aa4a95aa564246?hp=f41684bd6aef3b1a9683edc64d5d95669a392853 Merge branch '15159-export-trustallcontent' into main. Closes #15159 Arvados-DCO-1.1-Signed-off-by: Stephen Smith --- diff --git a/apps/workbench/test/controllers/work_units_controller_test.rb b/apps/workbench/test/controllers/work_units_controller_test.rb index 6f74955cd1..0191c7f0df 100644 --- a/apps/workbench/test/controllers/work_units_controller_test.rb +++ b/apps/workbench/test/controllers/work_units_controller_test.rb @@ -13,26 +13,26 @@ class WorkUnitsControllerTest < ActionController::TestCase [ ['foo', 10, 25, ['/pipeline_instances/zzzzz-d1hrv-1xfj6xkicf2muk2', - '/pipeline_instances/zzzzz-d1hrv-jobspeccomponts', + '/pipeline_instances/zzzzz-d1hrv-1yfj61234abcdk4', '/jobs/zzzzz-8i9sb-grx15v5mjnsyxk7'], ['/pipeline_instances/zzzzz-d1hrv-1yfj61234abcdk3', '/jobs/zzzzz-8i9sb-n7omg50bvt0m1nf', '/container_requests/zzzzz-xvhdp-cr4completedcr2']], ['pipeline_with_tagged_collection_input', 1, 1, ['/pipeline_instances/zzzzz-d1hrv-1yfj61234abcdk3'], - ['/pipeline_instances/zzzzz-d1hrv-jobspeccomponts', + ['/pipeline_instances/zzzzz-d1hrv-1yfj61234abcdk4', '/jobs/zzzzz-8i9sb-pshmckwoma9plh7', '/jobs/zzzzz-8i9sb-n7omg50bvt0m1nf', '/container_requests/zzzzz-xvhdp-cr4completedcr2']], ['no_such_match', 0, 0, [], - ['/pipeline_instances/zzzzz-d1hrv-jobspeccomponts', + ['/pipeline_instances/zzzzz-d1hrv-1yfj61234abcdk4', '/jobs/zzzzz-8i9sb-pshmckwoma9plh7', '/jobs/zzzzz-8i9sb-n7omg50bvt0m1nf', '/container_requests/zzzzz-xvhdp-cr4completedcr2']], ].each do |search_filter, expected_min, expected_max, expected, not_expected| test "all_processes page for search filter '#{search_filter}'" do - work_units_index(filters: [['any','@@', search_filter]], show_children: true) 
+ work_units_index(filters: [['any','ilike', "%#{search_filter}%"]], show_children: true) assert_response :success # Verify that expected number of processes are found diff --git a/apps/workbench/test/integration/work_units_test.rb b/apps/workbench/test/integration/work_units_test.rb index 4f2ebbc554..36b29468ff 100644 --- a/apps/workbench/test/integration/work_units_test.rb +++ b/apps/workbench/test/integration/work_units_test.rb @@ -14,7 +14,7 @@ class WorkUnitsTest < ActionDispatch::IntegrationTest [[true, 25, 100, ['/pipeline_instances/zzzzz-d1hrv-1yfj61234abcdk3', - '/pipeline_instances/zzzzz-d1hrv-jobspeccomponts', + '/pipeline_instances/zzzzz-d1hrv-1yfj61234abcdk4', '/jobs/zzzzz-8i9sb-grx15v5mjnsyxk7', '/jobs/zzzzz-8i9sb-n7omg50bvt0m1nf', '/container_requests/zzzzz-xvhdp-cr4completedcr2', @@ -23,7 +23,7 @@ class WorkUnitsTest < ActionDispatch::IntegrationTest '/container_requests/zzzzz-xvhdp-oneof60crs00001']], [false, 25, 100, ['/pipeline_instances/zzzzz-d1hrv-1yfj61234abcdk3', - '/pipeline_instances/zzzzz-d1hrv-jobspeccomponts', + '/pipeline_instances/zzzzz-d1hrv-1yfj61234abcdk4', '/container_requests/zzzzz-xvhdp-cr4completedcr2'], ['/pipeline_instances/zzzzz-d1hrv-scarxiyajtshq3l', '/container_requests/zzzzz-xvhdp-oneof60crs00001', diff --git a/doc/admin/upgrading.html.textile.liquid b/doc/admin/upgrading.html.textile.liquid index 8435e2871f..9e7410260f 100644 --- a/doc/admin/upgrading.html.textile.liquid +++ b/doc/admin/upgrading.html.textile.liquid @@ -39,6 +39,10 @@ h2(#main). development main (as of 2021-07-15) "Upgrading from 2.2.0":#v2_2_0 +h3. Removed deprecated '@@' search operator + +The '@@' full text search operator, previously deprecated, has been removed. To perform a string search across multiple columns, use the 'ilike' operator on 'any' column as described in the "available list method filter section":{{site.baseurl}}/api/methods.html#substringsearchfilter of the API documentation. + h3. 
Storage classes must be defined explicitly If your configuration uses the StorageClasses attribute on any Keep volumes, you must add a new @StorageClasses@ section that lists all of your storage classes. Refer to the updated documentation about "configuring storage classes":{{site.baseurl}}/admin/storage-classes.html for details. diff --git a/doc/api/methods.html.textile.liquid b/doc/api/methods.html.textile.liquid index c6e4ba00a7..670a9e0da3 100644 --- a/doc/api/methods.html.textile.liquid +++ b/doc/api/methods.html.textile.liquid @@ -96,7 +96,7 @@ table(table table-bordered table-condensed). |1|operator|string|Comparison operator|@>@, @>=@, @like@, @not in@| |2|operand|string, array, or null|Value to compare with the resource attribute|@"d00220fb%"@, @"1234"@, @["foo","bar"]@, @nil@| -The following operators are available.[1] +The following operators are available. table(table table-bordered table-condensed). |_. Operator|_. Operand type|_. Description|_. Example| @@ -167,5 +167,3 @@ table(table table-bordered table-condensed). |_. Argument |_. Type |_. Description |_. Location | {background:#ccffcc}.|uuid|string|The UUID of the resource in question.|path|| |{resource_type}|object||query|| - -fn1^. NOTE: The filter operator for full-text search (@@) which previously worked (but was undocumented) is deprecated and will be removed in a future release. 
diff --git a/lib/controller/integration_test.go b/lib/controller/integration_test.go index 26f0dbb0d1..6851442054 100644 --- a/lib/controller/integration_test.go +++ b/lib/controller/integration_test.go @@ -20,6 +20,7 @@ import ( "path/filepath" "strconv" "strings" + "sync" "git.arvados.org/arvados.git/lib/boot" "git.arvados.org/arvados.git/lib/config" @@ -187,6 +188,49 @@ func (s *IntegrationSuite) TestGetCollectionByPDH(c *check.C) { c.Check(coll.PortableDataHash, check.Equals, pdh) } +// Tests bug #18004 +func (s *IntegrationSuite) TestRemoteUserAndTokenCacheRace(c *check.C) { + conn1 := s.testClusters["z1111"].Conn() + rootctx1, _, _ := s.testClusters["z1111"].RootClients() + rootctx2, _, _ := s.testClusters["z2222"].RootClients() + conn2 := s.testClusters["z2222"].Conn() + userctx1, _, _, _ := s.testClusters["z1111"].UserClients(rootctx1, c, conn1, "user2@example.com", true) + + var wg1, wg2 sync.WaitGroup + creqs := 100 + + // Make concurrent requests to z2222 with a local token to make sure more + // than one worker is listening. + wg1.Add(1) + for i := 0; i < creqs; i++ { + wg2.Add(1) + go func() { + defer wg2.Done() + wg1.Wait() + _, err := conn2.UserGetCurrent(rootctx2, arvados.GetOptions{}) + c.Check(err, check.IsNil, check.Commentf("warm up phase failed")) + }() + } + wg1.Done() + wg2.Wait() + + // Real test pass -- use a new remote token than the one used in the warm-up + // phase. + wg1.Add(1) + for i := 0; i < creqs; i++ { + wg2.Add(1) + go func() { + defer wg2.Done() + wg1.Wait() + // Retrieve the remote collection from cluster z2222. 
+ _, err := conn2.UserGetCurrent(userctx1, arvados.GetOptions{}) + c.Check(err, check.IsNil, check.Commentf("testing phase failed")) + }() + } + wg1.Done() + wg2.Wait() +} + func (s *IntegrationSuite) TestS3WithFederatedToken(c *check.C) { if _, err := exec.LookPath("s3cmd"); err != nil { c.Skip("s3cmd not in PATH") @@ -502,7 +546,7 @@ func (s *IntegrationSuite) TestRequestIDHeader(c *check.C) { } // We test the direct access to the database -// normally an integration test would not have a database access, but in this case we need +// normally an integration test would not have a database access, but in this case we need // to test tokens that are secret, so there is no API response that will give them back func (s *IntegrationSuite) dbConn(c *check.C, clusterID string) (*sql.DB, *sql.Conn) { ctx := context.Background() diff --git a/lib/crunchrun/singularity.go b/lib/crunchrun/singularity.go index 741f542454..61fecad0a1 100644 --- a/lib/crunchrun/singularity.go +++ b/lib/crunchrun/singularity.go @@ -101,7 +101,7 @@ func (e *singularityExecutor) checkImageCache(dockerImageID string, container ar if len(cl.Items) == 1 { imageCollection = cl.Items[0] } else { - collectionName := collectionName + " " + time.Now().UTC().Format(time.RFC3339) + collectionName := "converting " + collectionName exp := time.Now().Add(24 * 7 * 2 * time.Hour) err = containerClient.RequestAndDecode(&imageCollection, arvados.EndpointCollectionCreate.Method, @@ -112,6 +112,7 @@ func (e *singularityExecutor) checkImageCache(dockerImageID string, container ar "name": collectionName, "trash_at": exp.UTC().Format(time.RFC3339), }, + "ensure_unique_name": true, }) if err != nil { return nil, fmt.Errorf("error creating '%v' collection: %s", collectionName, err) @@ -141,6 +142,12 @@ func (e *singularityExecutor) LoadImage(dockerImageID string, imageTarballPath s } if _, err := os.Stat(imageFilename); os.IsNotExist(err) { + // Make sure the docker image is readable, and error + // out if not. 
+ if _, err := os.Stat(imageTarballPath); err != nil { + return err + } + e.logf("building singularity image") // "singularity build" does not accept a // docker-archive://... filename containing a ":" character, diff --git a/services/api/app/models/api_client_authorization.rb b/services/api/app/models/api_client_authorization.rb index 52f2cee064..7c7ed759c6 100644 --- a/services/api/app/models/api_client_authorization.rb +++ b/services/api/app/models/api_client_authorization.rb @@ -319,7 +319,17 @@ class ApiClientAuthorization < ArvadosModel user.last_name = "from cluster #{remote_user_prefix}" end - user.save! + begin + user.save! + rescue ActiveRecord::RecordInvalid, ActiveRecord::RecordNotUnique + Rails.logger.debug("remote user #{remote_user['uuid']} already exists, retrying...") + # Some other request won the race: retry fetching the user record. + user = User.find_by_uuid(remote_user['uuid']) + if !user + Rails.logger.warn("cannot find or create remote user #{remote_user['uuid']}") + return nil + end + end if user.is_invited && !remote_user['is_invited'] # Remote user is not "invited" state, they should be unsetup, which @@ -364,12 +374,24 @@ class ApiClientAuthorization < ArvadosModel exp = [db_current_time + Rails.configuration.Login.RemoteTokenRefresh, remote_token.andand['expires_at']].compact.min scopes = remote_token.andand['scopes'] || ['all'] - auth = ApiClientAuthorization.find_or_create_by(uuid: token_uuid) do |auth| - auth.user = user - auth.api_token = stored_secret - auth.api_client_id = 0 - auth.scopes = scopes - auth.expires_at = exp + begin + retries ||= 0 + auth = ApiClientAuthorization.find_or_create_by(uuid: token_uuid) do |auth| + auth.user = user + auth.api_token = stored_secret + auth.api_client_id = 0 + auth.scopes = scopes + auth.expires_at = exp + end + rescue ActiveRecord::RecordNotUnique + Rails.logger.debug("cached remote token #{token_uuid} already exists, retrying...") + # Some other request won the race: retry just once before 
erroring out + if (retries += 1) <= 1 + retry + else + Rails.logger.warn("cannot find or create cached remote token #{token_uuid}") + return nil + end end auth.update_attributes!(user: user, api_token: stored_secret, diff --git a/services/api/db/migrate/20210816191509_drop_fts_index.rb b/services/api/db/migrate/20210816191509_drop_fts_index.rb new file mode 100644 index 0000000000..4ee1f55a37 --- /dev/null +++ b/services/api/db/migrate/20210816191509_drop_fts_index.rb @@ -0,0 +1,31 @@ +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +class DropFtsIndex < ActiveRecord::Migration[5.2] + def fts_indexes + { + "collections" => "collections_full_text_search_idx", + "container_requests" => "container_requests_full_text_search_idx", + "groups" => "groups_full_text_search_idx", + "jobs" => "jobs_full_text_search_idx", + "pipeline_instances" => "pipeline_instances_full_text_search_idx", + "pipeline_templates" => "pipeline_templates_full_text_search_idx", + "workflows" => "workflows_full_text_search_idx", + } + end + + def up + fts_indexes.keys.each do |t| + i = fts_indexes[t] + execute "DROP INDEX IF EXISTS #{i}" + end + end + + def down + fts_indexes.keys.each do |t| + i = fts_indexes[t] + execute "CREATE INDEX #{i} ON #{t} USING gin(#{t.classify.constantize.full_text_tsvector})" + end + end +end diff --git a/services/api/db/structure.sql b/services/api/db/structure.sql index 2bca887212..2f77483356 100644 --- a/services/api/db/structure.sql +++ b/services/api/db/structure.sql @@ -238,29 +238,6 @@ SET default_tablespace = ''; SET default_with_oids = false; --- --- Name: groups; Type: TABLE; Schema: public; Owner: - --- - -CREATE TABLE public.groups ( - id integer NOT NULL, - uuid character varying(255), - owner_uuid character varying(255), - created_at timestamp without time zone NOT NULL, - modified_by_client_uuid character varying(255), - modified_by_user_uuid character varying(255), - modified_at timestamp without time 
zone, - name character varying(255) NOT NULL, - description character varying(524288), - updated_at timestamp without time zone NOT NULL, - group_class character varying(255), - trash_at timestamp without time zone, - is_trashed boolean DEFAULT false NOT NULL, - delete_at timestamp without time zone, - properties jsonb DEFAULT '{}'::jsonb -); - - -- -- Name: api_client_authorizations; Type: TABLE; Schema: public; Owner: - -- @@ -571,6 +548,29 @@ CREATE SEQUENCE public.containers_id_seq ALTER SEQUENCE public.containers_id_seq OWNED BY public.containers.id; +-- +-- Name: groups; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.groups ( + id integer NOT NULL, + uuid character varying(255), + owner_uuid character varying(255), + created_at timestamp without time zone NOT NULL, + modified_by_client_uuid character varying(255), + modified_by_user_uuid character varying(255), + modified_at timestamp without time zone, + name character varying(255) NOT NULL, + description character varying(524288), + updated_at timestamp without time zone NOT NULL, + group_class character varying(255), + trash_at timestamp without time zone, + is_trashed boolean DEFAULT false NOT NULL, + delete_at timestamp without time zone, + properties jsonb DEFAULT '{}'::jsonb +); + + -- -- Name: groups_id_seq; Type: SEQUENCE; Schema: public; Owner: - -- @@ -1722,13 +1722,6 @@ CREATE INDEX authorized_keys_search_index ON public.authorized_keys USING btree CREATE INDEX collection_index_on_properties ON public.collections USING gin (properties); --- --- Name: collections_full_text_search_idx; Type: INDEX; Schema: public; Owner: - --- - -CREATE INDEX collections_full_text_search_idx ON public.collections USING gin (to_tsvector('english'::regconfig, substr((((((((((((((((((COALESCE(owner_uuid, ''::character varying))::text || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' 
'::text) || (COALESCE(portable_data_hash, ''::character varying))::text) || ' '::text) || (COALESCE(uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || (COALESCE(description, ''::character varying))::text) || ' '::text) || COALESCE((properties)::text, ''::text)) || ' '::text) || COALESCE(file_names, ''::text)), 0, 1000000))); - - -- -- Name: collections_search_index; Type: INDEX; Schema: public; Owner: - -- @@ -1743,13 +1736,6 @@ CREATE INDEX collections_search_index ON public.collections USING btree (owner_u CREATE INDEX collections_trgm_text_search_idx ON public.collections USING gin (((((((((((((((((((COALESCE(owner_uuid, ''::character varying))::text || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(portable_data_hash, ''::character varying))::text) || ' '::text) || (COALESCE(uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || (COALESCE(description, ''::character varying))::text) || ' '::text) || COALESCE((properties)::text, ''::text)) || ' '::text) || COALESCE(file_names, ''::text))) public.gin_trgm_ops); --- --- Name: container_requests_full_text_search_idx; Type: INDEX; Schema: public; Owner: - --- - -CREATE INDEX container_requests_full_text_search_idx ON public.container_requests USING gin (to_tsvector('english'::regconfig, substr((((((((((((((((((((((((((((((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(description, ''::text)) || ' '::text) || 
COALESCE((properties)::text, ''::text)) || ' '::text) || (COALESCE(state, ''::character varying))::text) || ' '::text) || (COALESCE(requesting_container_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(container_uuid, ''::character varying))::text) || ' '::text) || COALESCE(runtime_constraints, ''::text)) || ' '::text) || (COALESCE(container_image, ''::character varying))::text) || ' '::text) || COALESCE(environment, ''::text)) || ' '::text) || (COALESCE(cwd, ''::character varying))::text) || ' '::text) || COALESCE(command, ''::text)) || ' '::text) || (COALESCE(output_path, ''::character varying))::text) || ' '::text) || COALESCE(filters, ''::text)) || ' '::text) || COALESCE(scheduling_parameters, ''::text)) || ' '::text) || (COALESCE(output_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(log_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(output_name, ''::character varying))::text), 0, 1000000))); - - -- -- Name: container_requests_index_on_properties; Type: INDEX; Schema: public; Owner: - -- @@ -1785,13 +1771,6 @@ CREATE INDEX containers_search_index ON public.containers USING btree (uuid, own CREATE INDEX group_index_on_properties ON public.groups USING gin (properties); --- --- Name: groups_full_text_search_idx; Type: INDEX; Schema: public; Owner: - --- - -CREATE INDEX groups_full_text_search_idx ON public.groups USING gin (to_tsvector('english'::regconfig, substr((((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || (COALESCE(description, ''::character varying))::text) || ' '::text) || (COALESCE(group_class, ''::character varying))::text) || ' '::text) || COALESCE((properties)::text, ''::text)), 0, 
1000000))); - - -- -- Name: groups_search_index; Type: INDEX; Schema: public; Owner: - -- @@ -2779,13 +2758,6 @@ CREATE UNIQUE INDEX index_workflows_on_uuid ON public.workflows USING btree (uui CREATE INDEX job_tasks_search_index ON public.job_tasks USING btree (uuid, owner_uuid, modified_by_client_uuid, modified_by_user_uuid, job_uuid, created_by_job_task_uuid); --- --- Name: jobs_full_text_search_idx; Type: INDEX; Schema: public; Owner: - --- - -CREATE INDEX jobs_full_text_search_idx ON public.jobs USING gin (to_tsvector('english'::regconfig, substr((((((((((((((((((((((((((((((((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(submit_id, ''::character varying))::text) || ' '::text) || (COALESCE(script, ''::character varying))::text) || ' '::text) || (COALESCE(script_version, ''::character varying))::text) || ' '::text) || COALESCE(script_parameters, ''::text)) || ' '::text) || (COALESCE(cancelled_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(cancelled_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(output, ''::character varying))::text) || ' '::text) || (COALESCE(is_locked_by_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(log, ''::character varying))::text) || ' '::text) || COALESCE(tasks_summary, ''::text)) || ' '::text) || COALESCE(runtime_constraints, ''::text)) || ' '::text) || (COALESCE(repository, ''::character varying))::text) || ' '::text) || (COALESCE(supplied_script_version, ''::character varying))::text) || ' '::text) || (COALESCE(docker_image_locator, ''::character varying))::text) || ' '::text) || (COALESCE(description, ''::character varying))::text) || ' '::text) || (COALESCE(state, ''::character varying))::text) || 
' '::text) || (COALESCE(arvados_sdk_version, ''::character varying))::text) || ' '::text) || COALESCE(components, ''::text)), 0, 1000000))); - - -- -- Name: jobs_search_index; Type: INDEX; Schema: public; Owner: - -- @@ -2877,13 +2849,6 @@ CREATE INDEX permission_target ON public.materialized_permissions USING btree (t CREATE UNIQUE INDEX permission_user_target ON public.materialized_permissions USING btree (user_uuid, target_uuid); --- --- Name: pipeline_instances_full_text_search_idx; Type: INDEX; Schema: public; Owner: - --- - -CREATE INDEX pipeline_instances_full_text_search_idx ON public.pipeline_instances USING gin (to_tsvector('english'::regconfig, substr((((((((((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(pipeline_template_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(components, ''::text)) || ' '::text) || COALESCE(properties, ''::text)) || ' '::text) || (COALESCE(state, ''::character varying))::text) || ' '::text) || COALESCE(components_summary, ''::text)) || ' '::text) || (COALESCE(description, ''::character varying))::text), 0, 1000000))); - - -- -- Name: pipeline_instances_search_index; Type: INDEX; Schema: public; Owner: - -- @@ -2905,13 +2870,6 @@ CREATE INDEX pipeline_instances_trgm_text_search_idx ON public.pipeline_instance CREATE UNIQUE INDEX pipeline_template_owner_uuid_name_unique ON public.pipeline_templates USING btree (owner_uuid, name); --- --- Name: pipeline_templates_full_text_search_idx; Type: INDEX; Schema: public; Owner: - --- - -CREATE INDEX pipeline_templates_full_text_search_idx ON public.pipeline_templates USING gin (to_tsvector('english'::regconfig, 
substr((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(components, ''::text)) || ' '::text) || (COALESCE(description, ''::character varying))::text), 0, 1000000))); - - -- -- Name: pipeline_templates_search_index; Type: INDEX; Schema: public; Owner: - -- @@ -2968,13 +2926,6 @@ CREATE INDEX users_search_index ON public.users USING btree (uuid, owner_uuid, m CREATE INDEX virtual_machines_search_index ON public.virtual_machines USING btree (uuid, owner_uuid, modified_by_client_uuid, modified_by_user_uuid, hostname); --- --- Name: workflows_full_text_search_idx; Type: INDEX; Schema: public; Owner: - --- - -CREATE INDEX workflows_full_text_search_idx ON public.workflows USING gin (to_tsvector('english'::regconfig, substr((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(description, ''::text)), 0, 1000000))); - - -- -- Name: workflows_search_idx; Type: INDEX; Schema: public; Owner: - -- @@ -3194,6 +3145,7 @@ INSERT INTO "schema_migrations" (version) VALUES ('20201202174753'), ('20210108033940'), ('20210126183521'), -('20210621204455'); +('20210621204455'), +('20210816191509'); diff --git a/services/api/lib/record_filters.rb b/services/api/lib/record_filters.rb index 5688ca6140..f8898d63c9 100644 --- a/services/api/lib/record_filters.rb +++ b/services/api/lib/record_filters.rb @@ -31,7 +31,10 @@ module RecordFilters 
model_table_name = model_class.table_name filters.each do |filter| attrs_in, operator, operand = filter - if attrs_in == 'any' && operator != '@@' + if operator == '@@' + raise ArgumentError.new("Full text search operator is no longer supported") + end + if attrs_in == 'any' attrs = model_class.searchable_columns(operator) elsif attrs_in.is_a? Array attrs = attrs_in @@ -54,22 +57,6 @@ module RecordFilters attrs = [] end - if operator == '@@' - # Full-text search - if attrs_in != 'any' - raise ArgumentError.new("Full text search on individual columns is not supported") - end - if operand.is_a? Array - raise ArgumentError.new("Full text search not supported for array operands") - end - - # Skip the generic per-column operator loop below - attrs = [] - # Use to_tsquery since plainto_tsquery does not support prefix - # search. And, split operand and join the words with ' & ' - cond_out << model_class.full_text_tsvector+" @@ to_tsquery(?)" - param_out << operand.split.join(' & ') - end attrs.each do |attr| subproperty = attr.split(".", 2) diff --git a/services/api/test/fixtures/jobs.yml b/services/api/test/fixtures/jobs.yml index 9b067aa263..ab76417902 100644 --- a/services/api/test/fixtures/jobs.yml +++ b/services/api/test/fixtures/jobs.yml @@ -521,7 +521,7 @@ running_job_in_publicly_accessible_project: uuid: zzzzz-8i9sb-n7omg50bvt0m1nf owner_uuid: zzzzz-j7d0g-zhxawtyetzwc5f0 modified_by_user_uuid: zzzzz-tpzed-xurymjxw79nv3jz - repository: active/foo + repository: active/bar script: running_job_script script_version: 4fe459abe02d9b365932b8f5dc419439ab4e2577 state: Running diff --git a/services/api/test/fixtures/pipeline_instances.yml b/services/api/test/fixtures/pipeline_instances.yml index 0865503281..9621b3effc 100644 --- a/services/api/test/fixtures/pipeline_instances.yml +++ b/services/api/test/fixtures/pipeline_instances.yml @@ -111,12 +111,9 @@ has_job: components_is_jobspec: # Helps test that clients cope with funny-shaped components. 
# For an example, see #3321. - uuid: zzzzz-d1hrv-jobspeccomponts - created_at: <%= 30.minute.ago.to_s(:db) %> + uuid: zzzzz-d1hrv-1yfj61234abcdk4 + created_at: <%= 2.minute.ago.to_s(:db) %> owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz - created_at: 2014-04-14 12:35:04 -0400 - updated_at: 2014-04-14 12:35:04 -0400 - modified_at: 2014-04-14 12:35:04 -0400 modified_by_client_uuid: zzzzz-ozdt8-brczlopd8u8d0jr modified_by_user_uuid: zzzzz-tpzed-xurymjxw79nv3jz state: RunningOnServer diff --git a/services/api/test/functional/arvados/v1/filters_test.rb b/services/api/test/functional/arvados/v1/filters_test.rb index 26270b1c3c..bcb1807867 100644 --- a/services/api/test/functional/arvados/v1/filters_test.rb +++ b/services/api/test/functional/arvados/v1/filters_test.rb @@ -29,34 +29,14 @@ class Arvados::V1::FiltersTest < ActionController::TestCase json_response['errors'].join(' ')) end - test 'error message for full text search on a specific column' do + test 'error message for unsupported full text search' do @controller = Arvados::V1::CollectionsController.new authorize_with :active get :index, params: { filters: [['uuid', '@@', 'abcdef']], } assert_response 422 - assert_match(/not supported/, json_response['errors'].join(' ')) - end - - test 'difficult characters in full text search' do - @controller = Arvados::V1::CollectionsController.new - authorize_with :active - get :index, params: { - filters: [['any', '@@', 'a|b"c']], - } - assert_response :success - # (Doesn't matter so much which results are returned.) 
- end - - test 'array operand in full text search' do - @controller = Arvados::V1::CollectionsController.new - authorize_with :active - get :index, params: { - filters: [['any', '@@', ['abc', 'def']]], - } - assert_response 422 - assert_match(/not supported/, json_response['errors'].join(' ')) + assert_match(/no longer supported/, json_response['errors'].join(' ')) end test 'api responses provide timestamps with nanoseconds' do @@ -100,58 +80,6 @@ class Arvados::V1::FiltersTest < ActionController::TestCase end end - test "full text search with count='none'" do - @controller = Arvados::V1::GroupsController.new - authorize_with :admin - - get :contents, params: { - format: :json, - count: 'none', - limit: 1000, - filters: [['any', '@@', Rails.configuration.ClusterID]], - } - - assert_response :success - - all_objects = Hash.new(0) - json_response['items'].map{|o| o['kind']}.each{|t| all_objects[t] += 1} - - assert_equal true, all_objects['arvados#group']>0 - assert_equal true, all_objects['arvados#job']>0 - assert_equal true, all_objects['arvados#pipelineInstance']>0 - assert_equal true, all_objects['arvados#pipelineTemplate']>0 - - # Perform test again mimicking a second page request with: - # last_object_class = PipelineInstance - # and hence groups and jobs should not be included in the response - # offset = 5, which means first 5 pipeline instances were already received in page 1 - # and hence the remaining pipeline instances and all other object types should be included in the response - - @test_counter = 0 # Reset executed action counter - - @controller = Arvados::V1::GroupsController.new - - get :contents, params: { - format: :json, - count: 'none', - limit: 1000, - offset: '5', - last_object_class: 'PipelineInstance', - filters: [['any', '@@', Rails.configuration.ClusterID]], - } - - assert_response :success - - second_page = Hash.new(0) - json_response['items'].map{|o| o['kind']}.each{|t| second_page[t] += 1} - - assert_equal false, 
second_page.include?('arvados#group') - assert_equal false, second_page.include?('arvados#job') - assert_equal true, second_page['arvados#pipelineInstance']>0 - assert_equal all_objects['arvados#pipelineInstance'], second_page['arvados#pipelineInstance']+5 - assert_equal true, second_page['arvados#pipelineTemplate']>0 - end - [['prop1', '=', 'value1', [:collection_with_prop1_value1], [:collection_with_prop1_value2, :collection_with_prop2_1]], ['prop1', '!=', 'value1', [:collection_with_prop1_value2, :collection_with_prop2_1], [:collection_with_prop1_value1]], ['prop1', 'exists', true, [:collection_with_prop1_value1, :collection_with_prop1_value2, :collection_with_prop1_value3, :collection_with_prop1_other1], [:collection_with_prop2_1]], diff --git a/services/api/test/integration/collections_api_test.rb b/services/api/test/integration/collections_api_test.rb index 73cbad6430..070e964e53 100644 --- a/services/api/test/integration/collections_api_test.rb +++ b/services/api/test/integration/collections_api_test.rb @@ -373,75 +373,6 @@ class CollectionsApiTest < ActionDispatch::IntegrationTest end end - test "search collection using full text search" do - # create collection to be searched for - signed_manifest = Collection.sign_manifest(". 
85877ca2d7e05498dd3d109baf2df106+95+A3a4e26a366ee7e4ed3e476ccf05354761be2e4ae@545a9920 0:95:file_in_subdir1\n./subdir2/subdir3 2bbc341c702df4d8f42ec31f16c10120+64+A315d7e7bad2ce937e711fc454fae2d1194d14d64@545a9920 0:32:file1_in_subdir3.txt 32:32:file2_in_subdir3.txt\n./subdir2/subdir3/subdir4 2bbc341c702df4d8f42ec31f16c10120+64+A315d7e7bad2ce937e711fc454fae2d1194d14d64@545a9920 0:32:file3_in_subdir4.txt 32:32:file4_in_subdir4.txt\n", api_token(:active)) - post "/arvados/v1/collections", - params: { - format: :json, - collection: {description: 'specific collection description', manifest_text: signed_manifest}.to_json, - }, - headers: auth(:active) - assert_response :success - assert_equal true, json_response['manifest_text'].include?('file4_in_subdir4.txt') - - # search using the filename - search_using_full_text_search 'subdir2', 0 - search_using_full_text_search 'subdir2:*', 1 - search_using_full_text_search 'subdir2/subdir3/subdir4', 1 - search_using_full_text_search 'file4:*', 1 - search_using_full_text_search 'file4_in_subdir4.txt', 1 - search_using_full_text_search 'subdir2 file4:*', 0 # first word is incomplete - search_using_full_text_search 'subdir2/subdir3/subdir4 file4:*', 1 - search_using_full_text_search 'subdir2/subdir3/subdir4 file4_in_subdir4.txt', 1 - search_using_full_text_search 'ile4', 0 # not a prefix match - end - - def search_using_full_text_search search_filter, expected_items - get '/arvados/v1/collections', - params: {:filters => [['any', '@@', search_filter]].to_json}, - headers: auth(:active) - assert_response :success - response_items = json_response['items'] - assert_not_nil response_items - if expected_items == 0 - assert_empty response_items - else - refute_empty response_items - first_item = response_items.first - assert_not_nil first_item - end - end - - # search for the filename in the file_names column and expect error - test "full text search not supported for individual columns" do - get '/arvados/v1/collections', - params: 
{:filters => [['name', '@@', 'General']].to_json}, - headers: auth(:active) - assert_response 422 - end - - [ - 'quick fox', - 'quick_brown fox', - 'brown_ fox', - 'fox dogs', - ].each do |search_filter| - test "full text search ignores special characters and finds with filter #{search_filter}" do - # description: The quick_brown_fox jumps over the lazy_dog - # full text search treats '_' as space apparently - get '/arvados/v1/collections', - params: {:filters => [['any', '@@', search_filter]].to_json}, - headers: auth(:active) - assert_response 200 - response_items = json_response['items'] - assert_not_nil response_items - first_item = response_items.first - refute_empty first_item - assert_equal first_item['description'], 'The quick_brown_fox jumps over the lazy_dog' - end - end - test "create and get collection with properties" do # create collection to be searched for signed_manifest = Collection.sign_manifest(". bad42fa702ae3ea7d888fef11b46f450+44 0:44:my_test_file.txt\n", api_token(:active)) diff --git a/services/api/test/integration/groups_test.rb b/services/api/test/integration/groups_test.rb index aa67166f7e..e76f2b5406 100644 --- a/services/api/test/integration/groups_test.rb +++ b/services/api/test/integration/groups_test.rb @@ -64,46 +64,6 @@ class GroupsTest < ActionDispatch::IntegrationTest end end - [ - ['Collection_', true], # collections and pipelines templates - ['hash', true], # pipeline templates - ['fa7aeb5140e2848d39b', false], # script_parameter of pipeline instances - ['fa7aeb5140e2848d39b:*', true], # script_parameter of pipeline instances - ['project pipeline', true], # finds "Completed pipeline in A Project" - ['project pipeli:*', true], # finds "Completed pipeline in A Project" - ['proje pipeli:*', false], # first word is incomplete, so no prefix match - ['no-such-thing', false], # script_parameter of pipeline instances - ].each do |search_filter, expect_results| - test "full text search of group-owned objects for #{search_filter}" do - 
get "/arvados/v1/groups/contents", - params: { - id: groups(:aproject).uuid, - limit: 5, - :filters => [['any', '@@', search_filter]].to_json - }, - headers: auth(:active) - assert_response :success - if expect_results - refute_empty json_response['items'] - json_response['items'].each do |item| - assert item['uuid'] - assert_equal groups(:aproject).uuid, item['owner_uuid'] - end - else - assert_empty json_response['items'] - end - end - end - - test "full text search is not supported for individual columns" do - get "/arvados/v1/groups/contents", - params: { - :filters => [['name', '@@', 'Private']].to_json - }, - headers: auth(:active) - assert_response 422 - end - test "group contents with include trash collections" do get "/arvados/v1/groups/contents", params: { diff --git a/services/api/test/unit/arvados_model_test.rb b/services/api/test/unit/arvados_model_test.rb index 64f7807135..1e2e08059e 100644 --- a/services/api/test/unit/arvados_model_test.rb +++ b/services/api/test/unit/arvados_model_test.rb @@ -155,51 +155,6 @@ class ArvadosModelTest < ActiveSupport::TestCase end end - test "full text search index exists on models" do - indexes = {} - conn = ActiveRecord::Base.connection - conn.exec_query("SELECT i.relname as indname, - i.relowner as indowner, - idx.indrelid::regclass::text as table, - am.amname as indam, - idx.indkey, - ARRAY( - SELECT pg_get_indexdef(idx.indexrelid, k + 1, true) - FROM generate_subscripts(idx.indkey, 1) as k - ORDER BY k - ) as keys, - idx.indexprs IS NOT NULL as indexprs, - idx.indpred IS NOT NULL as indpred - FROM pg_index as idx - JOIN pg_class as i - ON i.oid = idx.indexrelid - JOIN pg_am as am - ON i.relam = am.oid - JOIN pg_namespace as ns - ON ns.oid = i.relnamespace - AND ns.nspname = ANY(current_schemas(false))").each do |idx| - if idx['keys'].match(/to_tsvector/) - indexes[idx['table']] ||= [] - indexes[idx['table']] << idx - end - end - fts_tables = ["collections", "container_requests", "groups", "jobs", - 
"pipeline_instances", "pipeline_templates", "workflows"] - fts_tables.each do |table| - table_class = table.classify.constantize - if table_class.respond_to?('full_text_searchable_columns') - expect = table_class.full_text_searchable_columns - ok = false - indexes[table].andand.each do |idx| - if expect == idx['keys'].scan(/COALESCE\(([A-Za-z_]+)/).flatten - ok = true - end - end - assert ok, "#{table} has no full-text index\nexpect: #{expect.inspect}\nfound: #{indexes[table].inspect}" - end - end - end - [ %w[collections collections_trgm_text_search_idx], %w[container_requests container_requests_trgm_text_search_idx], diff --git a/services/fuse/arvados_fuse/fusedir.py b/services/fuse/arvados_fuse/fusedir.py index 78cbd0d8cf..2b963d9a68 100644 --- a/services/fuse/arvados_fuse/fusedir.py +++ b/services/fuse/arvados_fuse/fusedir.py @@ -298,20 +298,52 @@ class CollectionDirectoryBase(Directory): def on_event(self, event, collection, name, item): if collection == self.collection: name = self.sanitize_filename(name) - _logger.debug("collection notify %s %s %s %s", event, collection, name, item) - with llfuse.lock: - if event == arvados.collection.ADD: - self.new_entry(name, item, self.mtime()) - elif event == arvados.collection.DEL: - ent = self._entries[name] - del self._entries[name] - self.inodes.invalidate_entry(self, name) - self.inodes.del_entry(ent) - elif event == arvados.collection.MOD: - if hasattr(item, "fuse_entry") and item.fuse_entry is not None: - self.inodes.invalidate_inode(item.fuse_entry) - elif name in self._entries: - self.inodes.invalidate_inode(self._entries[name]) + + # + # It's possible for another thread to have llfuse.lock and + # be waiting on collection.lock. Meanwhile, we released + # llfuse.lock earlier in the stack, but are still holding + # on to the collection lock, and now we need to re-acquire + # llfuse.lock. 
If we don't release the collection lock, + # we'll deadlock where we're holding the collection lock + # waiting for llfuse.lock and the other thread is holding + # llfuse.lock and waiting for the collection lock. + # + # The correct locking order here is to take llfuse.lock + # first, then the collection lock. + # + # Since collection.lock is an RLock, it might be locked + # multiple times, so we need to release it multiple times, + # keep a count, then re-lock it the correct number of + # times. + # + lockcount = 0 + try: + while True: + self.collection.lock.release() + lockcount += 1 + except RuntimeError: + pass + + try: + with llfuse.lock: + with self.collection.lock: + if event == arvados.collection.ADD: + self.new_entry(name, item, self.mtime()) + elif event == arvados.collection.DEL: + ent = self._entries[name] + del self._entries[name] + self.inodes.invalidate_entry(self, name) + self.inodes.del_entry(ent) + elif event == arvados.collection.MOD: + if hasattr(item, "fuse_entry") and item.fuse_entry is not None: + self.inodes.invalidate_inode(item.fuse_entry) + elif name in self._entries: + self.inodes.invalidate_inode(self._entries[name]) + finally: + while lockcount > 0: + self.collection.lock.acquire() + lockcount -= 1 def populate(self, mtime): self._mtime = mtime @@ -587,10 +619,26 @@ class TmpCollectionDirectory(CollectionDirectoryBase): def on_event(self, *args, **kwargs): super(TmpCollectionDirectory, self).on_event(*args, **kwargs) if self.collection_record_file: - with llfuse.lock: - self.collection_record_file.invalidate() - self.inodes.invalidate_inode(self.collection_record_file) - _logger.debug("%s invalidated collection record", self) + + # See discussion in CollectionDirectoryBase.on_event + lockcount = 0 + try: + while True: + self.collection.lock.release() + lockcount += 1 + except RuntimeError: + pass + + try: + with llfuse.lock: + with self.collection.lock: + self.collection_record_file.invalidate() + 
self.inodes.invalidate_inode(self.collection_record_file) + _logger.debug("%s invalidated collection record", self) + finally: + while lockcount > 0: + self.collection.lock.acquire() + lockcount -= 1 def collection_record(self): with llfuse.lock_released: diff --git a/services/keepstore/handler_test.go b/services/keepstore/handler_test.go index 897447dd11..16dcd2aaf6 100644 --- a/services/keepstore/handler_test.go +++ b/services/keepstore/handler_test.go @@ -23,6 +23,7 @@ import ( "os" "sort" "strings" + "sync/atomic" "time" "git.arvados.org/arvados.git/lib/config" @@ -367,6 +368,94 @@ func (s *HandlerSuite) TestReadsOrderedByStorageClassPriority(c *check.C) { } } +func (s *HandlerSuite) TestPutWithNoWritableVolumes(c *check.C) { + s.cluster.Volumes = map[string]arvados.Volume{ + "zzzzz-nyw5e-111111111111111": { + Driver: "mock", + Replication: 1, + ReadOnly: true, + StorageClasses: map[string]bool{"class1": true}}, + } + c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil) + resp := IssueRequest(s.handler, + &RequestTester{ + method: "PUT", + uri: "/" + TestHash, + requestBody: TestBlock, + storageClasses: "class1", + }) + c.Check(resp.Code, check.Equals, FullError.HTTPCode) + c.Check(s.handler.volmgr.mountMap["zzzzz-nyw5e-111111111111111"].Volume.(*MockVolume).CallCount("Put"), check.Equals, 0) +} + +func (s *HandlerSuite) TestConcurrentWritesToMultipleStorageClasses(c *check.C) { + s.cluster.Volumes = map[string]arvados.Volume{ + "zzzzz-nyw5e-111111111111111": { + Driver: "mock", + Replication: 1, + StorageClasses: map[string]bool{"class1": true}}, + "zzzzz-nyw5e-121212121212121": { + Driver: "mock", + Replication: 1, + StorageClasses: map[string]bool{"class1": true, "class2": true}}, + "zzzzz-nyw5e-222222222222222": { + Driver: "mock", + Replication: 1, + StorageClasses: map[string]bool{"class2": true}}, + } + + for _, trial := range []struct { + setCounter uint32 // value to stuff vm.counter, to 
control offset + classes string // desired classes + put111 int // expected number of "put" ops on 11111... after 2x put reqs + put121 int // expected number of "put" ops on 12121... + put222 int // expected number of "put" ops on 22222... + cmp111 int // expected number of "compare" ops on 11111... after 2x put reqs + cmp121 int // expected number of "compare" ops on 12121... + cmp222 int // expected number of "compare" ops on 22222... + }{ + {0, "class1", + 1, 0, 0, + 2, 1, 0}, // first put compares on all vols with class2; second put succeeds after checking 121 + {0, "class2", + 0, 1, 0, + 0, 2, 1}, // first put compares on all vols with class2; second put succeeds after checking 121 + {0, "class1,class2", + 1, 1, 0, + 2, 2, 1}, // first put compares on all vols; second put succeeds after checking 111 and 121 + {1, "class1,class2", + 0, 1, 0, // vm.counter offset is 1 so the first volume attempted is 121 + 2, 2, 1}, // first put compares on all vols; second put succeeds after checking 111 and 121 + {0, "class1,class2,class404", + 1, 1, 0, + 2, 2, 1}, // first put compares on all vols; second put doesn't compare on 222 because it already satisfied class2 on 121 + } { + c.Logf("%+v", trial) + s.cluster.StorageClasses = map[string]arvados.StorageClassConfig{ + "class1": {}, + "class2": {}, + "class3": {}, + } + c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil) + atomic.StoreUint32(&s.handler.volmgr.counter, trial.setCounter) + for i := 0; i < 2; i++ { + IssueRequest(s.handler, + &RequestTester{ + method: "PUT", + uri: "/" + TestHash, + requestBody: TestBlock, + storageClasses: trial.classes, + }) + } + c.Check(s.handler.volmgr.mountMap["zzzzz-nyw5e-111111111111111"].Volume.(*MockVolume).CallCount("Put"), check.Equals, trial.put111) + c.Check(s.handler.volmgr.mountMap["zzzzz-nyw5e-121212121212121"].Volume.(*MockVolume).CallCount("Put"), check.Equals, trial.put121) + 
c.Check(s.handler.volmgr.mountMap["zzzzz-nyw5e-222222222222222"].Volume.(*MockVolume).CallCount("Put"), check.Equals, trial.put222) + c.Check(s.handler.volmgr.mountMap["zzzzz-nyw5e-111111111111111"].Volume.(*MockVolume).CallCount("Compare"), check.Equals, trial.cmp111) + c.Check(s.handler.volmgr.mountMap["zzzzz-nyw5e-121212121212121"].Volume.(*MockVolume).CallCount("Compare"), check.Equals, trial.cmp121) + c.Check(s.handler.volmgr.mountMap["zzzzz-nyw5e-222222222222222"].Volume.(*MockVolume).CallCount("Compare"), check.Equals, trial.cmp222) + } +} + // Test TOUCH requests. func (s *HandlerSuite) TestTouchHandler(c *check.C) { c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil) diff --git a/services/keepstore/handlers.go b/services/keepstore/handlers.go index 2b469a13eb..910033ebb1 100644 --- a/services/keepstore/handlers.go +++ b/services/keepstore/handlers.go @@ -18,6 +18,7 @@ import ( "strconv" "strings" "sync" + "sync/atomic" "time" "git.arvados.org/arvados.git/sdk/go/arvados" @@ -741,6 +742,7 @@ func GetBlock(ctx context.Context, volmgr *RRVolumeManager, hash string, buf []b } type putProgress struct { + classNeeded map[string]bool classTodo map[string]bool mountUsed map[*VolumeMount]bool totalReplication int @@ -769,7 +771,7 @@ func (pr putProgress) ClassReplication() string { func (pr *putProgress) Add(mnt *VolumeMount) { if pr.mountUsed[mnt] { - logrus.Warnf("BUG? superfluous extra write to mount %s", mnt) + logrus.Warnf("BUG? superfluous extra write to mount %s", mnt.UUID) return } pr.mountUsed[mnt] = true @@ -780,6 +782,21 @@ func (pr *putProgress) Add(mnt *VolumeMount) { } } +func (pr *putProgress) Sub(mnt *VolumeMount) { + if !pr.mountUsed[mnt] { + logrus.Warnf("BUG? 
Sub called with no prior matching Add: %s", mnt.UUID) + return + } + pr.mountUsed[mnt] = false + pr.totalReplication -= mnt.Replication + for class := range mnt.StorageClasses { + pr.classDone[class] -= mnt.Replication + if pr.classNeeded[class] { + pr.classTodo[class] = true + } + } +} + func (pr *putProgress) Done() bool { return len(pr.classTodo) == 0 && pr.totalReplication > 0 } @@ -800,47 +817,65 @@ func (pr *putProgress) Want(mnt *VolumeMount) bool { return false } -func newPutResult(classes []string) putProgress { +func (pr *putProgress) Copy() *putProgress { + cp := putProgress{ + classNeeded: pr.classNeeded, + classTodo: make(map[string]bool, len(pr.classTodo)), + classDone: make(map[string]int, len(pr.classDone)), + mountUsed: make(map[*VolumeMount]bool, len(pr.mountUsed)), + totalReplication: pr.totalReplication, + } + for k, v := range pr.classTodo { + cp.classTodo[k] = v + } + for k, v := range pr.classDone { + cp.classDone[k] = v + } + for k, v := range pr.mountUsed { + cp.mountUsed[k] = v + } + return &cp +} + +func newPutProgress(classes []string) putProgress { pr := putProgress{ - classTodo: make(map[string]bool, len(classes)), - classDone: map[string]int{}, - mountUsed: map[*VolumeMount]bool{}, + classNeeded: make(map[string]bool, len(classes)), + classTodo: make(map[string]bool, len(classes)), + classDone: map[string]int{}, + mountUsed: map[*VolumeMount]bool{}, } for _, c := range classes { if c != "" { + pr.classNeeded[c] = true pr.classTodo[c] = true } } return pr } -// PutBlock Stores the BLOCK (identified by the content id HASH) in Keep. -// -// PutBlock(ctx, block, hash) -// Stores the BLOCK (identified by the content id HASH) in Keep. -// -// The MD5 checksum of the block must be identical to the content id HASH. -// If not, an error is returned. +// PutBlock stores the given block on one or more volumes. // -// PutBlock stores the BLOCK on the first Keep volume with free space. 
-// A failure code is returned to the user only if all volumes fail. +// The MD5 checksum of the block must match the given hash. // -// On success, PutBlock returns nil. -// On failure, it returns a KeepError with one of the following codes: +// The block is written to each writable volume (ordered by priority +// and then UUID, see volume.go) until at least one replica has been +// stored in each of the requested storage classes. // -// 500 Collision -// A different block with the same hash already exists on this -// Keep server. -// 422 MD5Fail -// The MD5 hash of the BLOCK does not match the argument HASH. -// 503 Full -// There was not enough space left in any Keep volume to store -// the object. -// 500 Fail -// The object could not be stored for some other reason (e.g. -// all writes failed). The text of the error message should -// provide as much detail as possible. +// The returned error, if any, is a KeepError with one of the +// following codes: // +// 500 Collision +// A different block with the same hash already exists on this +// Keep server. +// 422 MD5Fail +// The MD5 hash of the BLOCK does not match the argument HASH. +// 503 Full +// There was not enough space left in any Keep volume to store +// the object. +// 500 Fail +// The object could not be stored for some other reason (e.g. +// all writes failed). The text of the error message should +// provide as much detail as possible. func PutBlock(ctx context.Context, volmgr *RRVolumeManager, block []byte, hash string, wantStorageClasses []string) (putProgress, error) { log := ctxlog.FromContext(ctx) @@ -851,72 +886,88 @@ func PutBlock(ctx context.Context, volmgr *RRVolumeManager, block []byte, hash s return putProgress{}, RequestHashError } - result := newPutResult(wantStorageClasses) + result := newPutProgress(wantStorageClasses) // If we already have this data, it's intact on disk, and we // can update its timestamp, return success. 
If we have // different data with the same hash, return failure. - if err := CompareAndTouch(ctx, volmgr, hash, block, &result); err != nil { + if err := CompareAndTouch(ctx, volmgr, hash, block, &result); err != nil || result.Done() { return result, err } if ctx.Err() != nil { return result, ErrClientDisconnect } - // Choose a Keep volume to write to. - // If this volume fails, try all of the volumes in order. - if mnt := volmgr.NextWritable(); mnt == nil || !result.Want(mnt) { - // fall through to "try all volumes" below - } else if err := mnt.Put(ctx, hash, block); err != nil { - log.WithError(err).Errorf("%s: Put(%s) failed", mnt.Volume, hash) - } else { - result.Add(mnt) - if result.Done() { - return result, nil - } - } - if ctx.Err() != nil { - return putProgress{}, ErrClientDisconnect - } - - writables := volmgr.AllWritable() + writables := volmgr.NextWritable() if len(writables) == 0 { log.Error("no writable volumes") - return putProgress{}, FullError + return result, FullError } - allFull := true + var wg sync.WaitGroup + var mtx sync.Mutex + cond := sync.Cond{L: &mtx} + // pending predicts what result will be if all pending writes + // succeed. + pending := result.Copy() + var allFull atomic.Value + allFull.Store(true) + + // We hold the lock for the duration of the "each volume" loop + // below, except when it is released during cond.Wait(). + mtx.Lock() + for _, mnt := range writables { + // Wait until our decision to use this mount does not + // depend on the outcome of pending writes. 
+ for result.Want(mnt) && !pending.Want(mnt) { + cond.Wait() + } if !result.Want(mnt) { continue } - err := mnt.Put(ctx, hash, block) - if ctx.Err() != nil { - return result, ErrClientDisconnect - } - switch err { - case nil: - result.Add(mnt) - if result.Done() { - return result, nil + mnt := mnt + pending.Add(mnt) + wg.Add(1) + go func() { + log.Debugf("PutBlock: start write to %s", mnt.UUID) + defer wg.Done() + err := mnt.Put(ctx, hash, block) + + mtx.Lock() + if err != nil { + log.Debugf("PutBlock: write to %s failed", mnt.UUID) + pending.Sub(mnt) + } else { + log.Debugf("PutBlock: write to %s succeeded", mnt.UUID) + result.Add(mnt) } - continue - case FullError: - continue - default: - // The volume is not full but the - // write did not succeed. Report the - // error and continue trying. - allFull = false - log.WithError(err).Errorf("%s: Put(%s) failed", mnt.Volume, hash) - } + cond.Broadcast() + mtx.Unlock() + + if err != nil && err != FullError && ctx.Err() == nil { + // The volume is not full but the + // write did not succeed. Report the + // error and continue trying. + allFull.Store(false) + log.WithError(err).Errorf("%s: Put(%s) failed", mnt.Volume, hash) + } + }() + } + mtx.Unlock() + wg.Wait() + if ctx.Err() != nil { + return result, ErrClientDisconnect + } + if result.Done() { + return result, nil } if result.totalReplication > 0 { // Some, but not all, of the storage classes were // satisfied. This qualifies as success. 
return result, nil - } else if allFull { + } else if allFull.Load().(bool) { log.Error("all volumes with qualifying storage classes are full") return putProgress{}, FullError } else { diff --git a/services/keepstore/volume.go b/services/keepstore/volume.go index 9bfc6ca3e5..3f7c9cb79b 100644 --- a/services/keepstore/volume.go +++ b/services/keepstore/volume.go @@ -344,11 +344,11 @@ func makeRRVolumeManager(logger logrus.FieldLogger, cluster *arvados.Cluster, my vm.writables = append(vm.writables, mnt) } } - // pri(i): return highest priority of any storage class - // offered by vm.readables[i] - pri := func(i int) int { + // pri(mnt): return highest priority of any storage class + // offered by mnt + pri := func(mnt *VolumeMount) int { any, best := false, 0 - for class := range vm.readables[i].KeepMount.StorageClasses { + for class := range mnt.KeepMount.StorageClasses { if p := cluster.StorageClasses[class].Priority; !any || best < p { best = p any = true @@ -356,14 +356,20 @@ func makeRRVolumeManager(logger logrus.FieldLogger, cluster *arvados.Cluster, my } return best } - // sort vm.readables, first by highest priority of any offered + // less(a,b): sort first by highest priority of any offered // storage class (highest->lowest), then by volume UUID - sort.Slice(vm.readables, func(i, j int) bool { - if pi, pj := pri(i), pri(j); pi != pj { - return pi > pj + less := func(a, b *VolumeMount) bool { + if pa, pb := pri(a), pri(b); pa != pb { + return pa > pb } else { - return vm.readables[i].KeepMount.UUID < vm.readables[j].KeepMount.UUID + return a.KeepMount.UUID < b.KeepMount.UUID } + } + sort.Slice(vm.readables, func(i, j int) bool { + return less(vm.readables[i], vm.readables[j]) + }) + sort.Slice(vm.writables, func(i, j int) bool { + return less(vm.writables[i], vm.writables[j]) }) return vm, nil } @@ -384,18 +390,22 @@ func (vm *RRVolumeManager) AllReadable() []*VolumeMount { return vm.readables } -// AllWritable returns an array of all writable volumes +// 
AllWritable returns writable volumes, sorted by priority/uuid. Used +// by CompareAndTouch to ensure higher-priority volumes are checked +// first. func (vm *RRVolumeManager) AllWritable() []*VolumeMount { return vm.writables } -// NextWritable returns the next writable -func (vm *RRVolumeManager) NextWritable() *VolumeMount { +// NextWritable returns writable volumes, rotated by vm.counter so +// each volume gets a turn to be first. Used by PutBlock to distribute +// new data across available volumes. +func (vm *RRVolumeManager) NextWritable() []*VolumeMount { if len(vm.writables) == 0 { return nil } - i := atomic.AddUint32(&vm.counter, 1) - return vm.writables[i%uint32(len(vm.writables))] + offset := (int(atomic.AddUint32(&vm.counter, 1)) - 1) % len(vm.writables) + return append(append([]*VolumeMount(nil), vm.writables[offset:]...), vm.writables[:offset]...) } // VolumeStats returns an ioStats for the given volume. diff --git a/tools/arvbox/lib/arvbox/docker/Dockerfile.base b/tools/arvbox/lib/arvbox/docker/Dockerfile.base index 79f0d3f4f6..c112972c43 100644 --- a/tools/arvbox/lib/arvbox/docker/Dockerfile.base +++ b/tools/arvbox/lib/arvbox/docker/Dockerfile.base @@ -73,7 +73,7 @@ ENV DEBIAN_FRONTEND noninteractive # gnupg2 runit python3-pip python3-setuptools python3-yaml shellinabox netcat less RUN apt-get update && \ apt-get -yq --no-install-recommends -o Acquire::Retries=6 install \ - gnupg2 runit python3-pip python3-setuptools python3-yaml shellinabox netcat less && \ + gnupg2 runit python3-pip python3-setuptools python3-yaml shellinabox netcat less vim-tiny && \ apt-get clean ENV GOPATH /var/lib/gopath diff --git a/tools/arvbox/lib/arvbox/docker/service/workbench2/run-service b/tools/arvbox/lib/arvbox/docker/service/workbench2/run-service index fb3eaaeee8..a112cb93fe 100755 --- a/tools/arvbox/lib/arvbox/docker/service/workbench2/run-service +++ b/tools/arvbox/lib/arvbox/docker/service/workbench2/run-service @@ -59,5 +59,6 @@ fi export 
VERSION=$(./version-at-commit.sh) export BROWSER=none export CI=true +export HTTPS=false node --version exec node node_modules/react-scripts/scripts/start.js diff --git a/tools/salt-install/Vagrantfile b/tools/salt-install/Vagrantfile index 3019a9fb1c..a3463bfc5c 100644 --- a/tools/salt-install/Vagrantfile +++ b/tools/salt-install/Vagrantfile @@ -35,7 +35,7 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config| cp -vr /vagrant/tests /home/vagrant/tests; sed 's#cluster_fixme_or_this_wont_work#harpo#g; s#domain_fixme_or_this_wont_work#local#g; - s/#\ BRANCH=\"master\"/\ BRANCH=\"master\"/g; + s/#\ BRANCH=\"main\"/\ BRANCH=\"main\"/g; s#CONTROLLER_EXT_SSL_PORT=443#CONTROLLER_EXT_SSL_PORT=8443#g' \ /vagrant/local.params.example.single_host_multiple_hostnames > /tmp/local.params.single_host_multiple_hostnames" arv.vm.provision "shell", @@ -78,7 +78,7 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config| cp -vr /vagrant/tests /home/vagrant/tests; sed 's#HOSTNAME_EXT=\"\"#HOSTNAME_EXT=\"zeppo.local\"#g; s#cluster_fixme_or_this_wont_work#zeppo#g; - s/#\ BRANCH=\"master\"/\ BRANCH=\"master\"/g; + s/#\ BRANCH=\"main\"/\ BRANCH=\"main\"/g; s#domain_fixme_or_this_wont_work#local#g;' \ /vagrant/local.params.example.single_host_single_hostname > /tmp/local.params.single_host_single_hostname" arv.vm.provision "shell", diff --git a/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/arvados.sls b/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/arvados.sls index 23e0076504..ccf6bac789 100644 --- a/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/arvados.sls +++ b/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/arvados.sls @@ -1,3 +1,5 @@ +# -*- coding: utf-8 -*- +# vim: ft=yaml --- # Copyright (C) The Arvados Authors. All rights reserved. # @@ -26,6 +28,7 @@ arvados: ## manage OS packages with some other tool and you don't want us messing up ## with your setup. 
ruby: + ## We set these to `true` here for testing purposes. ## They both default to `false`. manage_ruby: true @@ -67,8 +70,15 @@ arvados: host: 127.0.0.1 password: "__DATABASE_PASSWORD__" user: __CLUSTER___arvados - encoding: en_US.utf8 - client_encoding: UTF8 + extra_conn_params: + client_encoding: UTF8 + # Centos7 does not enable SSL by default, so we disable + # it here just for testing of the formula purposes only. + # You should not do this in production, and should + # configure Postgres certificates correctly + {%- if grains.os_family in ('RedHat',) %} + sslmode: disable + {%- endif %} tls: # certificate: '' @@ -76,6 +86,13 @@ arvados: # required to test with arvados-snakeoil certs insecure: true + resources: + virtual_machines: + shell: + name: webshell + backend: 127.0.1.1 + port: 4200 + ### TOKENS tokens: system_root: __SYSTEM_ROOT_TOKEN__ diff --git a/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_api_configuration.sls b/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_api_configuration.sls index b2f12c7739..54087f6d6d 100644 --- a/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_api_configuration.sls +++ b/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_api_configuration.sls @@ -3,17 +3,23 @@ # # SPDX-License-Identifier: AGPL-3.0 +{%- if grains.os_family in ('RedHat',) %} + {%- set group = 'nginx' %} +{%- else %} + {%- set group = 'www-data' %} +{%- endif %} + ### ARVADOS arvados: config: - group: www-data + group: {{ group }} ### NGINX nginx: ### SITES servers: managed: - arvados_api: + arvados_api.conf: enabled: true overwrite: true config: diff --git a/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_controller_configuration.sls b/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_controller_configuration.sls index 3adf0580a4..195e9af82e 100644 --- 
a/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_controller_configuration.sls +++ b/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_controller_configuration.sls @@ -20,7 +20,7 @@ nginx: servers: managed: ### DEFAULT - arvados_controller_default: + arvados_controller_default.conf: enabled: true overwrite: true config: @@ -33,9 +33,11 @@ nginx: - location /: - return: '301 https://$host$request_uri' - arvados_controller_ssl: + arvados_controller_ssl.conf: enabled: true overwrite: true + requires: + file: nginx_snippet_arvados-snakeoil.conf config: - server: - server_name: __CLUSTER__.__DOMAIN__ @@ -52,7 +54,8 @@ nginx: - proxy_set_header: 'X-Real-IP $remote_addr' - proxy_set_header: 'X-Forwarded-For $proxy_add_x_forwarded_for' - proxy_set_header: 'X-External-Client $external_client' - - include: 'snippets/arvados-snakeoil.conf' + - include: snippets/ssl_hardening_default.conf + - include: snippets/arvados-snakeoil.conf - access_log: /var/log/nginx/__CLUSTER__.__DOMAIN__.access.log combined - error_log: /var/log/nginx/__CLUSTER__.__DOMAIN__.error.log - client_max_body_size: 128m diff --git a/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_keepproxy_configuration.sls b/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_keepproxy_configuration.sls index 2d8922df9a..91179d4a86 100644 --- a/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_keepproxy_configuration.sls +++ b/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_keepproxy_configuration.sls @@ -16,7 +16,7 @@ nginx: servers: managed: ### DEFAULT - arvados_keepproxy_default: + arvados_keepproxy_default.conf: enabled: true overwrite: true config: @@ -29,9 +29,11 @@ nginx: - location /: - return: '301 https://$host$request_uri' - arvados_keepproxy_ssl: + arvados_keepproxy_ssl.conf: enabled: true overwrite: true + requires: + file: 
nginx_snippet_arvados-snakeoil.conf config: - server: - server_name: keep.__CLUSTER__.__DOMAIN__ @@ -52,6 +54,7 @@ nginx: - client_max_body_size: 64M - proxy_http_version: '1.1' - proxy_request_buffering: 'off' - - include: 'snippets/arvados-snakeoil.conf' + - include: snippets/ssl_hardening_default.conf + - include: snippets/arvados-snakeoil.conf - access_log: /var/log/nginx/keepproxy.__CLUSTER__.__DOMAIN__.access.log combined - error_log: /var/log/nginx/keepproxy.__CLUSTER__.__DOMAIN__.error.log diff --git a/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_keepweb_configuration.sls b/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_keepweb_configuration.sls index d180a3bad4..9ea16bfb54 100644 --- a/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_keepweb_configuration.sls +++ b/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_keepweb_configuration.sls @@ -16,7 +16,7 @@ nginx: servers: managed: ### DEFAULT - arvados_collections_download_default: + arvados_collections_download_default.conf: enabled: true overwrite: true config: @@ -30,9 +30,11 @@ nginx: - return: '301 https://$host$request_uri' ### COLLECTIONS / DOWNLOAD - arvados_collections_download_ssl: + arvados_collections_download_ssl.conf: enabled: true overwrite: true + requires: + file: nginx_snippet_arvados-snakeoil.conf config: - server: - server_name: collections.__CLUSTER__.__DOMAIN__ download.__CLUSTER__.__DOMAIN__ @@ -52,6 +54,7 @@ nginx: - client_max_body_size: 0 - proxy_http_version: '1.1' - proxy_request_buffering: 'off' - - include: 'snippets/arvados-snakeoil.conf' + - include: snippets/ssl_hardening_default.conf + - include: snippets/arvados-snakeoil.conf - access_log: /var/log/nginx/collections.__CLUSTER__.__DOMAIN__.access.log combined - error_log: /var/log/nginx/collections.__CLUSTER__.__DOMAIN__.error.log diff --git 
a/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_passenger.sls b/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_passenger.sls index 6ce75faa70..a4d3c34f26 100644 --- a/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_passenger.sls +++ b/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_passenger.sls @@ -3,19 +3,69 @@ # # SPDX-License-Identifier: AGPL-3.0 +{%- set passenger_pkg = 'nginx-mod-http-passenger' + if grains.osfinger in ('CentOS Linux-7',) else + 'libnginx-mod-http-passenger' %} +{%- set passenger_mod = '/usr/lib64/nginx/modules/ngx_http_passenger_module.so' + if grains.osfinger in ('CentOS Linux-7',) else + '/usr/lib/nginx/modules/ngx_http_passenger_module.so' %} +{%- set passenger_ruby = '/usr/local/rvm/rubies/ruby-2.7.2/bin/ruby' + if grains.osfinger in ('CentOS Linux-7', 'Ubuntu-18.04',) else + '/usr/bin/ruby' %} + ### NGINX nginx: install_from_phusionpassenger: true lookup: - passenger_package: libnginx-mod-http-passenger - passenger_config_file: /etc/nginx/conf.d/mod-http-passenger.conf + passenger_package: {{ passenger_pkg }} + ### PASSENGER + passenger: + passenger_ruby: {{ passenger_ruby }} ### SERVER server: config: - include: 'modules-enabled/*.conf' + # This is required to get the passenger module loaded + # In Debian it can be done with this + # include: 'modules-enabled/*.conf' + load_module: {{ passenger_mod }} + worker_processes: 4 + ### SNIPPETS + snippets: + # Based on https://ssl-config.mozilla.org/#server=nginx&version=1.14.2&config=intermediate&openssl=1.1.1d&guideline=5.4 + ssl_hardening_default.conf: + - ssl_session_timeout: 1d + - ssl_session_cache: 'shared:arvadosSSL:10m' + - ssl_session_tickets: 'off' + + # intermediate configuration + - ssl_protocols: TLSv1.2 TLSv1.3 + - ssl_ciphers: 
ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384 + - ssl_prefer_server_ciphers: 'off' + + # HSTS (ngx_http_headers_module is required) (63072000 seconds) + - add_header: 'Strict-Transport-Security "max-age=63072000" always' + + # OCSP stapling + # FIXME! Stapling does not work with self-signed certificates, so disabling for tests + # - ssl_stapling: 'on' + # - ssl_stapling_verify: 'on' + + # verify chain of trust of OCSP response using Root CA and Intermediate certs + # - ssl_trusted_certificate /path/to/root_CA_cert_plus_intermediates + + # curl https://ssl-config.mozilla.org/ffdhe2048.txt > /path/to/dhparam + # - ssl_dhparam: /path/to/dhparam + + # replace with the IP address of your resolver + # - resolver: 127.0.0.1 + + arvados-snakeoil.conf: + - ssl_certificate: /etc/ssl/private/arvados-snakeoil-cert.pem + - ssl_certificate_key: /etc/ssl/private/arvados-snakeoil-cert.key + ### SITES servers: managed: diff --git a/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_webshell_configuration.sls b/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_webshell_configuration.sls index e75f044343..9b73ab4a09 100644 --- a/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_webshell_configuration.sls +++ b/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_webshell_configuration.sls @@ -3,6 +3,20 @@ # # SPDX-License-Identifier: AGPL-3.0 +# This parameter will be used here to generate a list of upstreams and vhosts. +# This dict is here for convenience and should be managed some other way, but the +# different ways of orchestration that can be used for this are outside the scope +# of this formula and their examples. 
+# These upstreams should match those defined in `arvados:cluster:resources:virtual_machines` +{% set webshell_virtual_machines = { + 'shell': { + 'name': 'webshell', + 'backend': '127.0.1.1', + 'port': 4200, + } +} +%} + ### NGINX nginx: ### SERVER @@ -11,13 +25,20 @@ nginx: ### STREAMS http: - upstream webshell_upstream: - - server: 'shell.internal:4200 fail_timeout=10s' + {%- for vm, params in webshell_virtual_machines.items() %} + {%- set vm_name = params.name | default(vm) %} + {%- set vm_backend = params.backend | default(vm_name) %} + {%- set vm_port = params.port | default(4200) %} + + upstream {{ vm_name }}_upstream: + - server: '{{ vm_backend }}:{{ vm_port }} fail_timeout=10s' + + {%- endfor %} ### SITES servers: managed: - arvados_webshell_default: + arvados_webshell_default.conf: enabled: true overwrite: true config: @@ -30,17 +51,21 @@ nginx: - location /: - return: '301 https://$host$request_uri' - arvados_webshell_ssl: + arvados_webshell_ssl.conf: enabled: true overwrite: true + requires: + file: nginx_snippet_arvados-snakeoil.conf config: - server: - server_name: webshell.__CLUSTER__.__DOMAIN__ - listen: - __CONTROLLER_EXT_SSL_PORT__ http2 ssl - index: index.html index.htm - - location /shell.__CLUSTER__.__DOMAIN__: - - proxy_pass: 'http://webshell_upstream' + {%- for vm, params in webshell_virtual_machines.items() %} + {%- set vm_name = params.name | default(vm) %} + - location /{{ vm_name }}: + - proxy_pass: 'http://{{ vm_name }}_upstream' - proxy_read_timeout: 90 - proxy_connect_timeout: 90 - proxy_set_header: 'Host $http_host' @@ -67,8 +92,9 @@ nginx: - add_header: "'Access-Control-Allow-Origin' '*'" - add_header: "'Access-Control-Allow-Methods' 'GET, POST, OPTIONS'" - add_header: "'Access-Control-Allow-Headers' 'DNT,X-CustomHeader,Keep-Alive,User-Agent,X-Requested-With,If-Modified-Since,Cache-Control,Content-Type'" - - - include: 'snippets/arvados-snakeoil.conf' + {%- endfor %} + - include: snippets/ssl_hardening_default.conf + - include: 
snippets/arvados-snakeoil.conf - access_log: /var/log/nginx/webshell.__CLUSTER__.__DOMAIN__.access.log combined - error_log: /var/log/nginx/webshell.__CLUSTER__.__DOMAIN__.error.log diff --git a/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_websocket_configuration.sls b/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_websocket_configuration.sls index 3a354ac293..bcd0457c9e 100644 --- a/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_websocket_configuration.sls +++ b/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_websocket_configuration.sls @@ -16,7 +16,7 @@ nginx: servers: managed: ### DEFAULT - arvados_websocket_default: + arvados_websocket_default.conf: enabled: true overwrite: true config: @@ -29,9 +29,11 @@ nginx: - location /: - return: '301 https://$host$request_uri' - arvados_websocket_ssl: + arvados_websocket_ssl.conf: enabled: true overwrite: true + requires: + file: nginx_snippet_arvados-snakeoil.conf config: - server: - server_name: ws.__CLUSTER__.__DOMAIN__ @@ -53,6 +55,7 @@ nginx: - client_max_body_size: 64M - proxy_http_version: '1.1' - proxy_request_buffering: 'off' - - include: 'snippets/arvados-snakeoil.conf' + - include: snippets/ssl_hardening_default.conf + - include: snippets/arvados-snakeoil.conf - access_log: /var/log/nginx/ws.__CLUSTER__.__DOMAIN__.access.log combined - error_log: /var/log/nginx/ws.__CLUSTER__.__DOMAIN__.error.log diff --git a/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_workbench2_configuration.sls b/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_workbench2_configuration.sls index 8fdd553991..44bd16fe3e 100644 --- a/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_workbench2_configuration.sls +++ 
b/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_workbench2_configuration.sls @@ -1,12 +1,18 @@ --- # Copyright (C) The Arvados Authors. All rights reserved. # -# SPDX-License-Identifier: AGPL-3.0 +# SPDX-License-Identifier: Apache-2.0 + +{%- if grains.os_family in ('RedHat',) %} + {%- set group = 'nginx' %} +{%- else %} + {%- set group = 'www-data' %} +{%- endif %} ### ARVADOS arvados: config: - group: www-data + group: {{ group }} ### NGINX nginx: @@ -14,7 +20,7 @@ nginx: servers: managed: ### DEFAULT - arvados_workbench2_default: + arvados_workbench2_default.conf: enabled: true overwrite: true config: @@ -27,9 +33,11 @@ nginx: - location /: - return: '301 https://$host$request_uri' - arvados_workbench2_ssl: + arvados_workbench2_ssl.conf: enabled: true overwrite: true + requires: + file: nginx_snippet_arvados-snakeoil.conf config: - server: - server_name: workbench2.__CLUSTER__.__DOMAIN__ @@ -43,6 +51,7 @@ nginx: - return: 503 - location /config.json: - return: {{ "200 '" ~ '{"API_HOST":"__CLUSTER__.__DOMAIN__:__CONTROLLER_EXT_SSL_PORT__"}' ~ "'" }} - - include: 'snippets/arvados-snakeoil.conf' + - include: snippets/ssl_hardening_default.conf + - include: snippets/arvados-snakeoil.conf - access_log: /var/log/nginx/workbench2.__CLUSTER__.__DOMAIN__.access.log combined - error_log: /var/log/nginx/workbench2.__CLUSTER__.__DOMAIN__.error.log diff --git a/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_workbench_configuration.sls b/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_workbench_configuration.sls index 649af10b6d..6b7ab969f9 100644 --- a/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_workbench_configuration.sls +++ b/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_workbench_configuration.sls @@ -3,10 +3,16 @@ # # SPDX-License-Identifier: AGPL-3.0 +{%- if grains.os_family in ('RedHat',) %} + {%- set 
group = 'nginx' %} +{%- else %} + {%- set group = 'www-data' %} +{%- endif %} + ### ARVADOS arvados: config: - group: www-data + group: {{ group }} ### NGINX nginx: @@ -23,7 +29,7 @@ nginx: servers: managed: ### DEFAULT - arvados_workbench_default: + arvados_workbench_default.conf: enabled: true overwrite: true config: @@ -36,9 +42,11 @@ nginx: - location /: - return: '301 https://$host$request_uri' - arvados_workbench_ssl: + arvados_workbench_ssl.conf: enabled: true overwrite: true + requires: + file: nginx_snippet_arvados-snakeoil.conf config: - server: - server_name: workbench.__CLUSTER__.__DOMAIN__ @@ -54,11 +62,12 @@ nginx: - proxy_set_header: 'Host $http_host' - proxy_set_header: 'X-Real-IP $remote_addr' - proxy_set_header: 'X-Forwarded-For $proxy_add_x_forwarded_for' - - include: 'snippets/arvados-snakeoil.conf' + - include: snippets/ssl_hardening_default.conf + - include: snippets/arvados-snakeoil.conf - access_log: /var/log/nginx/workbench.__CLUSTER__.__DOMAIN__.access.log combined - error_log: /var/log/nginx/workbench.__CLUSTER__.__DOMAIN__.error.log - arvados_workbench_upstream: + arvados_workbench_upstream.conf: enabled: true overwrite: true config: diff --git a/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/postgresql.sls b/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/postgresql.sls index 71e712cad3..fda1545a05 100644 --- a/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/postgresql.sls +++ b/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/postgresql.sls @@ -5,11 +5,29 @@ ### POSTGRESQL postgres: - use_upstream_repo: false + # Centos-7's postgres package is too old, so we need to force using upstream's + # This is not required in Debian's family as they already ship with PG +11 + {%- if salt['grains.get']('os_family') == 'RedHat' %} + use_upstream_repo: true + version: '12' + + pkgs_deps: + - libicu + - libxslt + - systemd-sysv + + pkgs_extra: + - 
postgresql12-contrib + + {%- else %} pkgs_extra: - postgresql-contrib + {%- endif %} postgresconf: |- listen_addresses = '*' # listen on all interfaces + #ssl = on + #ssl_cert_file = '/etc/ssl/certs/arvados-snakeoil-cert.pem' + #ssl_key_file = '/etc/ssl/private/arvados-snakeoil-cert.key' acls: - ['local', 'all', 'postgres', 'peer'] - ['local', 'all', 'all', 'peer'] diff --git a/tools/salt-install/config_examples/single_host/multiple_hostnames/states/snakeoil_certs.sls b/tools/salt-install/config_examples/single_host/multiple_hostnames/states/snakeoil_certs.sls index fb1473def2..91617e4fa4 100644 --- a/tools/salt-install/config_examples/single_host/multiple_hostnames/states/snakeoil_certs.sls +++ b/tools/salt-install/config_examples/single_host/multiple_hostnames/states/snakeoil_certs.sls @@ -1,15 +1,22 @@ # Copyright (C) The Arvados Authors. All rights reserved. # -# SPDX-License-Identifier: AGPL-3.0 +# SPDX-License-Identifier: Apache-2.0 {%- set curr_tpldir = tpldir %} {%- set tpldir = 'arvados' %} {%- from "arvados/map.jinja" import arvados with context %} {%- set tpldir = curr_tpldir %} -{%- set arvados_ca_cert_file = '/etc/ssl/certs/arvados-snakeoil-ca.pem' %} +include: + - nginx.passenger + - nginx.config + - nginx.service + +# Debian uses different dirs for certs and keys, but being a Snake Oil example, +# we'll keep it simple here. 
+{%- set arvados_ca_cert_file = '/etc/ssl/private/arvados-snakeoil-ca.pem' %} {%- set arvados_ca_key_file = '/etc/ssl/private/arvados-snakeoil-ca.key' %} -{%- set arvados_cert_file = '/etc/ssl/certs/arvados-snakeoil-cert.pem' %} +{%- set arvados_cert_file = '/etc/ssl/private/arvados-snakeoil-cert.pem' %} {%- set arvados_csr_file = '/etc/ssl/private/arvados-snakeoil-cert.csr' %} {%- set arvados_key_file = '/etc/ssl/private/arvados-snakeoil-cert.key' %} @@ -30,7 +37,7 @@ arvados_test_salt_states_examples_single_host_snakeoil_certs_dependencies_pkg_in - ca-certificates arvados_test_salt_states_examples_single_host_snakeoil_certs_arvados_snake_oil_ca_cmd_run: - # Taken from https://github.com/arvados/arvados/blob/main/tools/arvbox/lib/arvbox/docker/service/certificate/run + # Taken from https://github.com/arvados/arvados/blob/master/tools/arvbox/lib/arvbox/docker/service/certificate/run cmd.run: - name: | # These dirs are not to CentOS-ish, but this is a helper script @@ -121,6 +128,9 @@ arvados_test_salt_states_examples_single_host_snakeoil_certs_arvados_snake_oil_c - require: - pkg: arvados_test_salt_states_examples_single_host_snakeoil_certs_dependencies_pkg_installed - cmd: arvados_test_salt_states_examples_single_host_snakeoil_certs_arvados_snake_oil_ca_cmd_run + # We need this before we can add the nginx's snippet + - require_in: + - file: nginx_snippet_arvados-snakeoil.conf {%- if grains.get('os_family') == 'Debian' %} arvados_test_salt_states_examples_single_host_snakeoil_certs_ssl_cert_pkg_installed: @@ -130,29 +140,13 @@ arvados_test_salt_states_examples_single_host_snakeoil_certs_ssl_cert_pkg_instal - sls: postgres arvados_test_salt_states_examples_single_host_snakeoil_certs_certs_permissions_cmd_run: - cmd.run: - - name: | - chown root:ssl-cert {{ arvados_key_file }} + file.managed: + - name: {{ arvados_key_file }} + - owner: root + - group: ssl-cert - require: - cmd: 
arvados_test_salt_states_examples_single_host_snakeoil_certs_arvados_snake_oil_cert_cmd_run - pkg: arvados_test_salt_states_examples_single_host_snakeoil_certs_ssl_cert_pkg_installed -{%- endif %} - -arvados_test_salt_states_examples_single_host_snakeoil_certs_nginx_snakeoil_file_managed: - file.managed: - - name: /etc/nginx/snippets/arvados-snakeoil.conf - - contents: | - ssl_certificate {{ arvados_cert_file }}; - ssl_certificate_key {{ arvados_key_file }}; - - watch_in: - - service: nginx_service - - require: - - pkg: passenger_install - - cmd: arvados_test_salt_states_examples_single_host_snakeoil_certs_certs_permissions_cmd_run - require_in: - - file: nginx_config - - service: nginx_service - - watch_in: - - service: nginx_service - - + - file: nginx_snippet_arvados-snakeoil.conf +{%- endif %} diff --git a/tools/salt-install/config_examples/single_host/single_hostname/pillars/nginx_passenger.sls b/tools/salt-install/config_examples/single_host/single_hostname/pillars/nginx_passenger.sls index 6ce75faa70..a4d3c34f26 100644 --- a/tools/salt-install/config_examples/single_host/single_hostname/pillars/nginx_passenger.sls +++ b/tools/salt-install/config_examples/single_host/single_hostname/pillars/nginx_passenger.sls @@ -3,19 +3,69 @@ # # SPDX-License-Identifier: AGPL-3.0 +{%- set passenger_pkg = 'nginx-mod-http-passenger' + if grains.osfinger in ('CentOS Linux-7',) else + 'libnginx-mod-http-passenger' %} +{%- set passenger_mod = '/usr/lib64/nginx/modules/ngx_http_passenger_module.so' + if grains.osfinger in ('CentOS Linux-7',) else + '/usr/lib/nginx/modules/ngx_http_passenger_module.so' %} +{%- set passenger_ruby = '/usr/local/rvm/rubies/ruby-2.7.2/bin/ruby' + if grains.osfinger in ('CentOS Linux-7', 'Ubuntu-18.04',) else + '/usr/bin/ruby' %} + ### NGINX nginx: install_from_phusionpassenger: true lookup: - passenger_package: libnginx-mod-http-passenger - passenger_config_file: /etc/nginx/conf.d/mod-http-passenger.conf + passenger_package: {{ passenger_pkg }} + ### 
PASSENGER + passenger: + passenger_ruby: {{ passenger_ruby }} ### SERVER server: config: - include: 'modules-enabled/*.conf' + # This is required to get the passenger module loaded + # In Debian it can be done with this + # include: 'modules-enabled/*.conf' + load_module: {{ passenger_mod }} + worker_processes: 4 + ### SNIPPETS + snippets: + # Based on https://ssl-config.mozilla.org/#server=nginx&version=1.14.2&config=intermediate&openssl=1.1.1d&guideline=5.4 + ssl_hardening_default.conf: + - ssl_session_timeout: 1d + - ssl_session_cache: 'shared:arvadosSSL:10m' + - ssl_session_tickets: 'off' + + # intermediate configuration + - ssl_protocols: TLSv1.2 TLSv1.3 + - ssl_ciphers: ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384 + - ssl_prefer_server_ciphers: 'off' + + # HSTS (ngx_http_headers_module is required) (63072000 seconds) + - add_header: 'Strict-Transport-Security "max-age=63072000" always' + + # OCSP stapling + # FIXME! 
Stapling does not work with self-signed certificates, so disabling for tests + # - ssl_stapling: 'on' + # - ssl_stapling_verify: 'on' + + # verify chain of trust of OCSP response using Root CA and Intermediate certs + # - ssl_trusted_certificate /path/to/root_CA_cert_plus_intermediates + + # curl https://ssl-config.mozilla.org/ffdhe2048.txt > /path/to/dhparam + # - ssl_dhparam: /path/to/dhparam + + # replace with the IP address of your resolver + # - resolver: 127.0.0.1 + + arvados-snakeoil.conf: + - ssl_certificate: /etc/ssl/private/arvados-snakeoil-cert.pem + - ssl_certificate_key: /etc/ssl/private/arvados-snakeoil-cert.key + ### SITES servers: managed: diff --git a/tools/salt-install/config_examples/single_host/single_hostname/states/snakeoil_certs.sls b/tools/salt-install/config_examples/single_host/single_hostname/states/snakeoil_certs.sls index 130fb5e937..b6929fb887 100644 --- a/tools/salt-install/config_examples/single_host/single_hostname/states/snakeoil_certs.sls +++ b/tools/salt-install/config_examples/single_host/single_hostname/states/snakeoil_certs.sls @@ -1,15 +1,22 @@ # Copyright (C) The Arvados Authors. All rights reserved. # -# SPDX-License-Identifier: AGPL-3.0 +# SPDX-License-Identifier: Apache-2.0 {%- set curr_tpldir = tpldir %} {%- set tpldir = 'arvados' %} {%- from "arvados/map.jinja" import arvados with context %} {%- set tpldir = curr_tpldir %} -{%- set arvados_ca_cert_file = '/etc/ssl/certs/arvados-snakeoil-ca.pem' %} +include: + - nginx.passenger + - nginx.config + - nginx.service + +# Debian uses different dirs for certs and keys, but being a Snake Oil example, +# we'll keep it simple here. 
+{%- set arvados_ca_cert_file = '/etc/ssl/private/arvados-snakeoil-ca.pem' %} {%- set arvados_ca_key_file = '/etc/ssl/private/arvados-snakeoil-ca.key' %} -{%- set arvados_cert_file = '/etc/ssl/certs/arvados-snakeoil-cert.pem' %} +{%- set arvados_cert_file = '/etc/ssl/private/arvados-snakeoil-cert.pem' %} {%- set arvados_csr_file = '/etc/ssl/private/arvados-snakeoil-cert.csr' %} {%- set arvados_key_file = '/etc/ssl/private/arvados-snakeoil-cert.key' %} @@ -30,7 +37,7 @@ arvados_test_salt_states_examples_single_host_snakeoil_certs_dependencies_pkg_in - ca-certificates arvados_test_salt_states_examples_single_host_snakeoil_certs_arvados_snake_oil_ca_cmd_run: - # Taken from https://github.com/arvados/arvados/blob/main/tools/arvbox/lib/arvbox/docker/service/certificate/run + # Taken from https://github.com/arvados/arvados/blob/master/tools/arvbox/lib/arvbox/docker/service/certificate/run cmd.run: - name: | # These dirs are not to CentOS-ish, but this is a helper script @@ -124,6 +131,9 @@ arvados_test_salt_states_examples_single_host_snakeoil_certs_arvados_snake_oil_c - require: - pkg: arvados_test_salt_states_examples_single_host_snakeoil_certs_dependencies_pkg_installed - cmd: arvados_test_salt_states_examples_single_host_snakeoil_certs_arvados_snake_oil_ca_cmd_run + # We need this before we can add the nginx's snippet + - require_in: + - file: nginx_snippet_arvados-snakeoil.conf {%- if grains.get('os_family') == 'Debian' %} arvados_test_salt_states_examples_single_host_snakeoil_certs_ssl_cert_pkg_installed: @@ -133,26 +143,13 @@ arvados_test_salt_states_examples_single_host_snakeoil_certs_ssl_cert_pkg_instal - sls: postgres arvados_test_salt_states_examples_single_host_snakeoil_certs_certs_permissions_cmd_run: - cmd.run: - - name: | - chown root:ssl-cert {{ arvados_key_file }} + file.managed: + - name: {{ arvados_key_file }} + - owner: root + - group: ssl-cert - require: - cmd: 
arvados_test_salt_states_examples_single_host_snakeoil_certs_arvados_snake_oil_cert_cmd_run - pkg: arvados_test_salt_states_examples_single_host_snakeoil_certs_ssl_cert_pkg_installed -{%- endif %} - -arvados_test_salt_states_examples_single_host_snakeoil_certs_nginx_snakeoil_file_managed: - file.managed: - - name: /etc/nginx/snippets/arvados-snakeoil.conf - - contents: | - ssl_certificate {{ arvados_cert_file }}; - ssl_certificate_key {{ arvados_key_file }}; - - require: - - pkg: nginx_install - require_in: - - file: nginx_config - - service: nginx_service - - watch_in: - - service: nginx_service - - + - file: nginx_snippet_arvados-snakeoil.conf +{%- endif %} diff --git a/tools/salt-install/local.params.example.multiple_hosts b/tools/salt-install/local.params.example.multiple_hosts index 17b7b88884..283c631ec5 100644 --- a/tools/salt-install/local.params.example.multiple_hosts +++ b/tools/salt-install/local.params.example.multiple_hosts @@ -100,6 +100,6 @@ RELEASE="production" # ARVADOS_TAG="2.2.0" # POSTGRES_TAG="v0.41.6" # NGINX_TAG="temp-fix-missing-statements-in-pillar" -# DOCKER_TAG="v1.0.0" +# DOCKER_TAG="v2.0.7" # LOCALE_TAG="v0.3.4" # LETSENCRYPT_TAG="v2.1.0" diff --git a/tools/salt-install/local.params.example.single_host_multiple_hostnames b/tools/salt-install/local.params.example.single_host_multiple_hostnames index ae54e7437a..e23634e8c4 100644 --- a/tools/salt-install/local.params.example.single_host_multiple_hostnames +++ b/tools/salt-install/local.params.example.single_host_multiple_hostnames @@ -72,6 +72,6 @@ RELEASE="production" # ARVADOS_TAG="2.2.0" # POSTGRES_TAG="v0.41.6" # NGINX_TAG="temp-fix-missing-statements-in-pillar" -# DOCKER_TAG="v1.0.0" +# DOCKER_TAG="v2.0.7" # LOCALE_TAG="v0.3.4" # LETSENCRYPT_TAG="v2.1.0" diff --git a/tools/salt-install/local.params.example.single_host_single_hostname b/tools/salt-install/local.params.example.single_host_single_hostname index a35bd45bff..ae9804863f 100644 --- 
a/tools/salt-install/local.params.example.single_host_single_hostname +++ b/tools/salt-install/local.params.example.single_host_single_hostname @@ -81,6 +81,6 @@ RELEASE="production" # ARVADOS_TAG="2.2.0" # POSTGRES_TAG="v0.41.6" # NGINX_TAG="temp-fix-missing-statements-in-pillar" -# DOCKER_TAG="v1.0.0" +# DOCKER_TAG="v2.0.7" # LOCALE_TAG="v0.3.4" # LETSENCRYPT_TAG="v2.1.0" diff --git a/tools/salt-install/provision.sh b/tools/salt-install/provision.sh index 7ac120e5fd..b840d86c6f 100755 --- a/tools/salt-install/provision.sh +++ b/tools/salt-install/provision.sh @@ -1,4 +1,4 @@ -#!/usr/bin/env bash +#!/bin/bash # Copyright (C) The Arvados Authors. All rights reserved. # @@ -11,6 +11,7 @@ # vagrant up set -o pipefail +set -x # capture the directory that the script is running from SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" @@ -177,7 +178,7 @@ VERSION="latest" # Other formula versions we depend on POSTGRES_TAG="v0.41.6" NGINX_TAG="temp-fix-missing-statements-in-pillar" -DOCKER_TAG="v1.0.0" +DOCKER_TAG="v2.0.7" LOCALE_TAG="v0.3.4" LETSENCRYPT_TAG="v2.1.0" @@ -232,8 +233,23 @@ fi if [ "${DUMP_CONFIG}" = "yes" ]; then echo "The provision installer will just dump a config under ${DUMP_SALT_CONFIG_DIR} and exit" else - apt-get update - apt-get install -y curl git jq + # Install a few dependency packages + # First, let's figure out the OS we're working on + OS_ID=$(grep ^ID= /etc/os-release |cut -f 2 -d= |cut -f 2 -d \") + echo "Detected distro: ${OS_ID}" + + case ${OS_ID} in + "centos") + echo "WARNING! 
Disabling SELinux, see https://dev.arvados.org/issues/18019" + sed -i 's/SELINUX=enforcing/SELINUX=permissive/' /etc/sysconfig/selinux + setenforce permissive + yum install -y curl git jq + ;; + "debian"|"ubuntu") + DEBIAN_FRONTEND=noninteractive apt update + DEBIAN_FRONTEND=noninteractive apt install -y curl git jq + ;; + esac if which salt-call; then echo "Salt already installed" @@ -246,6 +262,8 @@ else # Set salt to masterless mode cat > /etc/salt/minion << EOFSM +failhard: "True" + file_client: local file_roots: base: @@ -607,5 +625,10 @@ fi # Test that the installation finished correctly if [ "x${TEST}" = "xyes" ]; then cd ${T_DIR} - ./run-test.sh + # If we use RVM, we need to run this with it, or most ruby commands will fail + RVM_EXEC="" + if [ -x /usr/local/rvm/bin/rvm-exec ]; then + RVM_EXEC="/usr/local/rvm/bin/rvm-exec" + fi + ${RVM_EXEC} ./run-test.sh fi diff --git a/tools/salt-install/tests/run-test.sh b/tools/salt-install/tests/run-test.sh index 53c51a2c5a..020efa94e8 100755 --- a/tools/salt-install/tests/run-test.sh +++ b/tools/salt-install/tests/run-test.sh @@ -55,13 +55,17 @@ echo "Activating user '__INITIAL_USER__'" arv user update --uuid "${user_uuid}" --user '{"is_active": true}' echo "Getting the user API TOKEN" -user_api_token=$(arv api_client_authorization list --filters "[[\"owner_uuid\", \"=\", \"${user_uuid}\"],[\"kind\", \"==\", \"arvados#apiClientAuthorization\"]]" --limit=1 |jq -r .items[].api_token) +user_api_token=$(arv api_client_authorization list | jq -r ".items[] | select( .owner_uuid == \"${user_uuid}\" ).api_token" | head -1) if [ "x${user_api_token}" = "x" ]; then + echo "No existing token found for user '__INITIAL_USER__' (user_uuid: '${user_uuid}'). Creating token" user_api_token=$(arv api_client_authorization create --api-client-authorization "{\"owner_uuid\": \"${user_uuid}\"}" | jq -r .api_token) fi +echo "API TOKEN FOR user '__INITIAL_USER__': '${user_api_token}'." 
+ # Change to the user's token and run the workflow +echo "Switching to user '__INITIAL_USER__'" export ARVADOS_API_TOKEN="${user_api_token}" echo "Running test CWL workflow" diff --git a/tools/test-collection-create/test-collection-create.py b/tools/test-collection-create/test-collection-create.py new file mode 100644 index 0000000000..9a02745694 --- /dev/null +++ b/tools/test-collection-create/test-collection-create.py @@ -0,0 +1,443 @@ +#!/usr/bin/env python3 +# +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: CC-BY-SA-3.0 + +import argparse +import logging +import random +import string +import sys + +import arvados +import arvados.collection + +logger = logging.getLogger('arvados.test_collection_create') +logger.setLevel(logging.INFO) + +opts = argparse.ArgumentParser(add_help=False) +opts.add_argument('--min-files', type=int, default=30000, help=""" +Minimum number of files on each directory. Default: 30000. +""") +opts.add_argument('--max-files', type=int, default=30000, help=""" +Maximum number of files on each directory. Default: 30000. +""") +opts.add_argument('--min-depth', type=int, default=0, help=""" +Minimum depth for the created tree structure. Default: 0. +""") +opts.add_argument('--max-depth', type=int, default=0, help=""" +Maximum depth for the created tree structure. Default: 0. +""") +opts.add_argument('--min-subdirs', type=int, default=1, help=""" +Minimum number of subdirectories created at every depth level. Default: 1. +""") +opts.add_argument('--max-subdirs', type=int, default=10, help=""" +Maximum number of subdirectories created at every depth level. Default: 10. +""") +opts.add_argument('--debug', action='store_true', default=False, help=""" +Sets logging level to DEBUG. 
+""") + +arg_parser = argparse.ArgumentParser( + description='Create a collection with garbage data for testing purposes.', + parents=[opts]) + +adjectives = ['abandoned','able','absolute','adorable','adventurous','academic', + 'acceptable','acclaimed','accomplished','accurate','aching','acidic','acrobatic', + 'active','actual','adept','admirable','admired','adolescent','adorable','adored', + 'advanced','afraid','affectionate','aged','aggravating','aggressive','agile', + 'agitated','agonizing','agreeable','ajar','alarmed','alarming','alert','alienated', + 'alive','all','altruistic','amazing','ambitious','ample','amused','amusing','anchored', + 'ancient','angelic','angry','anguished','animated','annual','another','antique', + 'anxious','any','apprehensive','appropriate','apt','arctic','arid','aromatic','artistic', + 'ashamed','assured','astonishing','athletic','attached','attentive','attractive', + 'austere','authentic','authorized','automatic','avaricious','average','aware','awesome', + 'awful','awkward','babyish','bad','back','baggy','bare','barren','basic','beautiful', + 'belated','beloved','beneficial','better','best','bewitched','big','big-hearted', + 'biodegradable','bite-sized','bitter','black','black-and-white','bland','blank', + 'blaring','bleak','blind','blissful','blond','blue','blushing','bogus','boiling', + 'bold','bony','boring','bossy','both','bouncy','bountiful','bowed','brave','breakable', + 'brief','bright','brilliant','brisk','broken','bronze','brown','bruised','bubbly', + 'bulky','bumpy','buoyant','burdensome','burly','bustling','busy','buttery','buzzing', + 'calculating','calm','candid','canine','capital','carefree','careful','careless', + 'caring','cautious','cavernous','celebrated','charming','cheap','cheerful','cheery', + 'chief','chilly','chubby','circular','classic','clean','clear','clear-cut','clever', + 'close','closed','cloudy','clueless','clumsy','cluttered','coarse','cold','colorful', + 
'colorless','colossal','comfortable','common','compassionate','competent','complete', + 'complex','complicated','composed','concerned','concrete','confused','conscious', + 'considerate','constant','content','conventional','cooked','cool','cooperative', + 'coordinated','corny','corrupt','costly','courageous','courteous','crafty','crazy', + 'creamy','creative','creepy','criminal','crisp','critical','crooked','crowded', + 'cruel','crushing','cuddly','cultivated','cultured','cumbersome','curly','curvy', + 'cute','cylindrical','damaged','damp','dangerous','dapper','daring','darling','dark', + 'dazzling','dead','deadly','deafening','dear','dearest','decent','decimal','decisive', + 'deep','defenseless','defensive','defiant','deficient','definite','definitive','delayed', + 'delectable','delicious','delightful','delirious','demanding','dense','dental', + 'dependable','dependent','descriptive','deserted','detailed','determined','devoted', + 'different','difficult','digital','diligent','dim','dimpled','dimwitted','direct', + 'disastrous','discrete','disfigured','disgusting','disloyal','dismal','distant', + 'downright','dreary','dirty','disguised','dishonest','dismal','distant','distinct', + 'distorted','dizzy','dopey','doting','double','downright','drab','drafty','dramatic', + 'dreary','droopy','dry','dual','dull','dutiful','each','eager','earnest','early', + 'easy','easy-going','ecstatic','edible','educated','elaborate','elastic','elated', + 'elderly','electric','elegant','elementary','elliptical','embarrassed','embellished', + 'eminent','emotional','empty','enchanted','enchanting','energetic','enlightened', + 'enormous','enraged','entire','envious','equal','equatorial','essential','esteemed', + 'ethical','euphoric','even','evergreen','everlasting','every','evil','exalted', + 'excellent','exemplary','exhausted','excitable','excited','exciting','exotic', + 'expensive','experienced','expert','extraneous','extroverted','extra-large','extra-small', + 
'fabulous','failing','faint','fair','faithful','fake','false','familiar','famous', + 'fancy','fantastic','far','faraway','far-flung','far-off','fast','fat','fatal', + 'fatherly','favorable','favorite','fearful','fearless','feisty','feline','female', + 'feminine','few','fickle','filthy','fine','finished','firm','first','firsthand', + 'fitting','fixed','flaky','flamboyant','flashy','flat','flawed','flawless','flickering', + 'flimsy','flippant','flowery','fluffy','fluid','flustered','focused','fond','foolhardy', + 'foolish','forceful','forked','formal','forsaken','forthright','fortunate','fragrant', + 'frail','frank','frayed','free','French','fresh','frequent','friendly','frightened', + 'frightening','frigid','frilly','frizzy','frivolous','front','frosty','frozen', + 'frugal','fruitful','full','fumbling','functional','funny','fussy','fuzzy','gargantuan', + 'gaseous','general','generous','gentle','genuine','giant','giddy','gigantic','gifted', + 'giving','glamorous','glaring','glass','gleaming','gleeful','glistening','glittering', + 'gloomy','glorious','glossy','glum','golden','good','good-natured','gorgeous', + 'graceful','gracious','grand','grandiose','granular','grateful','grave','gray', + 'great','greedy','green','gregarious','grim','grimy','gripping','grizzled','gross', + 'grotesque','grouchy','grounded','growing','growling','grown','grubby','gruesome', + 'grumpy','guilty','gullible','gummy','hairy','half','handmade','handsome','handy', + 'happy','happy-go-lucky','hard','hard-to-find','harmful','harmless','harmonious', + 'harsh','hasty','hateful','haunting','healthy','heartfelt','hearty','heavenly', + 'heavy','hefty','helpful','helpless','hidden','hideous','high','high-level','hilarious', + 'hoarse','hollow','homely','honest','honorable','honored','hopeful','horrible', + 'hospitable','hot','huge','humble','humiliating','humming','humongous','hungry', + 'hurtful','husky','icky','icy','ideal','idealistic','identical','idle','idiotic', + 
'idolized','ignorant','ill','illegal','ill-fated','ill-informed','illiterate', + 'illustrious','imaginary','imaginative','immaculate','immaterial','immediate', + 'immense','impassioned','impeccable','impartial','imperfect','imperturbable','impish', + 'impolite','important','impossible','impractical','impressionable','impressive', + 'improbable','impure','inborn','incomparable','incompatible','incomplete','inconsequential', + 'incredible','indelible','inexperienced','indolent','infamous','infantile','infatuated', + 'inferior','infinite','informal','innocent','insecure','insidious','insignificant', + 'insistent','instructive','insubstantial','intelligent','intent','intentional', + 'interesting','internal','international','intrepid','ironclad','irresponsible', + 'irritating','itchy','jaded','jagged','jam-packed','jaunty','jealous','jittery', + 'joint','jolly','jovial','joyful','joyous','jubilant','judicious','juicy','jumbo', + 'junior','jumpy','juvenile','kaleidoscopic','keen','key','kind','kindhearted','kindly', + 'klutzy','knobby','knotty','knowledgeable','knowing','known','kooky','kosher','lame', + 'lanky','large','last','lasting','late','lavish','lawful','lazy','leading','lean', + 'leafy','left','legal','legitimate','light','lighthearted','likable','likely','limited', + 'limp','limping','linear','lined','liquid','little','live','lively','livid','loathsome', + 'lone','lonely','long','long-term','loose','lopsided','lost','loud','lovable','lovely', + 'loving','low','loyal','lucky','lumbering','luminous','lumpy','lustrous','luxurious', + 'mad','made-up','magnificent','majestic','major','male','mammoth','married','marvelous', + 'masculine','massive','mature','meager','mealy','mean','measly','meaty','medical', + 'mediocre','medium','meek','mellow','melodic','memorable','menacing','merry','messy', + 'metallic','mild','milky','mindless','miniature','minor','minty','miserable','miserly', + 
'misguided','misty','mixed','modern','modest','moist','monstrous','monthly','monumental', + 'moral','mortified','motherly','motionless','mountainous','muddy','muffled','multicolored', + 'mundane','murky','mushy','musty','muted','mysterious','naive','narrow','nasty','natural', + 'naughty','nautical','near','neat','necessary','needy','negative','neglected','negligible', + 'neighboring','nervous','new','next','nice','nifty','nimble','nippy','nocturnal','noisy', + 'nonstop','normal','notable','noted','noteworthy','novel','noxious','numb','nutritious', + 'nutty','obedient','obese','oblong','oily','oblong','obvious','occasional','odd', + 'oddball','offbeat','offensive','official','old','old-fashioned','only','open','optimal', + 'optimistic','opulent','orange','orderly','organic','ornate','ornery','ordinary', + 'original','other','our','outlying','outgoing','outlandish','outrageous','outstanding', + 'oval','overcooked','overdue','overjoyed','overlooked','palatable','pale','paltry', + 'parallel','parched','partial','passionate','past','pastel','peaceful','peppery', + 'perfect','perfumed','periodic','perky','personal','pertinent','pesky','pessimistic', + 'petty','phony','physical','piercing','pink','pitiful','plain','plaintive','plastic', + 'playful','pleasant','pleased','pleasing','plump','plush','polished','polite','political', + 'pointed','pointless','poised','poor','popular','portly','posh','positive','possible', + 'potable','powerful','powerless','practical','precious','present','prestigious', + 'pretty','precious','previous','pricey','prickly','primary','prime','pristine','private', + 'prize','probable','productive','profitable','profuse','proper','proud','prudent', + 'punctual','pungent','puny','pure','purple','pushy','putrid','puzzled','puzzling', + 'quaint','qualified','quarrelsome','quarterly','queasy','querulous','questionable', + 'quick','quick-witted','quiet','quintessential','quirky','quixotic','quizzical', + 
'radiant','ragged','rapid','rare','rash','raw','recent','reckless','rectangular', + 'ready','real','realistic','reasonable','red','reflecting','regal','regular', + 'reliable','relieved','remarkable','remorseful','remote','repentant','required', + 'respectful','responsible','repulsive','revolving','rewarding','rich','rigid', + 'right','ringed','ripe','roasted','robust','rosy','rotating','rotten','rough', + 'round','rowdy','royal','rubbery','rundown','ruddy','rude','runny','rural','rusty', + 'sad','safe','salty','same','sandy','sane','sarcastic','sardonic','satisfied', + 'scaly','scarce','scared','scary','scented','scholarly','scientific','scornful', + 'scratchy','scrawny','second','secondary','second-hand','secret','self-assured', + 'self-reliant','selfish','sentimental','separate','serene','serious','serpentine', + 'several','severe','shabby','shadowy','shady','shallow','shameful','shameless', + 'sharp','shimmering','shiny','shocked','shocking','shoddy','short','short-term', + 'showy','shrill','shy','sick','silent','silky','silly','silver','similar','simple', + 'simplistic','sinful','single','sizzling','skeletal','skinny','sleepy','slight', + 'slim','slimy','slippery','slow','slushy','small','smart','smoggy','smooth','smug', + 'snappy','snarling','sneaky','sniveling','snoopy','sociable','soft','soggy','solid', + 'somber','some','spherical','sophisticated','sore','sorrowful','soulful','soupy', + 'sour','Spanish','sparkling','sparse','specific','spectacular','speedy','spicy', + 'spiffy','spirited','spiteful','splendid','spotless','spotted','spry','square', + 'squeaky','squiggly','stable','staid','stained','stale','standard','starchy','stark', + 'starry','steep','sticky','stiff','stimulating','stingy','stormy','straight','strange', + 'steel','strict','strident','striking','striped','strong','studious','stunning', + 'stupendous','stupid','sturdy','stylish','subdued','submissive','substantial','subtle', + 
'suburban','sudden','sugary','sunny','super','superb','superficial','superior', + 'supportive','sure-footed','surprised','suspicious','svelte','sweaty','sweet','sweltering', + 'swift','sympathetic','tall','talkative','tame','tan','tangible','tart','tasty', + 'tattered','taut','tedious','teeming','tempting','tender','tense','tepid','terrible', + 'terrific','testy','thankful','that','these','thick','thin','third','thirsty','this', + 'thorough','thorny','those','thoughtful','threadbare','thrifty','thunderous','tidy', + 'tight','timely','tinted','tiny','tired','torn','total','tough','traumatic','treasured', + 'tremendous','tragic','trained','tremendous','triangular','tricky','trifling','trim', + 'trivial','troubled','true','trusting','trustworthy','trusty','truthful','tubby', + 'turbulent','twin','ugly','ultimate','unacceptable','unaware','uncomfortable', + 'uncommon','unconscious','understated','unequaled','uneven','unfinished','unfit', + 'unfolded','unfortunate','unhappy','unhealthy','uniform','unimportant','unique', + 'united','unkempt','unknown','unlawful','unlined','unlucky','unnatural','unpleasant', + 'unrealistic','unripe','unruly','unselfish','unsightly','unsteady','unsung','untidy', + 'untimely','untried','untrue','unused','unusual','unwelcome','unwieldy','unwilling', + 'unwitting','unwritten','upbeat','upright','upset','urban','usable','used','useful', + 'useless','utilized','utter','vacant','vague','vain','valid','valuable','vapid', + 'variable','vast','velvety','venerated','vengeful','verifiable','vibrant','vicious', + 'victorious','vigilant','vigorous','villainous','violet','violent','virtual', + 'virtuous','visible','vital','vivacious','vivid','voluminous','wan','warlike','warm', + 'warmhearted','warped','wary','wasteful','watchful','waterlogged','watery','wavy', + 'wealthy','weak','weary','webbed','wee','weekly','weepy','weighty','weird','welcome', + 'well-documented','well-groomed','well-informed','well-lit','well-made','well-off', + 
'well-to-do','well-worn','wet','which','whimsical','whirlwind','whispered','white', + 'whole','whopping','wicked','wide','wide-eyed','wiggly','wild','willing','wilted', + 'winding','windy','winged','wiry','wise','witty','wobbly','woeful','wonderful', + 'wooden','woozy','wordy','worldly','worn','worried','worrisome','worse','worst', + 'worthless','worthwhile','worthy','wrathful','wretched','writhing','wrong','wry', + 'yawning','yearly','yellow','yellowish','young','youthful','yummy','zany','zealous', + 'zesty','zigzag'] +nouns = ['people','history','way','art','world','information','map','two','family', + 'government','health','system','computer','meat','year','thanks','music','person', + 'reading','method','data','food','understanding','theory','law','bird','literature', + 'problem','software','control','knowledge','power','ability','economics','love', + 'internet','television','science','library','nature','fact','product','idea', + 'temperature','investment','area','society','activity','story','industry','media', + 'thing','oven','community','definition','safety','quality','development','language', + 'management','player','variety','video','week','security','country','exam','movie', + 'organization','equipment','physics','analysis','policy','series','thought','basis', + 'boyfriend','direction','strategy','technology','army','camera','freedom','paper', + 'environment','child','instance','month','truth','marketing','university','writing', + 'article','department','difference','goal','news','audience','fishing','growth', + 'income','marriage','user','combination','failure','meaning','medicine','philosophy', + 'teacher','communication','night','chemistry','disease','disk','energy','nation', + 'road','role','soup','advertising','location','success','addition','apartment','education', + 'math','moment','painting','politics','attention','decision','event','property', + 'shopping','student','wood','competition','distribution','entertainment','office', + 
'population','president','unit','category','cigarette','context','introduction', + 'opportunity','performance','driver','flight','length','magazine','newspaper', + 'relationship','teaching','cell','dealer','finding','lake','member','message','phone', + 'scene','appearance','association','concept','customer','death','discussion','housing', + 'inflation','insurance','mood','woman','advice','blood','effort','expression','importance', + 'opinion','payment','reality','responsibility','situation','skill','statement','wealth', + 'application','city','county','depth','estate','foundation','grandmother','heart', + 'perspective','photo','recipe','studio','topic','collection','depression','imagination', + 'passion','percentage','resource','setting','ad','agency','college','connection', + 'criticism','debt','description','memory','patience','secretary','solution','administration', + 'aspect','attitude','director','personality','psychology','recommendation','response', + 'selection','storage','version','alcohol','argument','complaint','contract','emphasis', + 'highway','loss','membership','possession','preparation','steak','union','agreement', + 'cancer','currency','employment','engineering','entry','interaction','mixture','preference', + 'region','republic','tradition','virus','actor','classroom','delivery','device', + 'difficulty','drama','election','engine','football','guidance','hotel','owner', + 'priority','protection','suggestion','tension','variation','anxiety','atmosphere', + 'awareness','bath','bread','candidate','climate','comparison','confusion','construction', + 'elevator','emotion','employee','employer','guest','height','leadership','mall','manager', + 'operation','recording','sample','transportation','charity','cousin','disaster','editor', + 'efficiency','excitement','extent','feedback','guitar','homework','leader','mom','outcome', + 'permission','presentation','promotion','reflection','refrigerator','resolution','revenue', + 
'session','singer','tennis','basket','bonus','cabinet','childhood','church','clothes','coffee', + 'dinner','drawing','hair','hearing','initiative','judgment','lab','measurement','mode','mud', + 'orange','poetry','police','possibility','procedure','queen','ratio','relation','restaurant', + 'satisfaction','sector','signature','significance','song','tooth','town','vehicle','volume','wife', + 'accident','airport','appointment','arrival','assumption','baseball','chapter','committee', + 'conversation','database','enthusiasm','error','explanation','farmer','gate','girl','hall', + 'historian','hospital','injury','instruction','maintenance','manufacturer','meal','perception','pie', + 'poem','presence','proposal','reception','replacement','revolution','river','son','speech','tea', + 'village','warning','winner','worker','writer','assistance','breath','buyer','chest','chocolate', + 'conclusion','contribution','cookie','courage','dad','desk','drawer','establishment','examination', + 'garbage','grocery','honey','impression','improvement','independence','insect','inspection', + 'inspector','king','ladder','menu','penalty','piano','potato','profession','professor','quantity', + 'reaction','requirement','salad','sister','supermarket','tongue','weakness','wedding','affair', + 'ambition','analyst','apple','assignment','assistant','bathroom','bedroom','beer','birthday', + 'celebration','championship','cheek','client','consequence','departure','diamond','dirt','ear', + 'fortune','friendship','funeral','gene','girlfriend','hat','indication','intention','lady', + 'midnight','negotiation','obligation','passenger','pizza','platform','poet','pollution', + 'recognition','reputation','shirt','sir','speaker','stranger','surgery','sympathy','tale','throat', + 'trainer','uncle','youth','time','work','film','water','money','example','while','business','study', + 'game','life','form','air','day','place','number','part','field','fish','back','process','heat', + 
'hand','experience','job','book','end','point','type','home','economy','value','body','market', + 'guide','interest','state','radio','course','company','price','size','card','list','mind','trade', + 'line','care','group','risk','word','fat','force','key','light','training','name','school','top', + 'amount','level','order','practice','research','sense','service','piece','web','boss','sport','fun', + 'house','page','term','test','answer','sound','focus','matter','kind','soil','board','oil','picture', + 'access','garden','range','rate','reason','future','site','demand','exercise','image','case','cause', + 'coast','action','age','bad','boat','record','result','section','building','mouse','cash','class', + 'nothing','period','plan','store','tax','side','subject','space','rule','stock','weather','chance', + 'figure','man','model','source','beginning','earth','program','chicken','design','feature','head', + 'material','purpose','question','rock','salt','act','birth','car','dog','object','scale','sun', + 'note','profit','rent','speed','style','war','bank','craft','half','inside','outside','standard', + 'bus','exchange','eye','fire','position','pressure','stress','advantage','benefit','box','frame', + 'issue','step','cycle','face','item','metal','paint','review','room','screen','structure','view', + 'account','ball','discipline','medium','share','balance','bit','black','bottom','choice','gift', + 'impact','machine','shape','tool','wind','address','average','career','culture','morning','pot', + 'sign','table','task','condition','contact','credit','egg','hope','ice','network','north','square', + 'attempt','date','effect','link','post','star','voice','capital','challenge','friend','self','shot', + 'brush','couple','debate','exit','front','function','lack','living','plant','plastic','spot', + 'summer','taste','theme','track','wing','brain','button','click','desire','foot','gas','influence', + 
'notice','rain','wall','base','damage','distance','feeling','pair','savings','staff','sugar', + 'target','text','animal','author','budget','discount','file','ground','lesson','minute','officer', + 'phase','reference','register','sky','stage','stick','title','trouble','bowl','bridge','campaign', + 'character','club','edge','evidence','fan','letter','lock','maximum','novel','option','pack','park', + 'plenty','quarter','skin','sort','weight','baby','background','carry','dish','factor','fruit', + 'glass','joint','master','muscle','red','strength','traffic','trip','vegetable','appeal','chart', + 'gear','ideal','kitchen','land','log','mother','net','party','principle','relative','sale','season', + 'signal','spirit','street','tree','wave','belt','bench','commission','copy','drop','minimum','path', + 'progress','project','sea','south','status','stuff','ticket','tour','angle','blue','breakfast', + 'confidence','daughter','degree','doctor','dot','dream','duty','essay','father','fee','finance', + 'hour','juice','limit','luck','milk','mouth','peace','pipe','seat','stable','storm','substance', + 'team','trick','afternoon','bat','beach','blank','catch','chain','consideration','cream','crew', + 'detail','gold','interview','kid','mark','match','mission','pain','pleasure','score','screw','sex', + 'shop','shower','suit','tone','window','agent','band','block','bone','calendar','cap','coat', + 'contest','corner','court','cup','district','door','east','finger','garage','guarantee','hole', + 'hook','implement','layer','lecture','lie','manner','meeting','nose','parking','partner','profile', + 'respect','rice','routine','schedule','swimming','telephone','tip','winter','airline','bag','battle', + 'bed','bill','bother','cake','code','curve','designer','dimension','dress','ease','emergency', + 'evening','extension','farm','fight','gap','grade','holiday','horror','horse','host','husband', + 'loan','mistake','mountain','nail','noise','occasion','package','patient','pause','phrase','proof', + 
'race','relief','sand','sentence','shoulder','smoke','stomach','string','tourist','towel','vacation', + 'west','wheel','wine','arm','aside','associate','bet','blow','border','branch','breast','brother', + 'buddy','bunch','chip','coach','cross','document','draft','dust','expert','floor','god','golf', + 'habit','iron','judge','knife','landscape','league','mail','mess','native','opening','parent', + 'pattern','pin','pool','pound','request','salary','shame','shelter','shoe','silver','tackle','tank', + 'trust','assist','bake','bar','bell','bike','blame','boy','brick','chair','closet','clue','collar', + 'comment','conference','devil','diet','fear','fuel','glove','jacket','lunch','monitor','mortgage', + 'nurse','pace','panic','peak','plane','reward','row','sandwich','shock','spite','spray','surprise', + 'till','transition','weekend','welcome','yard','alarm','bend','bicycle','bite','blind','bottle', + 'cable','candle','clerk','cloud','concert','counter','flower','grandfather','harm','knee','lawyer', + 'leather','load','mirror','neck','pension','plate','purple','ruin','ship','skirt','slice','snow', + 'specialist','stroke','switch','trash','tune','zone','anger','award','bid','bitter','boot','bug', + 'camp','candy','carpet','cat','champion','channel','clock','comfort','cow','crack','engineer', + 'entrance','fault','grass','guy','hell','highlight','incident','island','joke','jury','leg','lip', + 'mate','motor','nerve','passage','pen','pride','priest','prize','promise','resident','resort','ring', + 'roof','rope','sail','scheme','script','sock','station','toe','tower','truck','witness','a','you', + 'it','can','will','if','one','many','most','other','use','make','good','look','help','go','great', + 'being','few','might','still','public','read','keep','start','give','human','local','general','she', + 'specific','long','play','feel','high','tonight','put','common','set','change','simple','past','big', + 
'possible','particular','today','major','personal','current','national','cut','natural','physical', + 'show','try','check','second','call','move','pay','let','increase','single','individual','turn', + 'ask','buy','guard','hold','main','offer','potential','professional','international','travel','cook', + 'alternative','following','special','working','whole','dance','excuse','cold','commercial','low', + 'purchase','deal','primary','worth','fall','necessary','positive','produce','search','present', + 'spend','talk','creative','tell','cost','drive','green','support','glad','remove','return','run', + 'complex','due','effective','middle','regular','reserve','independent','leave','original','reach', + 'rest','serve','watch','beautiful','charge','active','break','negative','safe','stay','visit', + 'visual','affect','cover','report','rise','walk','white','beyond','junior','pick','unique', + 'anything','classic','final','lift','mix','private','stop','teach','western','concern','familiar', + 'fly','official','broad','comfortable','gain','maybe','rich','save','stand','young','fail','heavy', + 'hello','lead','listen','valuable','worry','handle','leading','meet','release','sell','finish', + 'normal','press','ride','secret','spread','spring','tough','wait','brown','deep','display','flow', + 'hit','objective','shoot','touch','cancel','chemical','cry','dump','extreme','push','conflict','eat', + 'fill','formal','jump','kick','opposite','pass','pitch','remote','total','treat','vast','abuse', + 'beat','burn','deposit','print','raise','sleep','somewhere','advance','anywhere','consist','dark', + 'double','draw','equal','fix','hire','internal','join','kill','sensitive','tap','win','attack', + 'claim','constant','drag','drink','guess','minor','pull','raw','soft','solid','wear','weird', + 'wonder','annual','count','dead','doubt','feed','forever','impress','nobody','repeat','round','sing', + 'slide','strip','whereas','wish','combine','command','dig','divide','equivalent','hang','hunt', + 
'initial','march','mention','smell','spiritual','survey','tie','adult','brief','crazy','escape', + 'gather','hate','prior','repair','rough','sad','scratch','sick','strike','employ','external','hurt', + 'illegal','laugh','lay','mobile','nasty','ordinary','respond','royal','senior','split','strain', + 'struggle','swim','train','upper','wash','yellow','convert','crash','dependent','fold','funny', + 'grab','hide','miss','permit','quote','recover','resolve','roll','sink','slip','spare','suspect', + 'sweet','swing','twist','upstairs','usual','abroad','brave','calm','concentrate','estimate','grand', + 'male','mine','prompt','quiet','refuse','regret','reveal','rush','shake','shift','shine','steal', + 'suck','surround','anybody','bear','brilliant','dare','dear','delay','drunk','female','hurry', + 'inevitable','invite','kiss','neat','pop','punch','quit','reply','representative','resist','rip', + 'rub','silly','smile','spell','stretch','stupid','tear','temporary','tomorrow','wake','wrap', + 'yesterday'] + +def get_random_name(with_ext=True): + return "{}_{}_{}{}".format( + random.choice(adjectives), + random.choice(nouns), + random.randint(0, 50000), + with_ext and '.txt' or '') + +def get_random_file(max_filesize): + file_start = random.randint(0, (max_filesize - 1025)) + file_size = random.randint(0, (max_filesize - file_start)) + file_name = get_random_name() + return "{}:{}:{}".format(file_start, file_size, file_name) + +def get_stream(name, max_filesize, data_loc, args): + files = [] + for _ in range(random.randint(args.min_files, args.max_files)): + files.append(get_random_file(max_filesize)) + stream = "{} {} {}".format(name, data_loc, ' '.join(files)) + return stream + +def create_substreams(depth, base_stream_name, max_filesize, data_loc, args, current_size=0): + current_stream = get_stream(base_stream_name, max_filesize, data_loc, args) + current_size += len(current_stream) + streams = [current_stream] + + if current_size >= (128 * 1024 * 1024): + 
logger.debug("Maximum manifest size reached -- finishing early at {}".format(base_stream_name)) + elif depth == 0: + logger.debug("Finished stream {}".format(base_stream_name)) + else: + for _ in range(random.randint(args.min_subdirs, args.max_subdirs)): + stream_name = base_stream_name+'/'+get_random_name(False) + substreams = create_substreams(depth-1, stream_name, max_filesize, + data_loc, args, current_size) + current_size += sum([len(x) for x in substreams]) + if current_size >= (128 * 1024 * 1024): + break + streams.extend(substreams) + return streams + +def parse_arguments(arguments): + args = arg_parser.parse_args(arguments) + if args.debug: + logger.setLevel(logging.DEBUG) + if args.max_files < args.min_files: + arg_parser.error("--min-files={} should be less or equal than max-files={}".format(args.min_files, args.max_files)) + if args.min_depth < 0: + arg_parser.error("--min-depth should be at least 0") + if args.max_depth < 0 or args.max_depth < args.min_depth: + arg_parser.error("--max-depth should be at >= 0 and >= min-depth={}".format(args.min_depth)) + if args.max_subdirs < args.min_subdirs: + arg_parser.error("--min-subdirs={} should be less or equal than max-subdirs={}".format(args.min_subdirs, args.max_subdirs)) + return args + +def main(arguments=None): + args = parse_arguments(arguments) + logger.info("Creating test collection with (min={}, max={}) files per directory and a tree depth of (min={}, max={}) and (min={}, max={}) subdirs in each depth level...".format(args.min_files, args.max_files, args.min_depth, args.max_depth, args.min_subdirs, args.max_subdirs)) + api = arvados.api('v1', timeout=5*60) + max_filesize = 1024*1024 + data_block = ''.join([random.choice(string.printable) for i in range(max_filesize)]) + data_loc = arvados.KeepClient(api).put(data_block) + streams = create_substreams(random.randint(args.min_depth, args.max_depth), + '.', max_filesize, data_loc, args) + manifest = '' + for s in streams: + if len(manifest)+len(s) > 
(1024*1024*128)-2: + logger.info("Skipping stream {} to avoid making a manifest bigger than 128MiB".format(s.split(' ')[0])) + break + manifest += s + '\n' + try: + coll_name = get_random_name(False) + coll = api.collections().create( + body={"collection": { + "name": coll_name, + "manifest_text": manifest + }, + }).execute() + except: + logger.info("ERROR creating collection with name '{}' and manifest:\n'{}...'\nSize: {}".format(coll_name, manifest[0:1024], len(manifest))) + raise + logger.info("Created collection {} - manifest size: {}".format(coll["uuid"], len(manifest))) + return 0 + +if __name__ == "__main__": + sys.exit(main()) \ No newline at end of file