Merge branch '17948-test-collection-tool' into main. Closes #17948
author	Lucas Di Pentima <lucas.dipentima@curii.com>
Tue, 24 Aug 2021 20:18:19 +0000 (17:18 -0300)
committer	Lucas Di Pentima <lucas.dipentima@curii.com>
Tue, 24 Aug 2021 20:18:19 +0000 (17:18 -0300)
Arvados-DCO-1.1-Signed-off-by: Lucas Di Pentima <lucas.dipentima@curii.com>

67 files changed:
apps/workbench/test/controllers/work_units_controller_test.rb
apps/workbench/test/integration/work_units_test.rb
build/run-library.sh
doc/_config.yml
doc/admin/upgrading.html.textile.liquid
doc/api/methods.html.textile.liquid
doc/install/singularity.html.textile.liquid [new file with mode: 0644]
doc/user/cwl/cwl-extensions.html.textile.liquid
doc/user/cwl/cwl-run-options.html.textile.liquid
doc/user/topics/arv-docker.html.textile.liquid
doc/user/topics/storage-classes.html.textile.liquid
lib/config/config.default.yml
lib/config/export.go
lib/config/generated_config.go
lib/controller/integration_test.go
sdk/cwl/arvados_cwl/__init__.py
sdk/cwl/arvados_cwl/arv-cwl-schema-v1.0.yml
sdk/cwl/arvados_cwl/arv-cwl-schema-v1.1.yml
sdk/cwl/arvados_cwl/arv-cwl-schema-v1.2.yml
sdk/cwl/arvados_cwl/arvcontainer.py
sdk/cwl/arvados_cwl/arvtool.py
sdk/cwl/arvados_cwl/context.py
sdk/cwl/arvados_cwl/executor.py
sdk/cwl/tests/test_container.py
sdk/cwl/tests/test_submit.py
sdk/cwl/tests/wf/submit_storage_class_wf.cwl [new file with mode: 0644]
sdk/go/arvados/config.go
services/api/app/mailers/user_notifier.rb
services/api/app/models/api_client_authorization.rb
services/api/config/arvados_config.rb
services/api/db/migrate/20210816191509_drop_fts_index.rb [new file with mode: 0644]
services/api/db/structure.sql
services/api/lib/record_filters.rb
services/api/lib/tasks/manage_long_lived_tokens.rake
services/api/test/fixtures/jobs.yml
services/api/test/fixtures/pipeline_instances.yml
services/api/test/functional/arvados/v1/filters_test.rb
services/api/test/integration/collections_api_test.rb
services/api/test/integration/groups_test.rb
services/api/test/unit/arvados_model_test.rb
services/api/test/unit/user_notifier_test.rb
services/fuse/arvados_fuse/fusedir.py
services/keepstore/handler_test.go
services/keepstore/handlers.go
services/keepstore/volume.go
tools/arvbox/lib/arvbox/docker/Dockerfile.base
tools/arvbox/lib/arvbox/docker/service/workbench2/run-service
tools/salt-install/Vagrantfile
tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/arvados.sls
tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_api_configuration.sls
tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_controller_configuration.sls
tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_keepproxy_configuration.sls
tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_keepweb_configuration.sls
tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_passenger.sls
tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_webshell_configuration.sls
tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_websocket_configuration.sls
tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_workbench2_configuration.sls
tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_workbench_configuration.sls
tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/postgresql.sls
tools/salt-install/config_examples/single_host/multiple_hostnames/states/snakeoil_certs.sls
tools/salt-install/config_examples/single_host/single_hostname/pillars/nginx_passenger.sls
tools/salt-install/config_examples/single_host/single_hostname/states/snakeoil_certs.sls
tools/salt-install/local.params.example.multiple_hosts
tools/salt-install/local.params.example.single_host_multiple_hostnames
tools/salt-install/local.params.example.single_host_single_hostname
tools/salt-install/provision.sh
tools/salt-install/tests/run-test.sh

index 6f74955cd1c8d0940c979b70867a6cbbfda5aacb..0191c7f0df6f768959e7716e95abd68128e21bf9 100644 (file)
@@ -13,26 +13,26 @@ class WorkUnitsControllerTest < ActionController::TestCase
   [
     ['foo', 10, 25,
       ['/pipeline_instances/zzzzz-d1hrv-1xfj6xkicf2muk2',
-       '/pipeline_instances/zzzzz-d1hrv-jobspeccomponts',
+       '/pipeline_instances/zzzzz-d1hrv-1yfj61234abcdk4',
        '/jobs/zzzzz-8i9sb-grx15v5mjnsyxk7'],
       ['/pipeline_instances/zzzzz-d1hrv-1yfj61234abcdk3',
        '/jobs/zzzzz-8i9sb-n7omg50bvt0m1nf',
        '/container_requests/zzzzz-xvhdp-cr4completedcr2']],
     ['pipeline_with_tagged_collection_input', 1, 1,
       ['/pipeline_instances/zzzzz-d1hrv-1yfj61234abcdk3'],
-      ['/pipeline_instances/zzzzz-d1hrv-jobspeccomponts',
+      ['/pipeline_instances/zzzzz-d1hrv-1yfj61234abcdk4',
        '/jobs/zzzzz-8i9sb-pshmckwoma9plh7',
        '/jobs/zzzzz-8i9sb-n7omg50bvt0m1nf',
        '/container_requests/zzzzz-xvhdp-cr4completedcr2']],
     ['no_such_match', 0, 0,
       [],
-      ['/pipeline_instances/zzzzz-d1hrv-jobspeccomponts',
+      ['/pipeline_instances/zzzzz-d1hrv-1yfj61234abcdk4',
        '/jobs/zzzzz-8i9sb-pshmckwoma9plh7',
        '/jobs/zzzzz-8i9sb-n7omg50bvt0m1nf',
        '/container_requests/zzzzz-xvhdp-cr4completedcr2']],
   ].each do |search_filter, expected_min, expected_max, expected, not_expected|
     test "all_processes page for search filter '#{search_filter}'" do
-      work_units_index(filters: [['any','@@', search_filter]], show_children: true)
+      work_units_index(filters: [['any','ilike', "%#{search_filter}%"]], show_children: true)
       assert_response :success
 
       # Verify that expected number of processes are found
index 4f2ebbc554d624440cd4dc5251667c7c5ecadfba..36b29468ff8b1012d32232b1031ee8f2cf4f6ab3 100644 (file)
@@ -14,7 +14,7 @@ class WorkUnitsTest < ActionDispatch::IntegrationTest
 
   [[true, 25, 100,
     ['/pipeline_instances/zzzzz-d1hrv-1yfj61234abcdk3',
-     '/pipeline_instances/zzzzz-d1hrv-jobspeccomponts',
+     '/pipeline_instances/zzzzz-d1hrv-1yfj61234abcdk4',
      '/jobs/zzzzz-8i9sb-grx15v5mjnsyxk7',
      '/jobs/zzzzz-8i9sb-n7omg50bvt0m1nf',
      '/container_requests/zzzzz-xvhdp-cr4completedcr2',
@@ -23,7 +23,7 @@ class WorkUnitsTest < ActionDispatch::IntegrationTest
      '/container_requests/zzzzz-xvhdp-oneof60crs00001']],
    [false, 25, 100,
     ['/pipeline_instances/zzzzz-d1hrv-1yfj61234abcdk3',
-     '/pipeline_instances/zzzzz-d1hrv-jobspeccomponts',
+     '/pipeline_instances/zzzzz-d1hrv-1yfj61234abcdk4',
      '/container_requests/zzzzz-xvhdp-cr4completedcr2'],
     ['/pipeline_instances/zzzzz-d1hrv-scarxiyajtshq3l',
      '/container_requests/zzzzz-xvhdp-oneof60crs00001',
index e404c0bc7c1e801d8d6cc4bd1d17567bd6c184c7..22bb065872e357aae2db7fc0437203d949c452df 100755 (executable)
@@ -621,6 +621,11 @@ fpm_build_virtualenv () {
   LICENSE_STRING=`grep license $WORKSPACE/$PKG_DIR/setup.py|cut -f2 -d=|sed -e "s/[',\\"]//g"`
   COMMAND_ARR+=('--license' "$LICENSE_STRING")
 
+  if [[ "$FORMAT" == "rpm" ]]; then
+    # Make sure to conflict with the old rh-python36 packages we used to publish
+    COMMAND_ARR+=('--conflicts' "rh-python36-python-$PKG")
+  fi
+
   if [[ "$DEBUG" != "0" ]]; then
     COMMAND_ARR+=('--verbose' '--log' 'info')
   fi
index b18607ebb7490622d38e119ad6a0f0383fdb35ba..ff924e2f378381f4c8f4e586f3c2c3570d8202e2 100644 (file)
@@ -257,6 +257,7 @@ navbar:
     - Containers API (lsf):
       - install/crunch2-lsf/install-dispatch.html.textile.liquid
     - Additional configuration:
+      - install/singularity.html.textile.liquid
       - install/container-shell-access.html.textile.liquid
     - External dependencies:
       - install/install-postgresql.html.textile.liquid
index b40082deba630f6fa208646b374e9bee311dd0cf..9e7410260f8955ea35f1d6e4790e792feb1c670f 100644 (file)
@@ -39,6 +39,10 @@ h2(#main). development main (as of 2021-07-15)
 
 "Upgrading from 2.2.0":#v2_2_0
 
+h3. Removed deprecated '@@' search operator
+
+The '@@' full text search operator, previously deprecated, has been removed. To perform a string search across multiple columns, use the 'ilike' operator on the 'any' column as described in the "available list method filter section":{{site.baseurl}}/api/methods.html#substringsearchfilter of the API documentation.
+
 h3. Storage classes must be defined explicitly
 
 If your configuration uses the StorageClasses attribute on any Keep volumes, you must add a new @StorageClasses@ section that lists all of your storage classes. Refer to the updated documentation about "configuring storage classes":{{site.baseurl}}/admin/storage-classes.html for details.
@@ -51,14 +55,14 @@ h3. crunch-dispatch-local now requires config.yml
 
 The @crunch-dispatch-local@ dispatcher now reads the API host and token from the system-wide @/etc/arvados/config.yml@.  It will fail to start if that file is not found or not readable.
 
-h2(#v2_2_0). v2.2.0 (2021-06-03)
-
-"Upgrading from 2.1.0":#v2_1_0
-
 h3. Multi-file docker image collections
 
 Typically a docker image collection contains a single @.tar@ file at the top level. Handling of atypical cases has changed. If a docker image collection contains files with extensions other than @.tar@, they will be ignored (previously they could cause errors). If a docker image collection contains multiple @.tar@ files, it will cause an error at runtime, "cannot choose from multiple tar files in image collection" (previously one of the @.tar@ files was selected). Subdirectories are ignored. The @arv keep docker@ command always creates a collection with a single @.tar@ file, and never uses subdirectories, so this change will not affect most users.
 
+h2(#v2_2_0). v2.2.0 (2021-06-03)
+
+"Upgrading from 2.1.0":#v2_1_0
+
 h3. New spelling of S3 credential configs
 
 If you use the S3 driver for Keep volumes and specify credentials in your configuration file (as opposed to using an IAM role), you should change the spelling of the @AccessKey@ and @SecretKey@ config keys to @AccessKeyID@ and @SecretAccessKey@. If you don't update them, the previous spellings will still be accepted, but warnings will be logged at server startup.
index c6e4ba00a74d8f1dcc440dfd83c125c17c9d6c1b..670a9e0da3d96ed16f8de9e053ae5a746cf0aa31 100644 (file)
@@ -96,7 +96,7 @@ table(table table-bordered table-condensed).
 |1|operator|string|Comparison operator|@>@, @>=@, @like@, @not in@|
 |2|operand|string, array, or null|Value to compare with the resource attribute|@"d00220fb%"@, @"1234"@, @["foo","bar"]@, @nil@|
 
-The following operators are available.[1]
+The following operators are available.
 
 table(table table-bordered table-condensed).
 |_. Operator|_. Operand type|_. Description|_. Example|
@@ -167,5 +167,3 @@ table(table table-bordered table-condensed).
 |_. Argument |_. Type |_. Description |_. Location |
 {background:#ccffcc}.|uuid|string|The UUID of the resource in question.|path||
 |{resource_type}|object||query||
-
-fn1^. NOTE: The filter operator for full-text search (@@) which previously worked (but was undocumented) is deprecated and will be removed in a future release.
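
A minimal Python SDK sketch of the replacement query, using the same ['any', 'ilike', '%...%'] filter form the updated tests above use:

    import arvados

    # Substring search across all searchable columns: the removed '@@'
    # operator is replaced by 'ilike' on the special 'any' column.
    api = arvados.api('v1')
    page = api.collections().list(
        filters=[['any', 'ilike', '%foo%']]).execute()
    for item in page['items']:
        print(item['uuid'], item.get('name'))
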
diff --git a/doc/install/singularity.html.textile.liquid b/doc/install/singularity.html.textile.liquid
new file mode 100644 (file)
index 0000000..1f38253
--- /dev/null
@@ -0,0 +1,42 @@
+---
+layout: default
+navsection: installguide
+title: Singularity container runtime
+...
+{% comment %}
+Copyright (C) The Arvados Authors. All rights reserved.
+
+SPDX-License-Identifier: CC-BY-SA-3.0
+{% endcomment %}
+
+Arvados can be configured to use "Singularity":https://sylabs.io/singularity/ instead of Docker to execute containers on cloud nodes or a SLURM/LSF cluster. Singularity may be preferable due to its simpler installation and its lack of a long-running daemon process and special system users/groups.
+
+Please note:
+* *Singularity support is currently considered experimental.*
+* Even when using the Singularity runtime, users' container images are expected to be saved in Docker format using @arv keep docker@. Arvados converts the Docker image to Singularity format (@.sif@) at runtime as needed. Specifying a @.sif@ file as an image when submitting a container request is not yet supported.
+* Singularity does not limit the amount of memory available in a container. Each container will have access to all memory on the host where it runs, unless memory use is restricted by SLURM/LSF.
+* Programs running in containers may behave differently due to differences between Singularity and Docker.
+** The root (image) filesystem is read-only in a Singularity container. Programs that attempt to write outside a designated output or temporary directory are likely to fail.
+** The Docker ENTRYPOINT instruction is ignored.
+* Arvados is currently tested with Singularity version 3.5.2.
+
+To use Singularity, first make sure "Singularity is installed":https://sylabs.io/guides/3.5/user-guide/quick_start.html on your cloud worker image or SLURM/LSF compute nodes, as applicable.  Note that @squashfs-tools@ is required.
+
+<notextile>
+<pre><code>$ <span class="userinput">singularity version</span>
+3.5.2
+$ <span class="userinput">mksquashfs -version</span>
+mksquashfs version 4.3-git (2014/06/09)
+[...]
+</code></pre>
+</notextile>
+
+Then update @Containers.RuntimeEngine@ in your cluster configuration:
+
+<notextile>
+<pre><code>      # Container runtime: "docker" (default) or "singularity" (experimental)
+      RuntimeEngine: singularity
+</code></pre>
+</notextile>
+
+Restart your dispatcher (@crunch-dispatch-slurm@, @arvados-dispatch-cloud@, or @arvados-dispatch-lsf@) after updating your configuration file.
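
A quick post-restart sanity check, as a sketch: the Python SDK exposes the cluster's exported configuration, which should reflect the new engine (assuming Containers.RuntimeEngine is included in the exported config on your version):

    import arvados

    # Sketch: confirm which container runtime the cluster advertises.
    # Assumes Containers.RuntimeEngine appears in the exported config;
    # if not, inspect /etc/arvados/config.yml on the server directly.
    api = arvados.api('v1')
    engine = api.config().get('Containers', {}).get('RuntimeEngine')
    print('RuntimeEngine:', engine)  # expect "singularity" after the change
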
index 0987218e3c0f7a84aff9316614e2c964bc70a561..206b75d58f20695c34f322236ee249ee5f3fefab 100644 (file)
@@ -24,29 +24,34 @@ For portability, most Arvados extensions should go into the @hints@ section of y
 {% codeblock as yaml %}
 hints:
   arv:RunInSingleContainer: {}
+
   arv:RuntimeConstraints:
     keep_cache: 123456
     outputDirType: keep_output_dir
+
   arv:PartitionRequirement:
     partition: dev_partition
+
   arv:APIRequirement: {}
-  cwltool:LoadListingRequirement:
-    loadListing: shallow_listing
+
   arv:IntermediateOutput:
     outputTTL: 3600
-  arv:ReuseRequirement:
-    enableReuse: false
+
   cwltool:Secrets:
     secrets: [input1, input2]
-  cwltool:TimeLimit:
-    timelimit: 14400
+
   arv:WorkflowRunnerResources:
     ramMin: 2048
     coresMin: 2
     keep_cache: 512
+
   arv:ClusterTarget:
     cluster_id: clsr1
     project_uuid: clsr1-j7d0g-qxc4jcji7n4lafx
+
+  arv:OutputStorageClass:
+    intermediateStorageClass: fast_storage
+    finalStorageClass: robust_storage
 {% endcodeblock %}
 
 h2(#RunInSingleContainer). arv:RunInSingleContainer
@@ -120,6 +125,15 @@ table(table table-bordered table-condensed).
 |cluster_id|string|The five-character alphanumeric cluster id (uuid prefix) where a container or subworkflow will execute.  May be an expression.|
 |project_uuid|string|The uuid of the project which will own container request and output of the container.  May be an expression.|
 
+h2(#OutputStorageClass). arv:OutputStorageClass
+
+Specify the "storage class":{{site.baseurl}}/user/topics/storage-classes.html to use for intermediate and final outputs.
+
+table(table table-bordered table-condensed).
+|_. Field |_. Type |_. Description |
+|intermediateStorageClass|string or array of strings|The storage class for output of intermediate steps.  For example, faster "hot" storage.|
+|finalStorageClass|string or array of strings|The storage class for the final output.|
+
 h2. arv:dockerCollectionPDH
 
 This is an optional extension field appearing on the standard @DockerRequirement@.  It specifies the portable data hash of the Arvados collection containing the Docker image.  If present, it takes precedence over @dockerPull@ or @dockerImageId@.
@@ -135,6 +149,16 @@ h1. Deprecated extensions
 
 The following extensions are deprecated because equivalent features are part of the CWL v1.1 standard.
 
+{% codeblock as yaml %}
+hints:
+  cwltool:LoadListingRequirement:
+    loadListing: shallow_listing
+  arv:ReuseRequirement:
+    enableReuse: false
+  cwltool:TimeLimit:
+    timelimit: 14400
+{% endcodeblock %}
+
 h2. cwltool:LoadListingRequirement
 
 For CWL v1.1 scripts, this is deprecated in favor of "loadListing":https://www.commonwl.org/v1.1/CommandLineTool.html#CommandInputParameter or "LoadListingRequirement":https://www.commonwl.org/v1.1/CommandLineTool.html#LoadListingRequirement
index 761d198ee4f504bc477b6575d9d1cde0c5b25085..a1c102593a4f1d790f1d1921627e6f5ec8bc07a3 100644 (file)
@@ -47,19 +47,19 @@ table(table table-bordered table-condensed).
 |==--no-wait==|             Submit workflow runner and exit.|
 |==--log-timestamps==|      Prefix logging lines with timestamp|
 |==--no-log-timestamps==|   No timestamp on logging lines|
-|==--api== {containers}|Select work submission API.  Only supports 'containers'|
 |==--compute-checksum==|    Compute checksum of contents while collecting outputs|
 |==--submit-runner-ram== SUBMIT_RUNNER_RAM|RAM (in MiB) required for the workflow runner (default 1024)|
 |==--submit-runner-image== SUBMIT_RUNNER_IMAGE|Docker image for workflow runner|
 |==--always-submit-runner==|When invoked with --submit --wait, always submit a runner to manage the workflow, even when only running a single CommandLineTool|
-|==--submit-request-uuid== UUID|Update and commit to supplied container request instead of creating a new one (containers API only).|
-|==--submit-runner-cluster== CLUSTER_ID|Submit workflow runner to a remote cluster (containers API only)|
+|==--submit-request-uuid== UUID|Update and commit to supplied container request instead of creating a new one.|
+|==--submit-runner-cluster== CLUSTER_ID|Submit workflow runner to a remote cluster|
 |==--name NAME==|Name to use for workflow execution instance.|
 |==--on-error== {stop,continue}|Desired workflow behavior when a step fails.  One of 'stop' (do not submit any more steps) or 'continue' (may submit other steps that are not downstream from the error). Default is 'continue'.|
 |==--enable-dev==|Enable loading and running development versions of CWL spec.|
-|==--storage-classes== STORAGE_CLASSES|Specify comma separated list of storage classes to be used when saving workflow output to Keep.|
+|==--storage-classes== STORAGE_CLASSES|Specify comma separated list of storage classes to be used when saving the final workflow output to Keep.|
+|==--intermediate-storage-classes== STORAGE_CLASSES|Specify comma separated list of storage classes to be used when saving intermediate workflow output to Keep.|
 |==--intermediate-output-ttl== N|If N > 0, intermediate output collections will be trashed N seconds after creation. Default is 0 (don't trash).|
-|==--priority== PRIORITY|Workflow priority (range 1..1000, higher has precedence over lower, containers api only)|
+|==--priority== PRIORITY|Workflow priority (range 1..1000, higher has precedence over lower)|
 |==--thread-count== THREAD_COUNT|Number of threads to use for container submit and output collection.|
 |==--http-timeout== HTTP_TIMEOUT|API request timeout in seconds. Default is 300 seconds (5 minutes).|
 |==--trash-intermediate==|Immediately trash intermediate outputs on workflow success.|
index bb1c7dd53e8cdaffd88d83b95b2177ae571fa55a..8a97df6e162187fc546d15f56b79b624c7adb648 100644 (file)
@@ -1,7 +1,7 @@
 ---
 layout: default
 navsection: userguide
-title: "Working with Docker images"
+title: "Working with container images"
 ...
 {% comment %}
 Copyright (C) The Arvados Authors. All rights reserved.
@@ -9,7 +9,9 @@ Copyright (C) The Arvados Authors. All rights reserved.
 SPDX-License-Identifier: CC-BY-SA-3.0
 {% endcomment %}
 
-This page describes how to set up the runtime environment (e.g., the programs, libraries, and other dependencies needed to run a job) that a workflow step will be run in using "Docker.":https://www.docker.com/  Docker is a tool for building and running containers that isolate applications from other applications running on the same node.  For detailed information about Docker, see the "Docker User Guide.":https://docs.docker.com/userguide/
+This page describes how to set up the runtime environment (e.g., the programs, libraries, and other dependencies needed to run a job) that a workflow step will be run in using "Docker":https://www.docker.com/ or "Singularity":https://sylabs.io/singularity/.  Docker and Singularity are tools for building and running containers that isolate applications from other applications running on the same node.  For detailed information, see the "Docker User Guide":https://docs.docker.com/userguide/ and the "Introduction to Singularity":https://sylabs.io/guides/3.5/user-guide/introduction.html.
+
+Note that Arvados always works with Docker images, even when it is configured to use Singularity to run containers. There are some differences between the two runtimes that can affect your containers. See the "Singularity container runtime":{{site.baseurl}}/install/singularity.html page for details.
 
 This page describes:
 
@@ -19,7 +21,7 @@ This page describes:
 
 {% include 'tutorial_expectations_workstation' %}
 
-You also need ensure that "Docker is installed,":https://docs.docker.com/installation/ the Docker daemon is running, and you have permission to access Docker.  You can test this by running @docker version@.  If you receive a permission denied error, your user account may need to be added to the @docker@ group.  If you have root access, you can add yourself to the @docker@ group using @$ sudo addgroup $USER docker@ then log out and log back in again; otherwise consult your local sysadmin.
+You also need to ensure that "Docker is installed,":https://docs.docker.com/installation/ the Docker daemon is running, and you have permission to access Docker.  You can test this by running @docker version@.  If you receive a permission denied error, your user account may need to be added to the @docker@ group.  If you have root access, you can add yourself to the @docker@ group using @$ sudo addgroup $USER docker@ then log out and log back in again; otherwise consult your local sysadmin.
 
 h2(#create). Create a custom image using a Dockerfile
 
index 650c3709559546aea019c2348607a30d79246cb2..06fd4d811c2b0e927f13e4fbb1af2c3f8395b386 100644 (file)
@@ -10,9 +10,11 @@ Copyright (C) The Arvados Authors. All rights reserved.
 SPDX-License-Identifier: CC-BY-SA-3.0
 {% endcomment %}
 
-Storage classes (alternately known as "storage tiers") allow you to control which volumes should be used to store particular collection data blocks.  This can be used to implement data storage policies such as moving data to archival storage.
+Storage classes (sometimes called "storage tiers") allow you to control which back-end storage volumes should be used to store the data blocks of a particular collection.  This can be used to implement data storage policies such as assigning data collections to "fast", "robust" or "archival" storage.
 
-Names of storage classes are internal to the cluster and decided by the administrator.  Aside from "default", Arvados currently does not define any standard storage class names.
+Names of storage classes are internal to the cluster and decided by the administrator.  Aside from "default", Arvados currently does not define any standard storage class names.  Consult your cluster administrator for guidance on what storage classes are available to use on your specific Arvados instance.
+
+Note that changing the storage class of an existing collection does not take effect immediately: the blocks are asynchronously copied to the new storage class and removed from the old one.  The collection field "storage_classes_confirmed" is updated to reflect when the data blocks have been successfully copied.
 
 h3. arv-put
 
@@ -32,14 +34,12 @@ $ arv-mount --storage-classes=transient --mount-tmp=scratch keep
 
 h3. arvados-cwl-runner
 
-You may also specify the desired storage class for the final output collection produced by @arvados-cwl-runner@:
+You may specify the desired storage classes for the intermediate and final output collections produced by @arvados-cwl-runner@, either on the command line or using the "arv:OutputStorageClass hint":{{site.baseurl}}/user/cwl/cwl-extensions.html#OutputStorageClass .
 
 <pre>
-$ arvados-cwl-runner --storage-classes=hot myworkflow.cwl myinput.yml
+$ arvados-cwl-runner --intermediate-storage-classes=hot_storage --storage-classes=robust_storage myworkflow.cwl myinput.yml
 </pre>
 
-(Note: intermediate collections produced by a workflow run will use the cluster's default storage class(es).)
-
 h3. arv command line
 
 You may set the storage class on an existing collection by setting the "storage_classes_desired" field of a Collection.  For example, at the command line:
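
The same update can be made with the Python SDK; a sketch (the UUID and storage class name are placeholders) that also polls storage_classes_confirmed to observe the asynchronous copy described above:

    import time
    import arvados

    # Sketch: request a new storage class for an existing collection, then
    # watch storage_classes_confirmed until the block copy completes.
    api = arvados.api('v1')
    uuid = 'zzzzz-4zz18-zzzzzzzzzzzzzzz'  # placeholder collection UUID
    api.collections().update(
        uuid=uuid,
        body={'collection': {'storage_classes_desired': ['archival']}}).execute()
    for _ in range(60):
        coll = api.collections().get(uuid=uuid).execute()
        if 'archival' in coll.get('storage_classes_confirmed', []):
            break
        time.sleep(10)
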
index 8640e71418fa88c51a7a8c3436949d9371bafee9..ec32613905f8ad9b40bde6820199413f5d30fe0d 100644 (file)
@@ -273,6 +273,7 @@ Clusters:
       AdminNotifierEmailFrom: arvados@example.com
       EmailSubjectPrefix: "[ARVADOS] "
       UserNotifierEmailFrom: arvados@example.com
+      UserNotifierEmailBcc: {}
       NewUserNotificationRecipients: {}
       NewInactiveUserNotificationRecipients: {}
 
index 2a3d0e173a6d23d36474a55627f6cf73b758df4e..065011cc2e8d31d2fb133f6546a32a8f506861b2 100644 (file)
@@ -227,6 +227,7 @@ var whitelist = map[string]bool{
        "Users.NewUsersAreActive":                             false,
        "Users.PreferDomainForUsername":                       false,
        "Users.UserNotifierEmailFrom":                         false,
+       "Users.UserNotifierEmailBcc":                          false,
        "Users.UserProfileNotificationAddress":                false,
        "Users.UserSetupMailText":                             false,
        "Volumes":                                             true,
index 55e8ba8f36da791007b8ad9ccf755b79733d4698..36a21d4c357165628aa321ef8d97b67c1ea894bd 100644 (file)
@@ -279,6 +279,7 @@ Clusters:
       AdminNotifierEmailFrom: arvados@example.com
       EmailSubjectPrefix: "[ARVADOS] "
       UserNotifierEmailFrom: arvados@example.com
+      UserNotifierEmailBcc: {}
       NewUserNotificationRecipients: {}
       NewInactiveUserNotificationRecipients: {}
 
index 26f0dbb0d1388da1886cea726fc644648b4d57e3..6851442054e1f49e8cde8c87dcced6d9eea0918a 100644 (file)
@@ -20,6 +20,7 @@ import (
        "path/filepath"
        "strconv"
        "strings"
+       "sync"
 
        "git.arvados.org/arvados.git/lib/boot"
        "git.arvados.org/arvados.git/lib/config"
@@ -187,6 +188,49 @@ func (s *IntegrationSuite) TestGetCollectionByPDH(c *check.C) {
        c.Check(coll.PortableDataHash, check.Equals, pdh)
 }
 
+// Tests bug #18004
+func (s *IntegrationSuite) TestRemoteUserAndTokenCacheRace(c *check.C) {
+       conn1 := s.testClusters["z1111"].Conn()
+       rootctx1, _, _ := s.testClusters["z1111"].RootClients()
+       rootctx2, _, _ := s.testClusters["z2222"].RootClients()
+       conn2 := s.testClusters["z2222"].Conn()
+       userctx1, _, _, _ := s.testClusters["z1111"].UserClients(rootctx1, c, conn1, "user2@example.com", true)
+
+       var wg1, wg2 sync.WaitGroup
+       creqs := 100
+
+       // Make concurrent requests to z2222 with a local token to make sure more
+       // than one worker is listening.
+       wg1.Add(1)
+       for i := 0; i < creqs; i++ {
+               wg2.Add(1)
+               go func() {
+                       defer wg2.Done()
+                       wg1.Wait()
+                       _, err := conn2.UserGetCurrent(rootctx2, arvados.GetOptions{})
+                       c.Check(err, check.IsNil, check.Commentf("warm up phase failed"))
+               }()
+       }
+       wg1.Done()
+       wg2.Wait()
+
+	// Real test pass -- use a different remote token from the one used in
+	// the warm-up phase.
+       wg1.Add(1)
+       for i := 0; i < creqs; i++ {
+               wg2.Add(1)
+               go func() {
+                       defer wg2.Done()
+                       wg1.Wait()
+                       // Retrieve the remote collection from cluster z2222.
+                       _, err := conn2.UserGetCurrent(userctx1, arvados.GetOptions{})
+                       c.Check(err, check.IsNil, check.Commentf("testing phase failed"))
+               }()
+       }
+       wg1.Done()
+       wg2.Wait()
+}
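
The gate trick (park every goroutine on wg1, then release them all at once) is the load-bearing part of this test. A sketch of the same pattern in Python, with make_request standing in for the federated API call:

    import threading

    # Start many workers, hold them at a single gate, then release them
    # simultaneously to maximize the chance of hitting a cache race.
    def hammer(make_request, workers=100):
        gate = threading.Barrier(workers + 1)
        errors = []

        def worker():
            gate.wait()              # corresponds to wg1.Wait() above
            try:
                make_request()
            except Exception as exc:
                errors.append(exc)

        threads = [threading.Thread(target=worker) for _ in range(workers)]
        for t in threads:
            t.start()
        gate.wait()                  # corresponds to wg1.Done(): release all
        for t in threads:
            t.join()                 # corresponds to wg2.Wait()
        return errors
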
+
 func (s *IntegrationSuite) TestS3WithFederatedToken(c *check.C) {
        if _, err := exec.LookPath("s3cmd"); err != nil {
                c.Skip("s3cmd not in PATH")
@@ -502,7 +546,7 @@ func (s *IntegrationSuite) TestRequestIDHeader(c *check.C) {
 }
 
 // We test the direct access to the database
-// normally an integration test would not have a database access, but  in this case we need
+// normally an integration test would not have database access, but in this case we need
 // to test tokens that are secret, so there is no API response that will give them back
 func (s *IntegrationSuite) dbConn(c *check.C, clusterID string) (*sql.DB, *sql.Conn) {
        ctx := context.Background()
index 04db611fbef349692bca2dd38a4988225d980917..7bbb3b29e8b6a27181df115f29b719304f51cb56 100644 (file)
@@ -22,6 +22,7 @@ import cwltool.main
 import cwltool.workflow
 import cwltool.process
 import cwltool.argparser
+from cwltool.errors import WorkflowException
 from cwltool.process import shortname, UnsupportedRequirement, use_custom_schema
 from cwltool.utils import adjustFileObjs, adjustDirObjs, get_listing
 
@@ -178,7 +179,9 @@ def arg_parser():  # type: () -> argparse.ArgumentParser
                         help="Enable loading and running development versions "
                              "of the CWL standards.", default=False)
     parser.add_argument('--storage-classes', default="default",
-                        help="Specify comma separated list of storage classes to be used when saving workflow output to Keep.")
+                        help="Specify comma separated list of storage classes to be used when saving final workflow output to Keep.")
+    parser.add_argument('--intermediate-storage-classes', default="default",
+                        help="Specify comma separated list of storage classes to be used when saving intermediate workflow output to Keep.")
 
     parser.add_argument("--intermediate-output-ttl", type=int, metavar="N",
                         help="If N > 0, intermediate output collections will be trashed N seconds after creation.  Default is 0 (don't trash).",
@@ -245,7 +248,8 @@ def add_arv_hints():
         "http://commonwl.org/cwltool#LoadListingRequirement",
         "http://arvados.org/cwl#IntermediateOutput",
         "http://arvados.org/cwl#ReuseRequirement",
-        "http://arvados.org/cwl#ClusterTarget"
+        "http://arvados.org/cwl#ClusterTarget",
+        "http://arvados.org/cwl#OutputStorageClass"
     ])
 
 def exit_signal_handler(sigcode, frame):
@@ -259,10 +263,6 @@ def main(args, stdout, stderr, api_client=None, keep_client=None,
     job_order_object = None
     arvargs = parser.parse_args(args)
 
-    if len(arvargs.storage_classes.strip().split(',')) > 1:
-        logger.error(str(u"Multiple storage classes are not supported currently."))
-        return 1
-
     arvargs.use_container = True
     arvargs.relax_path_checks = True
     arvargs.print_supported_versions = False
@@ -301,6 +301,9 @@ def main(args, stdout, stderr, api_client=None, keep_client=None,
         if keep_client is None:
             keep_client = arvados.keep.KeepClient(api_client=api_client, num_retries=4)
         executor = ArvCwlExecutor(api_client, arvargs, keep_client=keep_client, num_retries=4)
+    except WorkflowException as e:
+        logger.error(e, exc_info=(sys.exc_info()[1] if arvargs.debug else False))
+        return 1
     except Exception:
         logger.exception("Error creating the Arvados CWL Executor")
         return 1
index 8a3fa3173a9dd98baa5d8aa1ab74e19fe4bceb6d..2a2e857e073e1c14aa8e294a035182f03ab40549 100644 (file)
@@ -266,3 +266,32 @@ $graph:
     project_uuid:
       type: string?
       doc: The project that will own the container requests and intermediate collections
+
+
+- name: OutputStorageClass
+  type: record
+  extends: cwl:ProcessRequirement
+  inVocab: false
+  doc: |
+    Specify the storage class to be used for intermediate and final output
+  fields:
+    class:
+      type: string
+      doc: "Always 'arv:OutputStorageClass'"
+      jsonldPredicate:
+        _id: "@type"
+        _type: "@vocab"
+    intermediateStorageClass:
+      type:
+        - "null"
+        - string
+        - type: array
+          items: string
+      doc: One or more storage classes
+    finalStorageClass:
+      type:
+        - "null"
+        - string
+        - type: array
+          items: string
+      doc: One or more storage classes
index 95ed0a75bc69bfe94929ce0e2ee80adf6dcec7e6..fb14a63e315fc3dd7e56d8f6b832df87aae243a7 100644 (file)
@@ -210,3 +210,31 @@ $graph:
     project_uuid:
       type: string?
       doc: The project that will own the container requests and intermediate collections
+
+- name: OutputStorageClass
+  type: record
+  extends: cwl:ProcessRequirement
+  inVocab: false
+  doc: |
+    Specify the storage class to be used for intermediate and final output
+  fields:
+    class:
+      type: string
+      doc: "Always 'arv:OutputStorageClass'"
+      jsonldPredicate:
+        _id: "@type"
+        _type: "@vocab"
+    intermediateStorageClass:
+      type:
+        - "null"
+        - string
+        - type: array
+          items: string
+      doc: One or more storage classes
+    finalStorageClass:
+      type:
+        - "null"
+        - string
+        - type: array
+          items: string
+      doc: One or more storage classes
index 95ed0a75bc69bfe94929ce0e2ee80adf6dcec7e6..dd5919fc88033b44eefdcb851711ac1c447bc8c6 100644 (file)
@@ -210,3 +210,32 @@ $graph:
     project_uuid:
       type: string?
       doc: The project that will own the container requests and intermediate collections
+
+
+- name: OutputStorageClass
+  type: record
+  extends: cwl:ProcessRequirement
+  inVocab: false
+  doc: |
+    Specify the storage class to be used for intermediate and final output
+  fields:
+    class:
+      type: string
+      doc: "Always 'arv:OutputStorageClass'"
+      jsonldPredicate:
+        _id: "@type"
+        _type: "@vocab"
+    intermediateStorageClass:
+      type:
+        - "null"
+        - string
+        - type: array
+          items: string
+      doc: One or more storage classes
+    finalStorageClass:
+      type:
+        - "null"
+        - string
+        - type: array
+          items: string
+      doc: One or more storage classes
index 72ef14f6731baf83de87df28534a5c76b5a7dc42..c9170c51b732deee8f5a8bc383746c527ae5def7 100644 (file)
@@ -273,6 +273,12 @@ class ArvadosContainer(JobBase):
         if self.output_ttl < 0:
             raise WorkflowException("Invalid value %d for output_ttl, cannot be less than zero" % container_request["output_ttl"])
 
+        storage_class_req, _ = self.get_requirement("http://arvados.org/cwl#OutputStorageClass")
+        if storage_class_req and storage_class_req.get("intermediateStorageClass"):
+            container_request["output_storage_classes"] = aslist(storage_class_req["intermediateStorageClass"])
+        else:
+            container_request["output_storage_classes"] = runtimeContext.intermediate_storage_classes.strip().split(",")
+
         if self.timelimit is not None and self.timelimit > 0:
             scheduling_parameters["max_run_time"] = self.timelimit
 
@@ -495,6 +501,9 @@ class RunnerContainer(Runner):
         if runtimeContext.storage_classes != "default":
             command.append("--storage-classes=" + runtimeContext.storage_classes)
 
+        if runtimeContext.intermediate_storage_classes != "default":
+            command.append("--intermediate-storage-classes=" + runtimeContext.intermediate_storage_classes)
+
         if self.on_error:
             command.append("--on-error=" + self.on_error)
 
index 89176923519ee4c737d9376032463dee08a80102..13664a8dfb0d57df0477d4c627928b9be17ad8d7 100644 (file)
@@ -19,9 +19,15 @@ def validate_cluster_target(arvrunner, runtimeContext):
     if runtimeContext.project_uuid:
         cluster_target = runtimeContext.submit_runner_cluster or arvrunner.api._rootDesc["uuidPrefix"]
         if not runtimeContext.project_uuid.startswith(cluster_target):
-            raise WorkflowException("Project uuid '%s' must be for target cluster '%s'" % (runtimeContext.project_uuid, cluster_target))
+            raise WorkflowException("Project uuid '%s' should start with the id of the target cluster '%s'" % (runtimeContext.project_uuid, cluster_target))
+
         try:
-            arvrunner.api.groups().get(uuid=runtimeContext.project_uuid).execute()
+            if runtimeContext.project_uuid[5:12] == '-tpzed-':
+                arvrunner.api.users().get(uuid=runtimeContext.project_uuid).execute()
+            else:
+                proj = arvrunner.api.groups().get(uuid=runtimeContext.project_uuid).execute()
+                if proj["group_class"] != "project":
+                    raise Exception("not a project, group_class is '%s'" % (proj["group_class"]))
         except Exception as e:
             raise WorkflowException("Invalid project uuid '%s': %s" % (runtimeContext.project_uuid, e))
 
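
The dispatch above relies on the type infix embedded in Arvados UUIDs ('tpzed' for users, 'j7d0g' for groups), since a "project" owner may be a user's home project. A standalone sketch of the same rule (lookup_owner is a hypothetical helper):

    # uuid[6:11] is the object type infix, e.g. 'zzzzz-tpzed-...' is a user.
    def lookup_owner(api, uuid):
        if uuid[6:11] == 'tpzed':    # home project: the owner is a user
            return api.users().get(uuid=uuid).execute()
        group = api.groups().get(uuid=uuid).execute()
        if group['group_class'] != 'project':
            raise ValueError("not a project, group_class is %r"
                             % group['group_class'])
        return group
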
index 8cfe22ad7b6619f1f02d95eaf71153e44e52fd01..77d4027ccbabccf72e3fe5f60ad049726c1b99d1 100644 (file)
@@ -29,6 +29,7 @@ class ArvRuntimeContext(RuntimeContext):
         self.wait = True
         self.cwl_runner_job = None
         self.storage_classes = "default"
+        self.intermediate_storage_classes = "default"
         self.current_container = None
         self.http_timeout = 300
         self.submit_runner_cluster = None
index f60c480873b833dca11b0dba1a6cc853f4c29e2c..edb9d5b523c09bee4aa43f16705e27f2f15194d9 100644 (file)
@@ -42,7 +42,7 @@ from .context import ArvLoadingContext, ArvRuntimeContext
 from ._version import __version__
 
 from cwltool.process import shortname, UnsupportedRequirement, use_custom_schema
-from cwltool.utils import adjustFileObjs, adjustDirObjs, get_listing, visit_class
+from cwltool.utils import adjustFileObjs, adjustDirObjs, get_listing, visit_class, aslist
 from cwltool.command_line_tool import compute_checksums
 from cwltool.load_tool import load_tool
 
@@ -549,6 +549,12 @@ The 'jobs' API is no longer supported.
         if runtimeContext.submit_request_uuid and self.work_api != "containers":
             raise Exception("--submit-request-uuid requires containers API, but using '{}' api".format(self.work_api))
 
+        default_storage_classes = ",".join([k for k,v in self.api.config()["StorageClasses"].items() if v.get("Default") is True])
+        if runtimeContext.storage_classes == "default":
+            runtimeContext.storage_classes = default_storage_classes
+        if runtimeContext.intermediate_storage_classes == "default":
+            runtimeContext.intermediate_storage_classes = default_storage_classes
+
         if not runtimeContext.name:
             runtimeContext.name = self.name = updated_tool.tool.get("label") or updated_tool.metadata.get("label") or os.path.basename(updated_tool.tool["id"])
 
@@ -771,7 +777,13 @@ The 'jobs' API is no longer supported.
             if self.output_tags is None:
                 self.output_tags = ""
 
-            storage_classes = runtimeContext.storage_classes.strip().split(",")
+            storage_classes = ""
+            storage_class_req, _ = tool.get_requirement("http://arvados.org/cwl#OutputStorageClass")
+            if storage_class_req and storage_class_req.get("finalStorageClass"):
+                storage_classes = aslist(storage_class_req["finalStorageClass"])
+            else:
+                storage_classes = runtimeContext.storage_classes.strip().split(",")
+
             self.final_output, self.final_output_collection = self.make_output_collection(self.output_name, storage_classes, self.output_tags, self.final_output)
             self.set_crunch_output()
 
index 09983f87a2cea8f46b8790e6749c05a5bc57d8f8..2b46b89c604cafc0f91ee3966ecd1a3c068dbad7 100644 (file)
@@ -163,7 +163,8 @@ class TestContainer(unittest.TestCase):
                         'cwd': '/var/spool/cwl',
                         'scheduling_parameters': {},
                         'properties': {},
-                        'secret_mounts': {}
+                        'secret_mounts': {},
+                        'output_storage_classes': ["default"]
                     }))
 
     # The test passes some fields in builder.resources
@@ -250,7 +251,8 @@ class TestContainer(unittest.TestCase):
                 'partitions': ['blurb']
             },
             'properties': {},
-            'secret_mounts': {}
+            'secret_mounts': {},
+            'output_storage_classes': ["default"]
         }
 
         call_body = call_kwargs.get('body', None)
@@ -379,7 +381,8 @@ class TestContainer(unittest.TestCase):
             'scheduling_parameters': {
             },
             'properties': {},
-            'secret_mounts': {}
+            'secret_mounts': {},
+            'output_storage_classes': ["default"]
         }
 
         call_body = call_kwargs.get('body', None)
@@ -463,7 +466,8 @@ class TestContainer(unittest.TestCase):
                     'cwd': '/var/spool/cwl',
                     'scheduling_parameters': {},
                     'properties': {},
-                    'secret_mounts': {}
+                    'secret_mounts': {},
+                    'output_storage_classes': ["default"]
                 }))
 
     @mock.patch("arvados.collection.Collection")
@@ -696,7 +700,8 @@ class TestContainer(unittest.TestCase):
                     'cwd': '/var/spool/cwl',
                     'scheduling_parameters': {},
                     'properties': {},
-                    'secret_mounts': {}
+                    'secret_mounts': {},
+                    'output_storage_classes': ["default"]
                 }))
 
     # The test passes no builder.resources
@@ -791,7 +796,8 @@ class TestContainer(unittest.TestCase):
                             "content": "username: user\npassword: blorp\n",
                             "kind": "text"
                         }
-                    }
+                    },
+                    'output_storage_classes': ["default"]
                 }))
 
     # The test passes no builder.resources
@@ -835,6 +841,79 @@ class TestContainer(unittest.TestCase):
         self.assertEqual(42, kwargs['body']['scheduling_parameters'].get('max_run_time'))
 
 
+    # The test passes no builder.resources
+    # Hence the default resources will apply: {'cores': 1, 'ram': 1024, 'outdirSize': 1024, 'tmpdirSize': 1024}
+    @mock.patch("arvados.commands.keepdocker.list_images_in_arv")
+    def test_setting_storage_class(self, keepdocker):
+        arv_docker_clear_cache()
+
+        runner = mock.MagicMock()
+        runner.ignore_docker_for_reuse = False
+        runner.intermediate_output_ttl = 0
+        runner.secret_store = cwltool.secrets.SecretStore()
+
+        keepdocker.return_value = [("zzzzz-4zz18-zzzzzzzzzzzzzz3", "")]
+        runner.api.collections().get().execute.return_value = {
+            "portable_data_hash": "99999999999999999999999999999993+99"}
+
+        tool = cmap({
+            "inputs": [],
+            "outputs": [],
+            "baseCommand": "ls",
+            "arguments": [{"valueFrom": "$(runtime.outdir)"}],
+            "id": "#",
+            "class": "CommandLineTool",
+            "hints": [
+                {
+                    "class": "http://arvados.org/cwl#OutputStorageClass",
+                    "finalStorageClass": ["baz_sc", "qux_sc"],
+                    "intermediateStorageClass": ["foo_sc", "bar_sc"]
+                }
+            ]
+        })
+
+        loadingContext, runtimeContext = self.helper(runner, True)
+
+        arvtool = arvados_cwl.ArvadosCommandTool(runner, tool, loadingContext)
+        arvtool.formatgraph = None
+
+        for j in arvtool.job({}, mock.MagicMock(), runtimeContext):
+            j.run(runtimeContext)
+            runner.api.container_requests().create.assert_called_with(
+                body=JsonDiffMatcher({
+                    'environment': {
+                        'HOME': '/var/spool/cwl',
+                        'TMPDIR': '/tmp'
+                    },
+                    'name': 'test_run_True',
+                    'runtime_constraints': {
+                        'vcpus': 1,
+                        'ram': 1073741824
+                    },
+                    'use_existing': True,
+                    'priority': 500,
+                    'mounts': {
+                        '/tmp': {'kind': 'tmp',
+                                 "capacity": 1073741824
+                             },
+                        '/var/spool/cwl': {'kind': 'tmp',
+                                           "capacity": 1073741824 }
+                    },
+                    'state': 'Committed',
+                    'output_name': 'Output for step test_run_True',
+                    'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
+                    'output_path': '/var/spool/cwl',
+                    'output_ttl': 0,
+                    'container_image': '99999999999999999999999999999993+99',
+                    'command': ['ls', '/var/spool/cwl'],
+                    'cwd': '/var/spool/cwl',
+                    'scheduling_parameters': {},
+                    'properties': {},
+                    'secret_mounts': {},
+                    'output_storage_classes': ["foo_sc", "bar_sc"]
+                }))
+
+
 class TestWorkflow(unittest.TestCase):
     def setUp(self):
         cwltool.process._names = set()
@@ -972,7 +1051,8 @@ class TestWorkflow(unittest.TestCase):
                 "scheduling_parameters": {},
                 "secret_mounts": {},
                 "state": "Committed",
-                "use_existing": True
+                "use_existing": True,
+                'output_storage_classes': ["default"]
             }))
         mockc.open().__enter__().write.assert_has_calls([mock.call(subwf)])
         mockc.open().__enter__().write.assert_has_calls([mock.call(
@@ -1074,7 +1154,8 @@ class TestWorkflow(unittest.TestCase):
                 ],
                 'use_existing': True,
                 'output_name': u'Output for step echo-subwf',
-                'cwd': '/var/spool/cwl'
+                'cwd': '/var/spool/cwl',
+                'output_storage_classes': ["default"]
             }))
 
     def test_default_work_api(self):
index 12daf6b6702c906544a04c3d4ce034e2f7c01eb1..1b646a8e4459dfe5677fb18ca997a393248b6e8f 100644 (file)
@@ -95,6 +95,11 @@ def stubs(func):
         stubs.api.containers().current().execute.return_value = {
             "uuid": stubs.fake_container_uuid,
         }
+        stubs.api.config()["StorageClasses"].items.return_value = {
+            "default": {
+                "Default": True
+            }
+        }.items()
 
         class CollectionExecute(object):
             def __init__(self, exe):
@@ -342,14 +347,6 @@ class TestSubmit(unittest.TestCase):
         cwltool.process._names = set()
         arvados_cwl.arvdocker.arv_docker_clear_cache()
 
-    @stubs
-    def test_error_when_multiple_storage_classes_specified(self, stubs):
-        storage_classes = "foo,bar"
-        exited = arvados_cwl.main(
-                ["--debug", "--storage-classes", storage_classes,
-                 "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
-                sys.stdin, sys.stderr, api_client=stubs.api)
-        self.assertEqual(exited, 1)
 
     @mock.patch("time.sleep")
     @stubs
@@ -526,6 +523,27 @@ class TestSubmit(unittest.TestCase):
                          stubs.expect_container_request_uuid + '\n')
         self.assertEqual(exited, 0)
 
+    @stubs
+    def test_submit_multiple_storage_classes(self, stubs):
+        exited = arvados_cwl.main(
+            ["--debug", "--submit", "--no-wait", "--api=containers", "--storage-classes=foo,bar", "--intermediate-storage-classes=baz",
+                "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
+            stubs.capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
+
+        expect_container = copy.deepcopy(stubs.expect_container_spec)
+        expect_container["command"] = ['arvados-cwl-runner', '--local', '--api=containers',
+                                       '--no-log-timestamps', '--disable-validate', '--disable-color',
+                                       '--eval-timeout=20', '--thread-count=0',
+                                       '--enable-reuse', "--collection-cache-size=256", "--debug",
+                                       "--storage-classes=foo,bar", "--intermediate-storage-classes=baz", '--on-error=continue',
+                                       '/var/lib/cwl/workflow.json#main', '/var/lib/cwl/cwl.input.json']
+
+        stubs.api.container_requests().create.assert_called_with(
+            body=JsonDiffMatcher(expect_container))
+        self.assertEqual(stubs.capture_stdout.getvalue(),
+                         stubs.expect_container_request_uuid + '\n')
+        self.assertEqual(exited, 0)
+
     @mock.patch("cwltool.task_queue.TaskQueue")
     @mock.patch("arvados_cwl.arvworkflow.ArvadosWorkflow.job")
     @mock.patch("arvados_cwl.executor.ArvCwlExecutor.make_output_collection")
@@ -568,6 +586,27 @@ class TestSubmit(unittest.TestCase):
         make_output.assert_called_with(u'Output of submit_wf.cwl', ['default'], '', 'zzzzz-4zz18-zzzzzzzzzzzzzzzz')
         self.assertEqual(exited, 0)
 
+    @mock.patch("cwltool.task_queue.TaskQueue")
+    @mock.patch("arvados_cwl.arvworkflow.ArvadosWorkflow.job")
+    @mock.patch("arvados_cwl.executor.ArvCwlExecutor.make_output_collection")
+    @stubs
+    def test_storage_class_hint_to_make_output_collection(self, stubs, make_output, job, tq):
+        final_output_c = arvados.collection.Collection()
+        make_output.return_value = ({},final_output_c)
+
+        def set_final_output(job_order, output_callback, runtimeContext):
+            output_callback("zzzzz-4zz18-zzzzzzzzzzzzzzzz", "success")
+            return []
+        job.side_effect = set_final_output
+
+        exited = arvados_cwl.main(
+            ["--debug", "--local",
+                "tests/wf/submit_storage_class_wf.cwl", "tests/submit_test_job.json"],
+            stubs.capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
+
+        make_output.assert_called_with(u'Output of submit_storage_class_wf.cwl', ['foo', 'bar'], '', 'zzzzz-4zz18-zzzzzzzzzzzzzzzz')
+        self.assertEqual(exited, 0)
+
     @stubs
     def test_submit_container_output_ttl(self, stubs):
         exited = arvados_cwl.main(
@@ -853,6 +892,7 @@ class TestSubmit(unittest.TestCase):
     @stubs
     def test_submit_container_project(self, stubs):
         project_uuid = 'zzzzz-j7d0g-zzzzzzzzzzzzzzz'
+        stubs.api.groups().get().execute.return_value = {"group_class": "project"}
         exited = arvados_cwl.main(
             ["--submit", "--no-wait", "--api=containers", "--debug", "--project-uuid="+project_uuid,
                 "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
@@ -1265,12 +1305,14 @@ class TestSubmit(unittest.TestCase):
 
     @stubs
     def test_submit_validate_project_uuid(self, stubs):
+        # Fails with bad cluster prefix
         exited = arvados_cwl.main(
             ["--submit", "--no-wait", "--api=containers", "--debug", "--project-uuid=zzzzb-j7d0g-zzzzzzzzzzzzzzz",
              "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
             stubs.capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
         self.assertEqual(exited, 1)
 
+        # Project lookup fails
         stubs.api.groups().get().execute.side_effect = Exception("Bad project")
         exited = arvados_cwl.main(
             ["--submit", "--no-wait", "--api=containers", "--debug", "--project-uuid=zzzzz-j7d0g-zzzzzzzzzzzzzzx",
@@ -1278,6 +1320,14 @@ class TestSubmit(unittest.TestCase):
             stubs.capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
         self.assertEqual(exited, 1)
 
+        # It should work this time because it is looking up a user (and only group is stubbed out to fail)
+        exited = arvados_cwl.main(
+            ["--submit", "--no-wait", "--api=containers", "--debug", "--project-uuid=zzzzz-tpzed-zzzzzzzzzzzzzzx",
+             "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
+            stubs.capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
+        self.assertEqual(exited, 0)
+
+
     @mock.patch("arvados.collection.CollectionReader")
     @stubs
     def test_submit_uuid_inputs(self, stubs, collectionReader):
@@ -1382,6 +1432,7 @@ class TestCreateWorkflow(unittest.TestCase):
     @stubs
     def test_create(self, stubs):
         project_uuid = 'zzzzz-j7d0g-zzzzzzzzzzzzzzz'
+        stubs.api.groups().get().execute.return_value = {"group_class": "project"}
 
         exited = arvados_cwl.main(
             ["--create-workflow", "--debug",
@@ -1411,6 +1462,7 @@ class TestCreateWorkflow(unittest.TestCase):
     @stubs
     def test_create_name(self, stubs):
         project_uuid = 'zzzzz-j7d0g-zzzzzzzzzzzzzzz'
+        stubs.api.groups().get().execute.return_value = {"group_class": "project"}
 
         exited = arvados_cwl.main(
             ["--create-workflow", "--debug",
@@ -1486,6 +1538,7 @@ class TestCreateWorkflow(unittest.TestCase):
     @stubs
     def test_create_collection_per_tool(self, stubs):
         project_uuid = 'zzzzz-j7d0g-zzzzzzzzzzzzzzz'
+        stubs.api.groups().get().execute.return_value = {"group_class": "project"}
 
         exited = arvados_cwl.main(
             ["--create-workflow", "--debug",
@@ -1515,6 +1568,7 @@ class TestCreateWorkflow(unittest.TestCase):
     @stubs
     def test_create_with_imports(self, stubs):
         project_uuid = 'zzzzz-j7d0g-zzzzzzzzzzzzzzz'
+        stubs.api.groups().get().execute.return_value = {"group_class": "project"}
 
         exited = arvados_cwl.main(
             ["--create-workflow", "--debug",
@@ -1533,6 +1587,7 @@ class TestCreateWorkflow(unittest.TestCase):
     @stubs
     def test_create_with_no_input(self, stubs):
         project_uuid = 'zzzzz-j7d0g-zzzzzzzzzzzzzzz'
+        stubs.api.groups().get().execute.return_value = {"group_class": "project"}
 
         exited = arvados_cwl.main(
             ["--create-workflow", "--debug",
diff --git a/sdk/cwl/tests/wf/submit_storage_class_wf.cwl b/sdk/cwl/tests/wf/submit_storage_class_wf.cwl
new file mode 100644 (file)
index 0000000..c365d96
--- /dev/null
@@ -0,0 +1,30 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+# Test case for arvados-cwl-runner
+#
+# Used to test whether scanning a workflow file for dependencies
+# (e.g. submit_tool.cwl) and uploading to Keep works as intended.
+
+class: Workflow
+cwlVersion: v1.0
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+hints:
+  arv:OutputStorageClass:
+    finalStorageClass: [foo, bar]
+inputs:
+  - id: x
+    type: File
+  - id: y
+    type: Directory
+  - id: z
+    type: Directory
+outputs: []
+steps:
+  - id: step1
+    in:
+      - { id: x, source: "#x" }
+    out: []
+    run: ../tool/submit_tool.cwl
index cc1de1be4295201f207da20eca3c436b84161ca0..4a7c18b3e06324ab0f37a2a8db54270aedcdc2c9 100644 (file)
@@ -234,6 +234,7 @@ type Cluster struct {
                NewUserNotificationRecipients         StringSet
                NewUsersAreActive                     bool
                UserNotifierEmailFrom                 string
+               UserNotifierEmailBcc                  StringSet
                UserProfileNotificationAddress        string
                PreferDomainForUsername               string
                UserSetupMailText                     string
index ad887d035a339ff0039884237d394e820a077023..03000a37308ad287a2b5f75538dd25ab4054a3be 100644 (file)
@@ -9,7 +9,12 @@ class UserNotifier < ActionMailer::Base
 
   def account_is_setup(user)
     @user = user
-    mail(to: user.email, subject: 'Welcome to Arvados - account enabled')
+    if not Rails.configuration.Users.UserNotifierEmailBcc.empty? then
+      @bcc = Rails.configuration.Users.UserNotifierEmailBcc.keys
+      mail(to: user.email, subject: 'Welcome to Arvados - account enabled', bcc: @bcc)
+    else
+      mail(to: user.email, subject: 'Welcome to Arvados - account enabled')
+    end
   end
 
 end
index 52f2cee064905fd6a81e4e9e60a774dfc80bab55..7c7ed759c60058b5915ad1d56505dba6b56d84dd 100644 (file)
@@ -319,7 +319,17 @@ class ApiClientAuthorization < ArvadosModel
         user.last_name = "from cluster #{remote_user_prefix}"
       end
 
-      user.save!
+      begin
+        user.save!
+      rescue ActiveRecord::RecordInvalid, ActiveRecord::RecordNotUnique
+        Rails.logger.debug("remote user #{remote_user['uuid']} already exists, retrying...")
+        # Some other request won the race: retry fetching the user record.
+        user = User.find_by_uuid(remote_user['uuid'])
+        if !user
+          Rails.logger.warn("cannot find or create remote user #{remote_user['uuid']}")
+          return nil
+        end
+      end
 
       if user.is_invited && !remote_user['is_invited']
         # Remote user is not "invited" state, they should be unsetup, which
@@ -364,12 +374,24 @@ class ApiClientAuthorization < ArvadosModel
       exp = [db_current_time + Rails.configuration.Login.RemoteTokenRefresh,
              remote_token.andand['expires_at']].compact.min
       scopes = remote_token.andand['scopes'] || ['all']
-      auth = ApiClientAuthorization.find_or_create_by(uuid: token_uuid) do |auth|
-        auth.user = user
-        auth.api_token = stored_secret
-        auth.api_client_id = 0
-        auth.scopes = scopes
-        auth.expires_at = exp
+      begin
+        retries ||= 0
+        auth = ApiClientAuthorization.find_or_create_by(uuid: token_uuid) do |auth|
+          auth.user = user
+          auth.api_token = stored_secret
+          auth.api_client_id = 0
+          auth.scopes = scopes
+          auth.expires_at = exp
+        end
+      rescue ActiveRecord::RecordNotUnique
+        Rails.logger.debug("cached remote token #{token_uuid} already exists, retrying...")
+        # Some other request won the race: retry just once before erroring out
+        if (retries += 1) <= 1
+          retry
+        else
+          Rails.logger.warn("cannot find or create cached remote token #{token_uuid}")
+          return nil
+        end
       end
       auth.update_attributes!(user: user,
                               api_token: stored_secret,
index 1b3c96a8adfe55dd58085512969a59844d9a64d6..ea421a289b98cc628d6bfa9b0473a105a2986b0c 100644 (file)
@@ -96,6 +96,7 @@ arvcfg.declare_config "Users.UserProfileNotificationAddress", String, :user_prof
 arvcfg.declare_config "Users.AdminNotifierEmailFrom", String, :admin_notifier_email_from
 arvcfg.declare_config "Users.EmailSubjectPrefix", String, :email_subject_prefix
 arvcfg.declare_config "Users.UserNotifierEmailFrom", String, :user_notifier_email_from
+arvcfg.declare_config "Users.UserNotifierEmailBcc", Hash
 arvcfg.declare_config "Users.NewUserNotificationRecipients", Hash, :new_user_notification_recipients, ->(cfg, k, v) { arrayToHash cfg, "Users.NewUserNotificationRecipients", v }
 arvcfg.declare_config "Users.NewInactiveUserNotificationRecipients", Hash, :new_inactive_user_notification_recipients, method(:arrayToHash)
 arvcfg.declare_config "Login.LoginCluster", String
diff --git a/services/api/db/migrate/20210816191509_drop_fts_index.rb b/services/api/db/migrate/20210816191509_drop_fts_index.rb
new file mode 100644 (file)
index 0000000..4ee1f55
--- /dev/null
@@ -0,0 +1,29 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+class DropFtsIndex < ActiveRecord::Migration[5.2]
+  def fts_indexes
+    {
+      "collections" => "collections_full_text_search_idx",
+      "container_requests" => "container_requests_full_text_search_idx",
+      "groups" => "groups_full_text_search_idx",
+      "jobs" => "jobs_full_text_search_idx",
+      "pipeline_instances" => "pipeline_instances_full_text_search_idx",
+      "pipeline_templates" => "pipeline_templates_full_text_search_idx",
+      "workflows" => "workflows_full_text_search_idx",
+    }
+  end
+
+  def up
+    fts_indexes.each do |_table, index|
+      execute "DROP INDEX IF EXISTS #{index}"
+    end
+  end
+
+  def down
+    fts_indexes.each do |table, index|
+      execute "CREATE INDEX #{index} ON #{table} USING gin(#{table.classify.constantize.full_text_tsvector})"
+    end
+  end
+end
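Applying the migration's up direction effectively issues one statement per table in the fts_indexes map:

    DROP INDEX IF EXISTS collections_full_text_search_idx;
    DROP INDEX IF EXISTS container_requests_full_text_search_idx;
    DROP INDEX IF EXISTS groups_full_text_search_idx;
    DROP INDEX IF EXISTS jobs_full_text_search_idx;
    DROP INDEX IF EXISTS pipeline_instances_full_text_search_idx;
    DROP INDEX IF EXISTS pipeline_templates_full_text_search_idx;
    DROP INDEX IF EXISTS workflows_full_text_search_idx;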
index 2bca887212a331143065d117816b81dc383f9b91..2f7748335694310b09de911104a446ce54885093 100644 (file)
@@ -238,29 +238,6 @@ SET default_tablespace = '';
 
 SET default_with_oids = false;
 
---
--- Name: groups; Type: TABLE; Schema: public; Owner: -
---
-
-CREATE TABLE public.groups (
-    id integer NOT NULL,
-    uuid character varying(255),
-    owner_uuid character varying(255),
-    created_at timestamp without time zone NOT NULL,
-    modified_by_client_uuid character varying(255),
-    modified_by_user_uuid character varying(255),
-    modified_at timestamp without time zone,
-    name character varying(255) NOT NULL,
-    description character varying(524288),
-    updated_at timestamp without time zone NOT NULL,
-    group_class character varying(255),
-    trash_at timestamp without time zone,
-    is_trashed boolean DEFAULT false NOT NULL,
-    delete_at timestamp without time zone,
-    properties jsonb DEFAULT '{}'::jsonb
-);
-
-
 --
 -- Name: api_client_authorizations; Type: TABLE; Schema: public; Owner: -
 --
@@ -571,6 +548,29 @@ CREATE SEQUENCE public.containers_id_seq
 ALTER SEQUENCE public.containers_id_seq OWNED BY public.containers.id;
 
 
+--
+-- Name: groups; Type: TABLE; Schema: public; Owner: -
+--
+
+CREATE TABLE public.groups (
+    id integer NOT NULL,
+    uuid character varying(255),
+    owner_uuid character varying(255),
+    created_at timestamp without time zone NOT NULL,
+    modified_by_client_uuid character varying(255),
+    modified_by_user_uuid character varying(255),
+    modified_at timestamp without time zone,
+    name character varying(255) NOT NULL,
+    description character varying(524288),
+    updated_at timestamp without time zone NOT NULL,
+    group_class character varying(255),
+    trash_at timestamp without time zone,
+    is_trashed boolean DEFAULT false NOT NULL,
+    delete_at timestamp without time zone,
+    properties jsonb DEFAULT '{}'::jsonb
+);
+
+
 --
 -- Name: groups_id_seq; Type: SEQUENCE; Schema: public; Owner: -
 --
@@ -1722,13 +1722,6 @@ CREATE INDEX authorized_keys_search_index ON public.authorized_keys USING btree
 CREATE INDEX collection_index_on_properties ON public.collections USING gin (properties);
 
 
---
--- Name: collections_full_text_search_idx; Type: INDEX; Schema: public; Owner: -
---
-
-CREATE INDEX collections_full_text_search_idx ON public.collections USING gin (to_tsvector('english'::regconfig, substr((((((((((((((((((COALESCE(owner_uuid, ''::character varying))::text || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(portable_data_hash, ''::character varying))::text) || ' '::text) || (COALESCE(uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || (COALESCE(description, ''::character varying))::text) || ' '::text) || COALESCE((properties)::text, ''::text)) || ' '::text) || COALESCE(file_names, ''::text)), 0, 1000000)));
-
-
 --
 -- Name: collections_search_index; Type: INDEX; Schema: public; Owner: -
 --
@@ -1743,13 +1736,6 @@ CREATE INDEX collections_search_index ON public.collections USING btree (owner_u
 CREATE INDEX collections_trgm_text_search_idx ON public.collections USING gin (((((((((((((((((((COALESCE(owner_uuid, ''::character varying))::text || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(portable_data_hash, ''::character varying))::text) || ' '::text) || (COALESCE(uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || (COALESCE(description, ''::character varying))::text) || ' '::text) || COALESCE((properties)::text, ''::text)) || ' '::text) || COALESCE(file_names, ''::text))) public.gin_trgm_ops);
 
 
---
--- Name: container_requests_full_text_search_idx; Type: INDEX; Schema: public; Owner: -
---
-
-CREATE INDEX container_requests_full_text_search_idx ON public.container_requests USING gin (to_tsvector('english'::regconfig, substr((((((((((((((((((((((((((((((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(description, ''::text)) || ' '::text) || COALESCE((properties)::text, ''::text)) || ' '::text) || (COALESCE(state, ''::character varying))::text) || ' '::text) || (COALESCE(requesting_container_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(container_uuid, ''::character varying))::text) || ' '::text) || COALESCE(runtime_constraints, ''::text)) || ' '::text) || (COALESCE(container_image, ''::character varying))::text) || ' '::text) || COALESCE(environment, ''::text)) || ' '::text) || (COALESCE(cwd, ''::character varying))::text) || ' '::text) || COALESCE(command, ''::text)) || ' '::text) || (COALESCE(output_path, ''::character varying))::text) || ' '::text) || COALESCE(filters, ''::text)) || ' '::text) || COALESCE(scheduling_parameters, ''::text)) || ' '::text) || (COALESCE(output_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(log_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(output_name, ''::character varying))::text), 0, 1000000)));
-
-
 --
 -- Name: container_requests_index_on_properties; Type: INDEX; Schema: public; Owner: -
 --
@@ -1785,13 +1771,6 @@ CREATE INDEX containers_search_index ON public.containers USING btree (uuid, own
 CREATE INDEX group_index_on_properties ON public.groups USING gin (properties);
 
 
---
--- Name: groups_full_text_search_idx; Type: INDEX; Schema: public; Owner: -
---
-
-CREATE INDEX groups_full_text_search_idx ON public.groups USING gin (to_tsvector('english'::regconfig, substr((((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || (COALESCE(description, ''::character varying))::text) || ' '::text) || (COALESCE(group_class, ''::character varying))::text) || ' '::text) || COALESCE((properties)::text, ''::text)), 0, 1000000)));
-
-
 --
 -- Name: groups_search_index; Type: INDEX; Schema: public; Owner: -
 --
@@ -2779,13 +2758,6 @@ CREATE UNIQUE INDEX index_workflows_on_uuid ON public.workflows USING btree (uui
 CREATE INDEX job_tasks_search_index ON public.job_tasks USING btree (uuid, owner_uuid, modified_by_client_uuid, modified_by_user_uuid, job_uuid, created_by_job_task_uuid);
 
 
---
--- Name: jobs_full_text_search_idx; Type: INDEX; Schema: public; Owner: -
---
-
-CREATE INDEX jobs_full_text_search_idx ON public.jobs USING gin (to_tsvector('english'::regconfig, substr((((((((((((((((((((((((((((((((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(submit_id, ''::character varying))::text) || ' '::text) || (COALESCE(script, ''::character varying))::text) || ' '::text) || (COALESCE(script_version, ''::character varying))::text) || ' '::text) || COALESCE(script_parameters, ''::text)) || ' '::text) || (COALESCE(cancelled_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(cancelled_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(output, ''::character varying))::text) || ' '::text) || (COALESCE(is_locked_by_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(log, ''::character varying))::text) || ' '::text) || COALESCE(tasks_summary, ''::text)) || ' '::text) || COALESCE(runtime_constraints, ''::text)) || ' '::text) || (COALESCE(repository, ''::character varying))::text) || ' '::text) || (COALESCE(supplied_script_version, ''::character varying))::text) || ' '::text) || (COALESCE(docker_image_locator, ''::character varying))::text) || ' '::text) || (COALESCE(description, ''::character varying))::text) || ' '::text) || (COALESCE(state, ''::character varying))::text) || ' '::text) || (COALESCE(arvados_sdk_version, ''::character varying))::text) || ' '::text) || COALESCE(components, ''::text)), 0, 1000000)));
-
-
 --
 -- Name: jobs_search_index; Type: INDEX; Schema: public; Owner: -
 --
@@ -2877,13 +2849,6 @@ CREATE INDEX permission_target ON public.materialized_permissions USING btree (t
 CREATE UNIQUE INDEX permission_user_target ON public.materialized_permissions USING btree (user_uuid, target_uuid);
 
 
---
--- Name: pipeline_instances_full_text_search_idx; Type: INDEX; Schema: public; Owner: -
---
-
-CREATE INDEX pipeline_instances_full_text_search_idx ON public.pipeline_instances USING gin (to_tsvector('english'::regconfig, substr((((((((((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(pipeline_template_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(components, ''::text)) || ' '::text) || COALESCE(properties, ''::text)) || ' '::text) || (COALESCE(state, ''::character varying))::text) || ' '::text) || COALESCE(components_summary, ''::text)) || ' '::text) || (COALESCE(description, ''::character varying))::text), 0, 1000000)));
-
-
 --
 -- Name: pipeline_instances_search_index; Type: INDEX; Schema: public; Owner: -
 --
@@ -2905,13 +2870,6 @@ CREATE INDEX pipeline_instances_trgm_text_search_idx ON public.pipeline_instance
 CREATE UNIQUE INDEX pipeline_template_owner_uuid_name_unique ON public.pipeline_templates USING btree (owner_uuid, name);
 
 
---
--- Name: pipeline_templates_full_text_search_idx; Type: INDEX; Schema: public; Owner: -
---
-
-CREATE INDEX pipeline_templates_full_text_search_idx ON public.pipeline_templates USING gin (to_tsvector('english'::regconfig, substr((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(components, ''::text)) || ' '::text) || (COALESCE(description, ''::character varying))::text), 0, 1000000)));
-
-
 --
 -- Name: pipeline_templates_search_index; Type: INDEX; Schema: public; Owner: -
 --
@@ -2968,13 +2926,6 @@ CREATE INDEX users_search_index ON public.users USING btree (uuid, owner_uuid, m
 CREATE INDEX virtual_machines_search_index ON public.virtual_machines USING btree (uuid, owner_uuid, modified_by_client_uuid, modified_by_user_uuid, hostname);
 
 
---
--- Name: workflows_full_text_search_idx; Type: INDEX; Schema: public; Owner: -
---
-
-CREATE INDEX workflows_full_text_search_idx ON public.workflows USING gin (to_tsvector('english'::regconfig, substr((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(description, ''::text)), 0, 1000000)));
-
-
 --
 -- Name: workflows_search_idx; Type: INDEX; Schema: public; Owner: -
 --
@@ -3194,6 +3145,7 @@ INSERT INTO "schema_migrations" (version) VALUES
 ('20201202174753'),
 ('20210108033940'),
 ('20210126183521'),
-('20210621204455');
+('20210621204455'),
+('20210816191509');
 
 
index 5688ca6140f17fcef94ed112481cddb06e75c668..f8898d63c90de2169fc8d18b53d40f68171ae945 100644 (file)
@@ -31,7 +31,10 @@ module RecordFilters
     model_table_name = model_class.table_name
     filters.each do |filter|
       attrs_in, operator, operand = filter
-      if attrs_in == 'any' && operator != '@@'
+      if operator == '@@'
+        raise ArgumentError.new("Full text search operator is no longer supported")
+      end
+      if attrs_in == 'any'
         attrs = model_class.searchable_columns(operator)
       elsif attrs_in.is_a? Array
         attrs = attrs_in
@@ -54,22 +57,6 @@ module RecordFilters
         attrs = []
       end
 
-      if operator == '@@'
-        # Full-text search
-        if attrs_in != 'any'
-          raise ArgumentError.new("Full text search on individual columns is not supported")
-        end
-        if operand.is_a? Array
-          raise ArgumentError.new("Full text search not supported for array operands")
-        end
-
-        # Skip the generic per-column operator loop below
-        attrs = []
-        # Use to_tsquery since plainto_tsquery does not support prefix
-        # search. And, split operand and join the words with ' & '
-        cond_out << model_class.full_text_tsvector+" @@ to_tsquery(?)"
-        param_out << operand.split.join(' & ')
-      end
       attrs.each do |attr|
         subproperty = attr.split(".", 2)
 
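With the operator rejected at the top of the filter loop, any request using '@@' now fails fast with a 422, whether it targets 'any' or a specific column. Queries that relied on full text search can usually be rewritten with the substring operators that remain supported; a sketch of an equivalent filter parameter, assuming standard ilike semantics:

    [["any", "ilike", "%fox%"]]

The trigram indexes retained in db/structure.sql (e.g. collections_trgm_text_search_idx) exist to keep this kind of substring search efficient.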
index 7bcf315b0443a4244da40817c54a26015bcc66b6..d83c2b6030e1e7cc12f6ad4902bda1d13624e756 100644 (file)
@@ -11,30 +11,54 @@ require 'current_api_client'
 namespace :db do
   desc "Apply expiration policy on long lived tokens"
   task fix_long_lived_tokens: :environment do
-    if Rails.configuration.Login.TokenLifetime == 0
-      puts("No expiration policy set on Login.TokenLifetime.")
-    else
-      exp_date = Time.now + Rails.configuration.Login.TokenLifetime
-      puts("Setting token expiration to: #{exp_date}")
-      token_count = 0
-      ll_tokens.each do |auth|
-        if (auth.user.uuid =~ /-tpzed-000000000000000/).nil?
-          CurrentApiClientHelper.act_as_system_user do
-            auth.update_attributes!(expires_at: exp_date)
-          end
-          token_count += 1
+    lifetime = Rails.configuration.API.MaxTokenLifetime
+    if lifetime.nil? or lifetime == 0
+      lifetime = Rails.configuration.Login.TokenLifetime
+    end
+    if lifetime.nil? or lifetime == 0
+      puts("No expiration policy set (API.MaxTokenLifetime nor Login.TokenLifetime is set), nothing to do.")
+      # abort the rake task
+      next
+    end
+    exp_date = Time.now + lifetime
+    puts("Setting token expiration to: #{exp_date}")
+    token_count = 0
+    ll_tokens(lifetime).each do |auth|
+      if auth.user.nil?
+        printf("*** WARNING, found ApiClientAuthorization with invalid user: auth id: %d, user id: %d\n", auth.id, auth.user_id)
+        # skip this token
+        next
+      end
+      if (auth.user.uuid =~ /-tpzed-000000000000000/).nil?
+        CurrentApiClientHelper.act_as_system_user do
+          auth.update_attributes!(expires_at: exp_date)
         end
+        token_count += 1
       end
-      puts("#{token_count} tokens updated.")
     end
+    puts("#{token_count} tokens updated.")
   end
 
   desc "Show users with long lived tokens"
   task check_long_lived_tokens: :environment do
+    lifetime = Rails.configuration.API.MaxTokenLifetime
+    if lifetime.nil? or lifetime == 0
+      lifetime = Rails.configuration.Login.TokenLifetime
+    end
+    if lifetime.nil? or lifetime == 0
+      puts("No expiration policy set (API.MaxTokenLifetime nor Login.TokenLifetime is set), nothing to do.")
+      # abort the rake task
+      next
+    end
     user_ids = Set.new()
     token_count = 0
-    ll_tokens.each do |auth|
-      if (auth.user.uuid =~ /-tpzed-000000000000000/).nil?
+    ll_tokens(lifetime).each do |auth|
+      if auth.user.nil?
+        printf("*** WARNING, found ApiClientAuthorization with invalid user: auth id: %d, user id: %d\n", auth.id, auth.user_id)
+        # skip this token
+        next
+      end
+      if (auth.user.uuid =~ /-tpzed-000000000000000/).nil?
         user_ids.add(auth.user_id)
         token_count += 1
       end
@@ -51,11 +75,9 @@ namespace :db do
     end
   end
 
-  def ll_tokens
+  def ll_tokens(lifetime)
     query = ApiClientAuthorization.where(expires_at: nil)
-    if Rails.configuration.Login.TokenLifetime > 0
-      query = query.or(ApiClientAuthorization.where("expires_at > ?", Time.now + Rails.configuration.Login.TokenLifetime))
-    end
+    query = query.or(ApiClientAuthorization.where("expires_at > ?", Time.now + lifetime))
     query
   end
 end
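Both tasks are invoked the usual way for rake tasks in the API server source tree; a sketch (environment and wrapper details vary by install):

    bundle exec rake db:check_long_lived_tokens
    bundle exec rake db:fix_long_lived_tokens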
index 9b067aa263d2baede05c8a325560117a7d9df109..ab76417902214162506707d3e642f93539ffe7ed 100644 (file)
@@ -521,7 +521,7 @@ running_job_in_publicly_accessible_project:
   uuid: zzzzz-8i9sb-n7omg50bvt0m1nf
   owner_uuid: zzzzz-j7d0g-zhxawtyetzwc5f0
   modified_by_user_uuid: zzzzz-tpzed-xurymjxw79nv3jz
-  repository: active/foo
+  repository: active/bar
   script: running_job_script
   script_version: 4fe459abe02d9b365932b8f5dc419439ab4e2577
   state: Running
index 0865503281fe247f0fd027d4054d846a9370e9cf..9621b3effc1c74f0b832c021b3c9d2b99ef11586 100644 (file)
@@ -111,12 +111,9 @@ has_job:
 components_is_jobspec:
   # Helps test that clients cope with funny-shaped components.
   # For an example, see #3321.
-  uuid: zzzzz-d1hrv-jobspeccomponts
-  created_at: <%= 30.minute.ago.to_s(:db) %>
+  uuid: zzzzz-d1hrv-1yfj61234abcdk4
+  created_at: <%= 2.minute.ago.to_s(:db) %>
   owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
-  created_at: 2014-04-14 12:35:04 -0400
-  updated_at: 2014-04-14 12:35:04 -0400
-  modified_at: 2014-04-14 12:35:04 -0400
   modified_by_client_uuid: zzzzz-ozdt8-brczlopd8u8d0jr
   modified_by_user_uuid: zzzzz-tpzed-xurymjxw79nv3jz
   state: RunningOnServer
index 26270b1c3c9c9b4da0ec4c03f6a8d6fd861fbe70..bcb18078674ffd27bb124772b4478ebecfff9a76 100644 (file)
@@ -29,34 +29,14 @@ class Arvados::V1::FiltersTest < ActionController::TestCase
                  json_response['errors'].join(' '))
   end
 
-  test 'error message for full text search on a specific column' do
+  test 'error message for unsupported full text search' do
     @controller = Arvados::V1::CollectionsController.new
     authorize_with :active
     get :index, params: {
       filters: [['uuid', '@@', 'abcdef']],
     }
     assert_response 422
-    assert_match(/not supported/, json_response['errors'].join(' '))
-  end
-
-  test 'difficult characters in full text search' do
-    @controller = Arvados::V1::CollectionsController.new
-    authorize_with :active
-    get :index, params: {
-      filters: [['any', '@@', 'a|b"c']],
-    }
-    assert_response :success
-    # (Doesn't matter so much which results are returned.)
-  end
-
-  test 'array operand in full text search' do
-    @controller = Arvados::V1::CollectionsController.new
-    authorize_with :active
-    get :index, params: {
-      filters: [['any', '@@', ['abc', 'def']]],
-    }
-    assert_response 422
-    assert_match(/not supported/, json_response['errors'].join(' '))
+    assert_match(/no longer supported/, json_response['errors'].join(' '))
   end
 
   test 'api responses provide timestamps with nanoseconds' do
@@ -100,58 +80,6 @@ class Arvados::V1::FiltersTest < ActionController::TestCase
     end
   end
 
-  test "full text search with count='none'" do
-    @controller = Arvados::V1::GroupsController.new
-    authorize_with :admin
-
-    get :contents, params: {
-      format: :json,
-      count: 'none',
-      limit: 1000,
-      filters: [['any', '@@', Rails.configuration.ClusterID]],
-    }
-
-    assert_response :success
-
-    all_objects = Hash.new(0)
-    json_response['items'].map{|o| o['kind']}.each{|t| all_objects[t] += 1}
-
-    assert_equal true, all_objects['arvados#group']>0
-    assert_equal true, all_objects['arvados#job']>0
-    assert_equal true, all_objects['arvados#pipelineInstance']>0
-    assert_equal true, all_objects['arvados#pipelineTemplate']>0
-
-    # Perform test again mimicking a second page request with:
-    # last_object_class = PipelineInstance
-    #   and hence groups and jobs should not be included in the response
-    # offset = 5, which means first 5 pipeline instances were already received in page 1
-    #   and hence the remaining pipeline instances and all other object types should be included in the response
-
-    @test_counter = 0  # Reset executed action counter
-
-    @controller = Arvados::V1::GroupsController.new
-
-    get :contents, params: {
-      format: :json,
-      count: 'none',
-      limit: 1000,
-      offset: '5',
-      last_object_class: 'PipelineInstance',
-      filters: [['any', '@@', Rails.configuration.ClusterID]],
-    }
-
-    assert_response :success
-
-    second_page = Hash.new(0)
-    json_response['items'].map{|o| o['kind']}.each{|t| second_page[t] += 1}
-
-    assert_equal false, second_page.include?('arvados#group')
-    assert_equal false, second_page.include?('arvados#job')
-    assert_equal true, second_page['arvados#pipelineInstance']>0
-    assert_equal all_objects['arvados#pipelineInstance'], second_page['arvados#pipelineInstance']+5
-    assert_equal true, second_page['arvados#pipelineTemplate']>0
-  end
-
   [['prop1', '=', 'value1', [:collection_with_prop1_value1], [:collection_with_prop1_value2, :collection_with_prop2_1]],
    ['prop1', '!=', 'value1', [:collection_with_prop1_value2, :collection_with_prop2_1], [:collection_with_prop1_value1]],
    ['prop1', 'exists', true, [:collection_with_prop1_value1, :collection_with_prop1_value2, :collection_with_prop1_value3, :collection_with_prop1_other1], [:collection_with_prop2_1]],
index 73cbad64303391e82ef593d7a9cffc080ae6084f..070e964e538c6d0f23992b5d1426be7f88f7146d 100644 (file)
@@ -373,75 +373,6 @@ class CollectionsApiTest < ActionDispatch::IntegrationTest
     end
   end
 
-  test "search collection using full text search" do
-    # create collection to be searched for
-    signed_manifest = Collection.sign_manifest(". 85877ca2d7e05498dd3d109baf2df106+95+A3a4e26a366ee7e4ed3e476ccf05354761be2e4ae@545a9920 0:95:file_in_subdir1\n./subdir2/subdir3 2bbc341c702df4d8f42ec31f16c10120+64+A315d7e7bad2ce937e711fc454fae2d1194d14d64@545a9920 0:32:file1_in_subdir3.txt 32:32:file2_in_subdir3.txt\n./subdir2/subdir3/subdir4 2bbc341c702df4d8f42ec31f16c10120+64+A315d7e7bad2ce937e711fc454fae2d1194d14d64@545a9920 0:32:file3_in_subdir4.txt 32:32:file4_in_subdir4.txt\n", api_token(:active))
-    post "/arvados/v1/collections",
-      params: {
-        format: :json,
-        collection: {description: 'specific collection description', manifest_text: signed_manifest}.to_json,
-      },
-      headers: auth(:active)
-    assert_response :success
-    assert_equal true, json_response['manifest_text'].include?('file4_in_subdir4.txt')
-
-    # search using the filename
-    search_using_full_text_search 'subdir2', 0
-    search_using_full_text_search 'subdir2:*', 1
-    search_using_full_text_search 'subdir2/subdir3/subdir4', 1
-    search_using_full_text_search 'file4:*', 1
-    search_using_full_text_search 'file4_in_subdir4.txt', 1
-    search_using_full_text_search 'subdir2 file4:*', 0      # first word is incomplete
-    search_using_full_text_search 'subdir2/subdir3/subdir4 file4:*', 1
-    search_using_full_text_search 'subdir2/subdir3/subdir4 file4_in_subdir4.txt', 1
-    search_using_full_text_search 'ile4', 0                 # not a prefix match
-  end
-
-  def search_using_full_text_search search_filter, expected_items
-    get '/arvados/v1/collections',
-      params: {:filters => [['any', '@@', search_filter]].to_json},
-      headers: auth(:active)
-    assert_response :success
-    response_items = json_response['items']
-    assert_not_nil response_items
-    if expected_items == 0
-      assert_empty response_items
-    else
-      refute_empty response_items
-      first_item = response_items.first
-      assert_not_nil first_item
-    end
-  end
-
-  # search for the filename in the file_names column and expect error
-  test "full text search not supported for individual columns" do
-    get '/arvados/v1/collections',
-      params: {:filters => [['name', '@@', 'General']].to_json},
-      headers: auth(:active)
-    assert_response 422
-  end
-
-  [
-    'quick fox',
-    'quick_brown fox',
-    'brown_ fox',
-    'fox dogs',
-  ].each do |search_filter|
-    test "full text search ignores special characters and finds with filter #{search_filter}" do
-      # description: The quick_brown_fox jumps over the lazy_dog
-      # full text search treats '_' as space apparently
-      get '/arvados/v1/collections',
-        params: {:filters => [['any', '@@', search_filter]].to_json},
-        headers: auth(:active)
-      assert_response 200
-      response_items = json_response['items']
-      assert_not_nil response_items
-      first_item = response_items.first
-      refute_empty first_item
-      assert_equal first_item['description'], 'The quick_brown_fox jumps over the lazy_dog'
-    end
-  end
-
   test "create and get collection with properties" do
     # create collection to be searched for
     signed_manifest = Collection.sign_manifest(". bad42fa702ae3ea7d888fef11b46f450+44 0:44:my_test_file.txt\n", api_token(:active))
index aa67166f7e613a7b71f1ce8b798cf3b23b060e4a..e76f2b54068ad729fe94f87a3d2150846674db0b 100644 (file)
@@ -64,46 +64,6 @@ class GroupsTest < ActionDispatch::IntegrationTest
     end
   end
 
-  [
-    ['Collection_', true],            # collections and pipelines templates
-    ['hash', true],                   # pipeline templates
-    ['fa7aeb5140e2848d39b', false],   # script_parameter of pipeline instances
-    ['fa7aeb5140e2848d39b:*', true],  # script_parameter of pipeline instances
-    ['project pipeline', true],       # finds "Completed pipeline in A Project"
-    ['project pipeli:*', true],       # finds "Completed pipeline in A Project"
-    ['proje pipeli:*', false],        # first word is incomplete, so no prefix match
-    ['no-such-thing', false],         # script_parameter of pipeline instances
-  ].each do |search_filter, expect_results|
-    test "full text search of group-owned objects for #{search_filter}" do
-      get "/arvados/v1/groups/contents",
-        params: {
-          id: groups(:aproject).uuid,
-          limit: 5,
-          :filters => [['any', '@@', search_filter]].to_json
-        },
-        headers: auth(:active)
-      assert_response :success
-      if expect_results
-        refute_empty json_response['items']
-        json_response['items'].each do |item|
-          assert item['uuid']
-          assert_equal groups(:aproject).uuid, item['owner_uuid']
-        end
-      else
-        assert_empty json_response['items']
-      end
-    end
-  end
-
-  test "full text search is not supported for individual columns" do
-    get "/arvados/v1/groups/contents",
-      params: {
-        :filters => [['name', '@@', 'Private']].to_json
-      },
-      headers: auth(:active)
-    assert_response 422
-  end
-
   test "group contents with include trash collections" do
     get "/arvados/v1/groups/contents",
       params: {
index 64f78071350a6736994986eff3267c541e72b4f6..1e2e08059ef92c75827bcea9baa5d95edc2945c4 100644 (file)
@@ -155,51 +155,6 @@ class ArvadosModelTest < ActiveSupport::TestCase
     end
   end
 
-  test "full text search index exists on models" do
-    indexes = {}
-    conn = ActiveRecord::Base.connection
-    conn.exec_query("SELECT i.relname as indname,
-      i.relowner as indowner,
-      idx.indrelid::regclass::text as table,
-      am.amname as indam,
-      idx.indkey,
-      ARRAY(
-            SELECT pg_get_indexdef(idx.indexrelid, k + 1, true)
-                   FROM generate_subscripts(idx.indkey, 1) as k
-                   ORDER BY k
-                   ) as keys,
-      idx.indexprs IS NOT NULL as indexprs,
-      idx.indpred IS NOT NULL as indpred
-      FROM   pg_index as idx
-      JOIN   pg_class as i
-      ON     i.oid = idx.indexrelid
-      JOIN   pg_am as am
-      ON     i.relam = am.oid
-      JOIN   pg_namespace as ns
-      ON     ns.oid = i.relnamespace
-      AND    ns.nspname = ANY(current_schemas(false))").each do |idx|
-      if idx['keys'].match(/to_tsvector/)
-        indexes[idx['table']] ||= []
-        indexes[idx['table']] << idx
-      end
-    end
-    fts_tables =  ["collections", "container_requests", "groups", "jobs",
-                   "pipeline_instances", "pipeline_templates", "workflows"]
-    fts_tables.each do |table|
-      table_class = table.classify.constantize
-      if table_class.respond_to?('full_text_searchable_columns')
-        expect = table_class.full_text_searchable_columns
-        ok = false
-        indexes[table].andand.each do |idx|
-          if expect == idx['keys'].scan(/COALESCE\(([A-Za-z_]+)/).flatten
-            ok = true
-          end
-        end
-        assert ok, "#{table} has no full-text index\nexpect: #{expect.inspect}\nfound: #{indexes[table].inspect}"
-      end
-    end
-  end
-
   [
     %w[collections collections_trgm_text_search_idx],
     %w[container_requests container_requests_trgm_text_search_idx],
index c288786c1323246c589af91aa53fc3d0aa37c557..e58c273a6d85adebff1dc1d3d113fa01f6b3a893 100644 (file)
@@ -10,6 +10,7 @@ class UserNotifierTest < ActionMailer::TestCase
   test "account is setup" do
     user = users :active
 
+    Rails.configuration.Users.UserNotifierEmailBcc = ConfigLoader.to_OrderedOptions({"bcc-notify@example.com" => {}, "bcc-notify2@example.com" => {}})
     Rails.configuration.Users.UserSetupMailText = %{
 <% if not @user.full_name.empty? -%>
 <%= @user.full_name %>,
@@ -33,6 +34,7 @@ The Arvados team.
 
     # Test the body of the sent email contains what we expect it to
     assert_equal Rails.configuration.Users.UserNotifierEmailFrom, email.from.first
+    assert_equal Rails.configuration.Users.UserNotifierEmailBcc.stringify_keys.keys, email.bcc
     assert_equal user.email, email.to.first
     assert_equal 'Welcome to Arvados - account enabled', email.subject
     assert (email.body.to_s.include? 'Your Arvados shell account has been set up'),
index 78cbd0d8cfd06f1c638549151c56e74a32025237..2b963d9a68659c342de818af52789f0d96031ef3 100644 (file)
@@ -298,20 +298,52 @@ class CollectionDirectoryBase(Directory):
     def on_event(self, event, collection, name, item):
         if collection == self.collection:
             name = self.sanitize_filename(name)
-            _logger.debug("collection notify %s %s %s %s", event, collection, name, item)
-            with llfuse.lock:
-                if event == arvados.collection.ADD:
-                    self.new_entry(name, item, self.mtime())
-                elif event == arvados.collection.DEL:
-                    ent = self._entries[name]
-                    del self._entries[name]
-                    self.inodes.invalidate_entry(self, name)
-                    self.inodes.del_entry(ent)
-                elif event == arvados.collection.MOD:
-                    if hasattr(item, "fuse_entry") and item.fuse_entry is not None:
-                        self.inodes.invalidate_inode(item.fuse_entry)
-                    elif name in self._entries:
-                        self.inodes.invalidate_inode(self._entries[name])
+
+            #
+            # It's possible for another thread to have llfuse.lock and
+            # be waiting on collection.lock.  Meanwhile, we released
+            # llfuse.lock earlier in the stack, but are still holding
+            # on to the collection lock, and now we need to re-acquire
+            # llfuse.lock.  If we don't release the collection lock,
+            # we'll deadlock where we're holding the collection lock
+            # waiting for llfuse.lock and the other thread is holding
+            # llfuse.lock and waiting for the collection lock.
+            #
+            # The correct locking order here is to take llfuse.lock
+            # first, then the collection lock.
+            #
+            # Since collection.lock is an RLock, it might be locked
+            # multiple times, so we need to release it multiple times,
+            # keep a count, then re-lock it the correct number of
+            # times.
+            #
+            lockcount = 0
+            try:
+                while True:
+                    self.collection.lock.release()
+                    lockcount += 1
+            except RuntimeError:
+                pass
+
+            try:
+                with llfuse.lock:
+                    with self.collection.lock:
+                        if event == arvados.collection.ADD:
+                            self.new_entry(name, item, self.mtime())
+                        elif event == arvados.collection.DEL:
+                            ent = self._entries[name]
+                            del self._entries[name]
+                            self.inodes.invalidate_entry(self, name)
+                            self.inodes.del_entry(ent)
+                        elif event == arvados.collection.MOD:
+                            if hasattr(item, "fuse_entry") and item.fuse_entry is not None:
+                                self.inodes.invalidate_inode(item.fuse_entry)
+                            elif name in self._entries:
+                                self.inodes.invalidate_inode(self._entries[name])
+            finally:
+                while lockcount > 0:
+                    self.collection.lock.acquire()
+                    lockcount -= 1
 
     def populate(self, mtime):
         self._mtime = mtime
@@ -587,10 +619,26 @@ class TmpCollectionDirectory(CollectionDirectoryBase):
     def on_event(self, *args, **kwargs):
         super(TmpCollectionDirectory, self).on_event(*args, **kwargs)
         if self.collection_record_file:
-            with llfuse.lock:
-                self.collection_record_file.invalidate()
-            self.inodes.invalidate_inode(self.collection_record_file)
-            _logger.debug("%s invalidated collection record", self)
+
+            # See discussion in CollectionDirectoryBase.on_event
+            lockcount = 0
+            try:
+                while True:
+                    self.collection.lock.release()
+                    lockcount += 1
+            except RuntimeError:
+                pass
+
+            try:
+                with llfuse.lock:
+                    with self.collection.lock:
+                        self.collection_record_file.invalidate()
+                        self.inodes.invalidate_inode(self.collection_record_file)
+                        _logger.debug("%s invalidated collection record", self)
+            finally:
+                while lockcount > 0:
+                    self.collection.lock.acquire()
+                    lockcount -= 1
 
     def collection_record(self):
         with llfuse.lock_released:
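The lock juggling above follows a general pattern for re-establishing a correct lock order when an RLock may already be held an unknown number of times. A standalone sketch of the idiom, independent of llfuse and Arvados:

    import threading

    outer = threading.Lock()    # must always be acquired first
    inner = threading.RLock()   # may already be held several times by this thread

    def relock_in_order(fn):
        # Drain our hold on `inner`, counting how many times we held it.
        count = 0
        try:
            while True:
                inner.release()
                count += 1
        except RuntimeError:
            pass  # RLock.release() raises once this thread no longer holds it
        try:
            # Take the locks in the correct order: outer first, then inner.
            with outer:
                with inner:
                    fn()
        finally:
            # Restore the caller's original hold count on `inner`.
            for _ in range(count):
                inner.acquire()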
index 897447dd11c7a95a5b113d867fb0de28cbed6844..16dcd2aaf6ee5d57e9bb60176a643a9116df8f9e 100644 (file)
@@ -23,6 +23,7 @@ import (
        "os"
        "sort"
        "strings"
+       "sync/atomic"
        "time"
 
        "git.arvados.org/arvados.git/lib/config"
@@ -367,6 +368,94 @@ func (s *HandlerSuite) TestReadsOrderedByStorageClassPriority(c *check.C) {
        }
 }
 
+func (s *HandlerSuite) TestPutWithNoWritableVolumes(c *check.C) {
+       s.cluster.Volumes = map[string]arvados.Volume{
+               "zzzzz-nyw5e-111111111111111": {
+                       Driver:         "mock",
+                       Replication:    1,
+                       ReadOnly:       true,
+                       StorageClasses: map[string]bool{"class1": true}},
+       }
+       c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
+       resp := IssueRequest(s.handler,
+               &RequestTester{
+                       method:         "PUT",
+                       uri:            "/" + TestHash,
+                       requestBody:    TestBlock,
+                       storageClasses: "class1",
+               })
+       c.Check(resp.Code, check.Equals, FullError.HTTPCode)
+       c.Check(s.handler.volmgr.mountMap["zzzzz-nyw5e-111111111111111"].Volume.(*MockVolume).CallCount("Put"), check.Equals, 0)
+}
+
+func (s *HandlerSuite) TestConcurrentWritesToMultipleStorageClasses(c *check.C) {
+       s.cluster.Volumes = map[string]arvados.Volume{
+               "zzzzz-nyw5e-111111111111111": {
+                       Driver:         "mock",
+                       Replication:    1,
+                       StorageClasses: map[string]bool{"class1": true}},
+               "zzzzz-nyw5e-121212121212121": {
+                       Driver:         "mock",
+                       Replication:    1,
+                       StorageClasses: map[string]bool{"class1": true, "class2": true}},
+               "zzzzz-nyw5e-222222222222222": {
+                       Driver:         "mock",
+                       Replication:    1,
+                       StorageClasses: map[string]bool{"class2": true}},
+       }
+
+       for _, trial := range []struct {
+               setCounter uint32 // value to stuff vm.counter, to control offset
+               classes    string // desired classes
+               put111     int    // expected number of "put" ops on 11111... after 2x put reqs
+               put121     int    // expected number of "put" ops on 12121...
+               put222     int    // expected number of "put" ops on 22222...
+               cmp111     int    // expected number of "compare" ops on 11111... after 2x put reqs
+               cmp121     int    // expected number of "compare" ops on 12121...
+               cmp222     int    // expected number of "compare" ops on 22222...
+       }{
+               {0, "class1",
+                       1, 0, 0,
+                       2, 1, 0}, // first put compares on all vols with class1; second put succeeds after checking 111
+               {0, "class2",
+                       0, 1, 0,
+                       0, 2, 1}, // first put compares on all vols with class2; second put succeeds after checking 121
+               {0, "class1,class2",
+                       1, 1, 0,
+                       2, 2, 1}, // first put compares on all vols; second put succeeds after checking 111 and 121
+               {1, "class1,class2",
+                       0, 1, 0, // vm.counter offset is 1 so the first volume attempted is 121
+                       2, 2, 1}, // first put compares on all vols; second put succeeds after checking 111 and 121
+               {0, "class1,class2,class404",
+                       1, 1, 0,
+                       2, 2, 1}, // first put compares on all vols; second put doesn't compare on 222 because it already satisfied class2 on 121
+       } {
+               c.Logf("%+v", trial)
+               s.cluster.StorageClasses = map[string]arvados.StorageClassConfig{
+                       "class1": {},
+                       "class2": {},
+                       "class3": {},
+               }
+               c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
+               atomic.StoreUint32(&s.handler.volmgr.counter, trial.setCounter)
+               for i := 0; i < 2; i++ {
+                       IssueRequest(s.handler,
+                               &RequestTester{
+                                       method:         "PUT",
+                                       uri:            "/" + TestHash,
+                                       requestBody:    TestBlock,
+                                       storageClasses: trial.classes,
+                               })
+               }
+               c.Check(s.handler.volmgr.mountMap["zzzzz-nyw5e-111111111111111"].Volume.(*MockVolume).CallCount("Put"), check.Equals, trial.put111)
+               c.Check(s.handler.volmgr.mountMap["zzzzz-nyw5e-121212121212121"].Volume.(*MockVolume).CallCount("Put"), check.Equals, trial.put121)
+               c.Check(s.handler.volmgr.mountMap["zzzzz-nyw5e-222222222222222"].Volume.(*MockVolume).CallCount("Put"), check.Equals, trial.put222)
+               c.Check(s.handler.volmgr.mountMap["zzzzz-nyw5e-111111111111111"].Volume.(*MockVolume).CallCount("Compare"), check.Equals, trial.cmp111)
+               c.Check(s.handler.volmgr.mountMap["zzzzz-nyw5e-121212121212121"].Volume.(*MockVolume).CallCount("Compare"), check.Equals, trial.cmp121)
+               c.Check(s.handler.volmgr.mountMap["zzzzz-nyw5e-222222222222222"].Volume.(*MockVolume).CallCount("Compare"), check.Equals, trial.cmp222)
+       }
+}
+
 // Test TOUCH requests.
 func (s *HandlerSuite) TestTouchHandler(c *check.C) {
        c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
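The behavior exercised above is driven by the cluster configuration: volumes advertise the storage classes they provide, and each class can carry a priority that orders write attempts. A sketch of the corresponding YAML config; the names and UUID are placeholders to adapt per cluster:

    Clusters:
      zzzzz:
        StorageClasses:
          class1:
            Priority: 10
          class2:
            Priority: 5
        Volumes:
          zzzzz-nyw5e-111111111111111:
            Driver: Directory
            Replication: 1
            StorageClasses:
              class1: true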
index 2b469a13eb993e0827bac8ae1ebe4db46bc8c4df..910033ebb1d8408c90a4bde441d7edc8d99b109a 100644 (file)
@@ -18,6 +18,7 @@ import (
        "strconv"
        "strings"
        "sync"
+       "sync/atomic"
        "time"
 
        "git.arvados.org/arvados.git/sdk/go/arvados"
@@ -741,6 +742,7 @@ func GetBlock(ctx context.Context, volmgr *RRVolumeManager, hash string, buf []b
 }
 
 type putProgress struct {
+       classNeeded      map[string]bool
        classTodo        map[string]bool
        mountUsed        map[*VolumeMount]bool
        totalReplication int
@@ -769,7 +771,7 @@ func (pr putProgress) ClassReplication() string {
 
 func (pr *putProgress) Add(mnt *VolumeMount) {
        if pr.mountUsed[mnt] {
-               logrus.Warnf("BUG? superfluous extra write to mount %s", mnt)
+               logrus.Warnf("BUG? superfluous extra write to mount %s", mnt.UUID)
                return
        }
        pr.mountUsed[mnt] = true
@@ -780,6 +782,21 @@ func (pr *putProgress) Add(mnt *VolumeMount) {
        }
 }
 
+func (pr *putProgress) Sub(mnt *VolumeMount) {
+       if !pr.mountUsed[mnt] {
+               logrus.Warnf("BUG? Sub called with no prior matching Add: %s", mnt.UUID)
+               return
+       }
+       pr.mountUsed[mnt] = false
+       pr.totalReplication -= mnt.Replication
+       for class := range mnt.StorageClasses {
+               pr.classDone[class] -= mnt.Replication
+               if pr.classNeeded[class] {
+                       pr.classTodo[class] = true
+               }
+       }
+}
+
 func (pr *putProgress) Done() bool {
        return len(pr.classTodo) == 0 && pr.totalReplication > 0
 }
@@ -800,47 +817,65 @@ func (pr *putProgress) Want(mnt *VolumeMount) bool {
        return false
 }
 
-func newPutResult(classes []string) putProgress {
+func (pr *putProgress) Copy() *putProgress {
+       cp := putProgress{
+               classNeeded:      pr.classNeeded,
+               classTodo:        make(map[string]bool, len(pr.classTodo)),
+               classDone:        make(map[string]int, len(pr.classDone)),
+               mountUsed:        make(map[*VolumeMount]bool, len(pr.mountUsed)),
+               totalReplication: pr.totalReplication,
+       }
+       for k, v := range pr.classTodo {
+               cp.classTodo[k] = v
+       }
+       for k, v := range pr.classDone {
+               cp.classDone[k] = v
+       }
+       for k, v := range pr.mountUsed {
+               cp.mountUsed[k] = v
+       }
+       return &cp
+}
+
+func newPutProgress(classes []string) putProgress {
        pr := putProgress{
-               classTodo: make(map[string]bool, len(classes)),
-               classDone: map[string]int{},
-               mountUsed: map[*VolumeMount]bool{},
+               classNeeded: make(map[string]bool, len(classes)),
+               classTodo:   make(map[string]bool, len(classes)),
+               classDone:   map[string]int{},
+               mountUsed:   map[*VolumeMount]bool{},
        }
        for _, c := range classes {
                if c != "" {
+                       pr.classNeeded[c] = true
                        pr.classTodo[c] = true
                }
        }
        return pr
 }
 
-// PutBlock Stores the BLOCK (identified by the content id HASH) in Keep.
-//
-// PutBlock(ctx, block, hash)
-//   Stores the BLOCK (identified by the content id HASH) in Keep.
-//
-//   The MD5 checksum of the block must be identical to the content id HASH.
-//   If not, an error is returned.
+// PutBlock stores the given block on one or more volumes.
 //
-//   PutBlock stores the BLOCK on the first Keep volume with free space.
-//   A failure code is returned to the user only if all volumes fail.
+// The MD5 checksum of the block must match the given hash.
 //
-//   On success, PutBlock returns nil.
-//   On failure, it returns a KeepError with one of the following codes:
+// The block is written to each writable volume (ordered by priority
+// and then UUID, see volume.go) until at least one replica has been
+// stored in each of the requested storage classes.
 //
-//   500 Collision
-//          A different block with the same hash already exists on this
-//          Keep server.
-//   422 MD5Fail
-//          The MD5 hash of the BLOCK does not match the argument HASH.
-//   503 Full
-//          There was not enough space left in any Keep volume to store
-//          the object.
-//   500 Fail
-//          The object could not be stored for some other reason (e.g.
-//          all writes failed). The text of the error message should
-//          provide as much detail as possible.
+// The returned error, if any, is a KeepError with one of the
+// following codes:
 //
+// 500 Collision
+//        A different block with the same hash already exists on this
+//        Keep server.
+// 422 MD5Fail
+//        The MD5 hash of the BLOCK does not match the argument HASH.
+// 503 Full
+//        There was not enough space left in any Keep volume to store
+//        the object.
+// 500 Fail
+//        The object could not be stored for some other reason (e.g.
+//        all writes failed). The text of the error message should
+//        provide as much detail as possible.
 func PutBlock(ctx context.Context, volmgr *RRVolumeManager, block []byte, hash string, wantStorageClasses []string) (putProgress, error) {
        log := ctxlog.FromContext(ctx)
 
@@ -851,72 +886,88 @@ func PutBlock(ctx context.Context, volmgr *RRVolumeManager, block []byte, hash s
                return putProgress{}, RequestHashError
        }
 
-       result := newPutResult(wantStorageClasses)
+       result := newPutProgress(wantStorageClasses)
 
        // If we already have this data, it's intact on disk, and we
        // can update its timestamp, return success. If we have
        // different data with the same hash, return failure.
-       if err := CompareAndTouch(ctx, volmgr, hash, block, &result); err != nil {
+       if err := CompareAndTouch(ctx, volmgr, hash, block, &result); err != nil || result.Done() {
                return result, err
        }
        if ctx.Err() != nil {
                return result, ErrClientDisconnect
        }
 
-       // Choose a Keep volume to write to.
-       // If this volume fails, try all of the volumes in order.
-       if mnt := volmgr.NextWritable(); mnt == nil || !result.Want(mnt) {
-               // fall through to "try all volumes" below
-       } else if err := mnt.Put(ctx, hash, block); err != nil {
-               log.WithError(err).Errorf("%s: Put(%s) failed", mnt.Volume, hash)
-       } else {
-               result.Add(mnt)
-               if result.Done() {
-                       return result, nil
-               }
-       }
-       if ctx.Err() != nil {
-               return putProgress{}, ErrClientDisconnect
-       }
-
-       writables := volmgr.AllWritable()
+       writables := volmgr.NextWritable()
        if len(writables) == 0 {
                log.Error("no writable volumes")
-               return putProgress{}, FullError
+               return result, FullError
        }
 
-       allFull := true
+       var wg sync.WaitGroup
+       var mtx sync.Mutex
+       cond := sync.Cond{L: &mtx}
+       // pending predicts what result will be if all pending writes
+       // succeed.
+       pending := result.Copy()
+       var allFull atomic.Value
+       allFull.Store(true)
+
+       // We hold the lock for the duration of the "each volume" loop
+       // below, except when it is released during cond.Wait().
+       mtx.Lock()
+
        for _, mnt := range writables {
+               // Wait until our decision to use this mount does not
+               // depend on the outcome of pending writes.
+               for result.Want(mnt) && !pending.Want(mnt) {
+                       cond.Wait()
+               }
                if !result.Want(mnt) {
                        continue
                }
-               err := mnt.Put(ctx, hash, block)
-               if ctx.Err() != nil {
-                       return result, ErrClientDisconnect
-               }
-               switch err {
-               case nil:
-                       result.Add(mnt)
-                       if result.Done() {
-                               return result, nil
+               mnt := mnt
+               pending.Add(mnt)
+               wg.Add(1)
+               go func() {
+                       log.Debugf("PutBlock: start write to %s", mnt.UUID)
+                       defer wg.Done()
+                       err := mnt.Put(ctx, hash, block)
+
+                       mtx.Lock()
+                       if err != nil {
+                               log.Debugf("PutBlock: write to %s failed", mnt.UUID)
+                               pending.Sub(mnt)
+                       } else {
+                               log.Debugf("PutBlock: write to %s succeeded", mnt.UUID)
+                               result.Add(mnt)
                        }
-                       continue
-               case FullError:
-                       continue
-               default:
-                       // The volume is not full but the
-                       // write did not succeed.  Report the
-                       // error and continue trying.
-                       allFull = false
-                       log.WithError(err).Errorf("%s: Put(%s) failed", mnt.Volume, hash)
-               }
+                       cond.Broadcast()
+                       mtx.Unlock()
+
+                       if err != nil && err != FullError && ctx.Err() == nil {
+                               // The volume is not full but the
+                               // write did not succeed.  Report the
+                               // error and continue trying.
+                               allFull.Store(false)
+                               log.WithError(err).Errorf("%s: Put(%s) failed", mnt.Volume, hash)
+                       }
+               }()
+       }
+       mtx.Unlock()
+       wg.Wait()
+       if ctx.Err() != nil {
+               return result, ErrClientDisconnect
+       }
+       if result.Done() {
+               return result, nil
        }
 
        if result.totalReplication > 0 {
                // Some, but not all, of the storage classes were
                // satisfied. This qualifies as success.
                return result, nil
-       } else if allFull {
+       } else if allFull.Load().(bool) {
                log.Error("all volumes with qualifying storage classes are full")
                return putProgress{}, FullError
        } else {
index 9bfc6ca3e5191d2953ceac75f915a07cab19c69f..3f7c9cb79b4b24b71c3c441e49235fd657d77e69 100644 (file)
@@ -344,11 +344,11 @@ func makeRRVolumeManager(logger logrus.FieldLogger, cluster *arvados.Cluster, my
                        vm.writables = append(vm.writables, mnt)
                }
        }
-       // pri(i): return highest priority of any storage class
-       // offered by vm.readables[i]
-       pri := func(i int) int {
+       // pri(mnt): return highest priority of any storage class
+       // offered by mnt
+       pri := func(mnt *VolumeMount) int {
                any, best := false, 0
-               for class := range vm.readables[i].KeepMount.StorageClasses {
+               for class := range mnt.KeepMount.StorageClasses {
                        if p := cluster.StorageClasses[class].Priority; !any || best < p {
                                best = p
                                any = true
@@ -356,14 +356,20 @@ func makeRRVolumeManager(logger logrus.FieldLogger, cluster *arvados.Cluster, my
                }
                return best
        }
-       // sort vm.readables, first by highest priority of any offered
+       // less(a,b): sort first by highest priority of any offered
        // storage class (highest->lowest), then by volume UUID
-       sort.Slice(vm.readables, func(i, j int) bool {
-               if pi, pj := pri(i), pri(j); pi != pj {
-                       return pi > pj
+       less := func(a, b *VolumeMount) bool {
+               if pa, pb := pri(a), pri(b); pa != pb {
+                       return pa > pb
                } else {
-                       return vm.readables[i].KeepMount.UUID < vm.readables[j].KeepMount.UUID
+                       return a.KeepMount.UUID < b.KeepMount.UUID
                }
+       }
+       sort.Slice(vm.readables, func(i, j int) bool {
+               return less(vm.readables[i], vm.readables[j])
+       })
+       sort.Slice(vm.writables, func(i, j int) bool {
+               return less(vm.writables[i], vm.writables[j])
        })
        return vm, nil
 }
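
The hunk above factors the priority/UUID comparison out of the sort.Slice closure so that vm.readables and vm.writables can be ordered identically. A small self-contained illustration of the same two-key sort, with a hypothetical mount struct standing in for the keepstore type:

    package main

    import (
    	"fmt"
    	"sort"
    )

    type mount struct {
    	uuid     string
    	priority int
    }

    func main() {
    	// Sort descending by priority, then ascending by UUID -- the
    	// two-key ordering the diff applies to both volume slices.
    	less := func(a, b mount) bool {
    		if a.priority != b.priority {
    			return a.priority > b.priority
    		}
    		return a.uuid < b.uuid
    	}
    	readables := []mount{{"zzz", 5}, {"aaa", 5}, {"mmm", 9}}
    	writables := []mount{{"bbb", 1}, {"aaa", 1}}
    	sort.Slice(readables, func(i, j int) bool { return less(readables[i], readables[j]) })
    	sort.Slice(writables, func(i, j int) bool { return less(writables[i], writables[j]) })
    	fmt.Println(readables) // [{mmm 9} {aaa 5} {zzz 5}]
    	fmt.Println(writables) // [{aaa 1} {bbb 1}]
    }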
@@ -384,18 +390,22 @@ func (vm *RRVolumeManager) AllReadable() []*VolumeMount {
        return vm.readables
 }
 
-// AllWritable returns an array of all writable volumes
+// AllWritable returns writable volumes, sorted by priority/uuid. Used
+// by CompareAndTouch to ensure higher-priority volumes are checked
+// first.
 func (vm *RRVolumeManager) AllWritable() []*VolumeMount {
        return vm.writables
 }
 
-// NextWritable returns the next writable
-func (vm *RRVolumeManager) NextWritable() *VolumeMount {
+// NextWritable returns writable volumes, rotated by vm.counter so
+// each volume gets a turn to be first. Used by PutBlock to distribute
+// new data across available volumes.
+func (vm *RRVolumeManager) NextWritable() []*VolumeMount {
        if len(vm.writables) == 0 {
                return nil
        }
-       i := atomic.AddUint32(&vm.counter, 1)
-       return vm.writables[i%uint32(len(vm.writables))]
+       offset := (int(atomic.AddUint32(&vm.counter, 1)) - 1) % len(vm.writables)
+       return append(append([]*VolumeMount(nil), vm.writables[offset:]...), vm.writables[:offset]...)
 }
 
 // VolumeStats returns an ioStats for the given volume.
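
NextWritable now returns the full writable list rotated by an atomic counter instead of a single volume, so PutBlock can fall through to every mount while still spreading first choices evenly. A sketch of the rotation idiom in isolation; the double append copies into a fresh slice so callers never mutate the shared backing array:

    package main

    import (
    	"fmt"
    	"sync/atomic"
    )

    type rotator struct {
    	items   []string
    	counter uint32
    }

    // next returns a fresh slice rotated one position further on each
    // call, so every item takes a turn at the front.
    func (r *rotator) next() []string {
    	if len(r.items) == 0 {
    		return nil
    	}
    	offset := (int(atomic.AddUint32(&r.counter, 1)) - 1) % len(r.items)
    	return append(append([]string(nil), r.items[offset:]...), r.items[:offset]...)
    }

    func main() {
    	r := &rotator{items: []string{"vol-a", "vol-b", "vol-c"}}
    	for i := 0; i < 4; i++ {
    		fmt.Println(r.next())
    	}
    	// [vol-a vol-b vol-c]
    	// [vol-b vol-c vol-a]
    	// [vol-c vol-a vol-b]
    	// [vol-a vol-b vol-c]
    }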
index 79f0d3f4f6c2f0a21ddc5ab3d1e711831c1be896..c112972c4303103a6fee1fc920fa309022b340ee 100644 (file)
@@ -73,7 +73,7 @@ ENV DEBIAN_FRONTEND noninteractive
 #  gnupg2 runit python3-pip python3-setuptools python3-yaml shellinabox netcat less
 RUN apt-get update && \
     apt-get -yq --no-install-recommends -o Acquire::Retries=6 install \
-    gnupg2 runit python3-pip python3-setuptools python3-yaml shellinabox netcat less && \
+    gnupg2 runit python3-pip python3-setuptools python3-yaml shellinabox netcat less vim-tiny && \
     apt-get clean
 
 ENV GOPATH /var/lib/gopath
index fb3eaaeee875e147f761cef7dbb8f317be7aaa31..a112cb93fe07cadbcfb814606497df0b0e9328f8 100755 (executable)
@@ -59,5 +59,6 @@ fi
 export VERSION=$(./version-at-commit.sh)
 export BROWSER=none
 export CI=true
+export HTTPS=false
 node --version
 exec node node_modules/react-scripts/scripts/start.js
index 3019a9fb1cb50ac5595c0b76228489f2b1e9e4d4..a3463bfc5c5f796b414adb68747b16ff975427eb 100644 (file)
@@ -35,7 +35,7 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
                                cp -vr /vagrant/tests /home/vagrant/tests;
                                sed 's#cluster_fixme_or_this_wont_work#harpo#g;
                                     s#domain_fixme_or_this_wont_work#local#g;
-                                    s/#\ BRANCH=\"master\"/\ BRANCH=\"master\"/g;
+                                    s/#\ BRANCH=\"main\"/\ BRANCH=\"main\"/g;
                                     s#CONTROLLER_EXT_SSL_PORT=443#CONTROLLER_EXT_SSL_PORT=8443#g' \
                                     /vagrant/local.params.example.single_host_multiple_hostnames > /tmp/local.params.single_host_multiple_hostnames"
      arv.vm.provision "shell",
@@ -78,7 +78,7 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
                                cp -vr /vagrant/tests /home/vagrant/tests;
                                sed 's#HOSTNAME_EXT=\"\"#HOSTNAME_EXT=\"zeppo.local\"#g;
                                     s#cluster_fixme_or_this_wont_work#zeppo#g;
-                                    s/#\ BRANCH=\"master\"/\ BRANCH=\"master\"/g;
+                                    s/#\ BRANCH=\"main\"/\ BRANCH=\"main\"/g;
                                     s#domain_fixme_or_this_wont_work#local#g;' \
                                     /vagrant/local.params.example.single_host_single_hostname > /tmp/local.params.single_host_single_hostname"
      arv.vm.provision "shell",
index 23e007650480ab28414b5bbbd4251cd655e75f3b..ccf6bac7895e1e873a05c04200f77eeecdea702d 100644 (file)
@@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+# vim: ft=yaml
 ---
 # Copyright (C) The Arvados Authors. All rights reserved.
 #
@@ -26,6 +28,7 @@ arvados:
  ## manage OS packages with some other tool and you don't want us messing
  ## with your setup.
   ruby:
+
     ## We set these to `true` here for testing purposes.
     ## They both default to `false`.
     manage_ruby: true
@@ -67,8 +70,15 @@ arvados:
       host: 127.0.0.1
       password: "__DATABASE_PASSWORD__"
       user: __CLUSTER___arvados
-      encoding: en_US.utf8
-      client_encoding: UTF8
+      extra_conn_params:
+        client_encoding: UTF8
+      # CentOS 7 does not enable SSL by default, so we disable
+      # it here for formula testing purposes only.
+      # You should not do this in production; configure
+      # Postgres certificates correctly instead.
+      {%- if grains.os_family in ('RedHat',) %}
+        sslmode: disable
+      {%- endif %}
 
     tls:
       # certificate: ''
@@ -76,6 +86,13 @@ arvados:
       # required to test with arvados-snakeoil certs
       insecure: true
 
+    resources:
+      virtual_machines:
+        shell:
+          name: webshell
+          backend: 127.0.1.1
+          port: 4200
+
     ### TOKENS
     tokens:
       system_root: __SYSTEM_ROOT_TOKEN__
index b2f12c77399bdd9df8c48f7d3ac9f9004670f1aa..54087f6d6d0fe43ae9c1a12e71ac2604935a2635 100644 (file)
@@ -3,17 +3,23 @@
 #
 # SPDX-License-Identifier: AGPL-3.0
 
+{%- if grains.os_family in ('RedHat',) %}
+  {%- set group = 'nginx' %}
+{%- else %}
+  {%- set group = 'www-data' %}
+{%- endif %}
+
 ### ARVADOS
 arvados:
   config:
-    group: www-data
+    group: {{ group }}
 
 ### NGINX
 nginx:
   ### SITES
   servers:
     managed:
-      arvados_api:
+      arvados_api.conf:
         enabled: true
         overwrite: true
         config:
index 3adf0580a43647e2919b37c796b39b79d89001e4..195e9af82e5f3b84187c6467eb229ae4284e5d0c 100644 (file)
@@ -20,7 +20,7 @@ nginx:
   servers:
     managed:
       ### DEFAULT
-      arvados_controller_default:
+      arvados_controller_default.conf:
         enabled: true
         overwrite: true
         config:
@@ -33,9 +33,11 @@ nginx:
             - location /:
               - return: '301 https://$host$request_uri'
 
-      arvados_controller_ssl:
+      arvados_controller_ssl.conf:
         enabled: true
         overwrite: true
+        requires:
+          file: nginx_snippet_arvados-snakeoil.conf
         config:
           - server:
             - server_name: __CLUSTER__.__DOMAIN__
@@ -52,7 +54,8 @@ nginx:
               - proxy_set_header: 'X-Real-IP $remote_addr'
               - proxy_set_header: 'X-Forwarded-For $proxy_add_x_forwarded_for'
               - proxy_set_header: 'X-External-Client $external_client'
-            - include: 'snippets/arvados-snakeoil.conf'
+            - include: snippets/ssl_hardening_default.conf
+            - include: snippets/arvados-snakeoil.conf
             - access_log: /var/log/nginx/__CLUSTER__.__DOMAIN__.access.log combined
             - error_log: /var/log/nginx/__CLUSTER__.__DOMAIN__.error.log
             - client_max_body_size: 128m
index 2d8922df9a8c727768bd4d57d69c6adab5c0fef3..91179d4a867271a2bfc4c6f1b6d4338e2fb2ee60 100644 (file)
@@ -16,7 +16,7 @@ nginx:
   servers:
     managed:
       ### DEFAULT
-      arvados_keepproxy_default:
+      arvados_keepproxy_default.conf:
         enabled: true
         overwrite: true
         config:
@@ -29,9 +29,11 @@ nginx:
             - location /:
               - return: '301 https://$host$request_uri'
 
-      arvados_keepproxy_ssl:
+      arvados_keepproxy_ssl.conf:
         enabled: true
         overwrite: true
+        requires:
+          file: nginx_snippet_arvados-snakeoil.conf
         config:
           - server:
             - server_name: keep.__CLUSTER__.__DOMAIN__
@@ -52,6 +54,7 @@ nginx:
             - client_max_body_size: 64M
             - proxy_http_version: '1.1'
             - proxy_request_buffering: 'off'
-            - include: 'snippets/arvados-snakeoil.conf'
+            - include: snippets/ssl_hardening_default.conf
+            - include: snippets/arvados-snakeoil.conf
             - access_log: /var/log/nginx/keepproxy.__CLUSTER__.__DOMAIN__.access.log combined
             - error_log: /var/log/nginx/keepproxy.__CLUSTER__.__DOMAIN__.error.log
index d180a3bad42e974d7d1796673e5f04df5a94e3ae..9ea16bfb546eea46e0f420eb6fbd9fa330dbaa52 100644 (file)
@@ -16,7 +16,7 @@ nginx:
   servers:
     managed:
       ### DEFAULT
-      arvados_collections_download_default:
+      arvados_collections_download_default.conf:
         enabled: true
         overwrite: true
         config:
@@ -30,9 +30,11 @@ nginx:
               - return: '301 https://$host$request_uri'
 
       ### COLLECTIONS / DOWNLOAD
-      arvados_collections_download_ssl:
+      arvados_collections_download_ssl.conf:
         enabled: true
         overwrite: true
+        requires:
+          file: nginx_snippet_arvados-snakeoil.conf
         config:
           - server:
             - server_name: collections.__CLUSTER__.__DOMAIN__ download.__CLUSTER__.__DOMAIN__
@@ -52,6 +54,7 @@ nginx:
             - client_max_body_size: 0
             - proxy_http_version: '1.1'
             - proxy_request_buffering: 'off'
-            - include: 'snippets/arvados-snakeoil.conf'
+            - include: snippets/ssl_hardening_default.conf
+            - include: snippets/arvados-snakeoil.conf
             - access_log: /var/log/nginx/collections.__CLUSTER__.__DOMAIN__.access.log combined
             - error_log: /var/log/nginx/collections.__CLUSTER__.__DOMAIN__.error.log
index 6ce75faa70c3d135076ffcf05d0b6dd2fcc76eef..a4d3c34f260e3cb5905830c40e19388f31561415 100644 (file)
@@ -3,19 +3,69 @@
 #
 # SPDX-License-Identifier: AGPL-3.0
 
+{%- set passenger_pkg = 'nginx-mod-http-passenger'
+                          if grains.osfinger in ('CentOS Linux-7',) else
+                        'libnginx-mod-http-passenger' %}
+{%- set passenger_mod = '/usr/lib64/nginx/modules/ngx_http_passenger_module.so'
+                          if grains.osfinger in ('CentOS Linux-7',) else
+                        '/usr/lib/nginx/modules/ngx_http_passenger_module.so' %}
+{%- set passenger_ruby = '/usr/local/rvm/rubies/ruby-2.7.2/bin/ruby'
+                           if grains.osfinger in ('CentOS Linux-7', 'Ubuntu-18.04',) else
+                         '/usr/bin/ruby' %}
+
 ### NGINX
 nginx:
   install_from_phusionpassenger: true
   lookup:
-    passenger_package: libnginx-mod-http-passenger
-    passenger_config_file: /etc/nginx/conf.d/mod-http-passenger.conf
+    passenger_package: {{ passenger_pkg }}
+  ### PASSENGER
+  passenger:
+    passenger_ruby: {{ passenger_ruby }}
 
   ### SERVER
   server:
     config:
-      include: 'modules-enabled/*.conf'
+      # This is required to get the passenger module loaded.
+      # On Debian it can be done with:
+      # include: 'modules-enabled/*.conf'
+      load_module: {{ passenger_mod }}
+
       worker_processes: 4
 
+  ### SNIPPETS
+  snippets:
+    # Based on https://ssl-config.mozilla.org/#server=nginx&version=1.14.2&config=intermediate&openssl=1.1.1d&guideline=5.4
+    ssl_hardening_default.conf:
+      - ssl_session_timeout: 1d
+      - ssl_session_cache: 'shared:arvadosSSL:10m'
+      - ssl_session_tickets: 'off'
+
+      # intermediate configuration
+      - ssl_protocols: TLSv1.2 TLSv1.3
+      - ssl_ciphers: ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384
+      - ssl_prefer_server_ciphers: 'off'
+
+      # HSTS (ngx_http_headers_module is required) (63072000 seconds)
+      - add_header: 'Strict-Transport-Security "max-age=63072000" always'
+
+      # OCSP stapling
+      # FIXME! Stapling does not work with self-signed certificates, so disabling for tests
+      # - ssl_stapling: 'on'
+      # - ssl_stapling_verify: 'on'
+
+      # verify chain of trust of OCSP response using Root CA and Intermediate certs
+      # - ssl_trusted_certificate /path/to/root_CA_cert_plus_intermediates
+
+      # curl https://ssl-config.mozilla.org/ffdhe2048.txt > /path/to/dhparam
+      # - ssl_dhparam: /path/to/dhparam
+
+      # replace with the IP address of your resolver
+      # - resolver: 127.0.0.1
+
+    arvados-snakeoil.conf:
+      - ssl_certificate: /etc/ssl/private/arvados-snakeoil-cert.pem
+      - ssl_certificate_key: /etc/ssl/private/arvados-snakeoil-cert.key
+
   ### SITES
   servers:
     managed:
index e75f0443434285785b2b5444f83524f6d94058a5..9b73ab4a09e7282774e11e3c36ac02afb30983d7 100644 (file)
@@ -3,6 +3,20 @@
 #
 # SPDX-License-Identifier: AGPL-3.0
 
+# This parameter is used here to generate a list of upstreams and vhosts.
+# The dict is provided for convenience and would normally be managed some other
+# way, but the different orchestration tools that could manage it are outside
+# the scope of this formula and its examples.
+# These upstreams should match those defined in `arvados:cluster:resources:virtual_machines`
+{% set webshell_virtual_machines = {
+  'shell': {
+    'name': 'webshell',
+    'backend': '127.0.1.1',
+    'port': 4200,
+  }
+}
+%}
+
 ### NGINX
 nginx:
   ### SERVER
@@ -11,13 +25,20 @@ nginx:
 
       ### STREAMS
       http:
-        upstream webshell_upstream:
-          - server: 'shell.internal:4200 fail_timeout=10s'
+        {%- for vm, params in webshell_virtual_machines.items() %}
+          {%- set vm_name = params.name | default(vm) %}
+          {%- set vm_backend = params.backend | default(vm_name) %}
+          {%- set vm_port = params.port | default(4200) %}
+
+        upstream {{ vm_name }}_upstream:
+          - server: '{{ vm_backend }}:{{ vm_port }} fail_timeout=10s'
+
+        {%- endfor %}
 
   ### SITES
   servers:
     managed:
-      arvados_webshell_default:
+      arvados_webshell_default.conf:
         enabled: true
         overwrite: true
         config:
@@ -30,17 +51,21 @@ nginx:
             - location /:
               - return: '301 https://$host$request_uri'
 
-      arvados_webshell_ssl:
+      arvados_webshell_ssl.conf:
         enabled: true
         overwrite: true
+        requires:
+          file: nginx_snippet_arvados-snakeoil.conf
         config:
           - server:
             - server_name: webshell.__CLUSTER__.__DOMAIN__
             - listen:
               - __CONTROLLER_EXT_SSL_PORT__ http2 ssl
             - index: index.html index.htm
-            - location /shell.__CLUSTER__.__DOMAIN__:
-              - proxy_pass: 'http://webshell_upstream'
+            {%- for vm, params in webshell_virtual_machines.items() %}
+              {%- set vm_name = params.name | default(vm) %}
+            - location /{{ vm_name }}:
+              - proxy_pass: 'http://{{ vm_name }}_upstream'
               - proxy_read_timeout: 90
               - proxy_connect_timeout: 90
               - proxy_set_header: 'Host $http_host'
@@ -67,8 +92,9 @@ nginx:
                 - add_header: "'Access-Control-Allow-Origin' '*'"
                 - add_header: "'Access-Control-Allow-Methods' 'GET, POST, OPTIONS'"
                 - add_header: "'Access-Control-Allow-Headers' 'DNT,X-CustomHeader,Keep-Alive,User-Agent,X-Requested-With,If-Modified-Since,Cache-Control,Content-Type'"
-
-            - include: 'snippets/arvados-snakeoil.conf'
+            {%- endfor %}
+            - include: snippets/ssl_hardening_default.conf
+            - include: snippets/arvados-snakeoil.conf
             - access_log: /var/log/nginx/webshell.__CLUSTER__.__DOMAIN__.access.log combined
             - error_log: /var/log/nginx/webshell.__CLUSTER__.__DOMAIN__.error.log
 
index 3a354ac293de96d93faac2c9013750ac825287aa..bcd0457c9e18f2012e7d19254814fb24f0dfbe93 100644 (file)
@@ -16,7 +16,7 @@ nginx:
   servers:
     managed:
       ### DEFAULT
-      arvados_websocket_default:
+      arvados_websocket_default.conf:
         enabled: true
         overwrite: true
         config:
@@ -29,9 +29,11 @@ nginx:
             - location /:
               - return: '301 https://$host$request_uri'
 
-      arvados_websocket_ssl:
+      arvados_websocket_ssl.conf:
         enabled: true
         overwrite: true
+        requires:
+          file: nginx_snippet_arvados-snakeoil.conf
         config:
           - server:
             - server_name: ws.__CLUSTER__.__DOMAIN__
@@ -53,6 +55,7 @@ nginx:
             - client_max_body_size: 64M
             - proxy_http_version: '1.1'
             - proxy_request_buffering: 'off'
-            - include: 'snippets/arvados-snakeoil.conf'
+            - include: snippets/ssl_hardening_default.conf
+            - include: snippets/arvados-snakeoil.conf
             - access_log: /var/log/nginx/ws.__CLUSTER__.__DOMAIN__.access.log combined
             - error_log: /var/log/nginx/ws.__CLUSTER__.__DOMAIN__.error.log
index 8fdd553991ed86be5d83adb056e12f6348a9bdee..44bd16fe3e9e94cf1a75f7f8edbd13a3b11fd848 100644 (file)
@@ -1,12 +1,18 @@
 ---
 # Copyright (C) The Arvados Authors. All rights reserved.
 #
-# SPDX-License-Identifier: AGPL-3.0
+# SPDX-License-Identifier: Apache-2.0
+
+{%- if grains.os_family in ('RedHat',) %}
+  {%- set group = 'nginx' %}
+{%- else %}
+  {%- set group = 'www-data' %}
+{%- endif %}
 
 ### ARVADOS
 arvados:
   config:
-    group: www-data
+    group: {{ group }}
 
 ### NGINX
 nginx:
@@ -14,7 +20,7 @@ nginx:
   servers:
     managed:
       ### DEFAULT
-      arvados_workbench2_default:
+      arvados_workbench2_default.conf:
         enabled: true
         overwrite: true
         config:
@@ -27,9 +33,11 @@ nginx:
             - location /:
               - return: '301 https://$host$request_uri'
 
-      arvados_workbench2_ssl:
+      arvados_workbench2_ssl.conf:
         enabled: true
         overwrite: true
+        requires:
+          file: nginx_snippet_arvados-snakeoil.conf
         config:
           - server:
             - server_name: workbench2.__CLUSTER__.__DOMAIN__
@@ -43,6 +51,7 @@ nginx:
                 - return: 503
             - location /config.json:
               - return: {{ "200 '" ~ '{"API_HOST":"__CLUSTER__.__DOMAIN__:__CONTROLLER_EXT_SSL_PORT__"}' ~ "'" }}
-            - include: 'snippets/arvados-snakeoil.conf'
+            - include: snippets/ssl_hardening_default.conf
+            - include: snippets/arvados-snakeoil.conf
             - access_log: /var/log/nginx/workbench2.__CLUSTER__.__DOMAIN__.access.log combined
             - error_log: /var/log/nginx/workbench2.__CLUSTER__.__DOMAIN__.error.log
index 649af10b6d8b5c497b5cde653df2aef2e86e0f6a..6b7ab969f964606bd88d50ec4a3f66cb63f517d6 100644 (file)
@@ -3,10 +3,16 @@
 #
 # SPDX-License-Identifier: AGPL-3.0
 
+{%- if grains.os_family in ('RedHat',) %}
+  {%- set group = 'nginx' %}
+{%- else %}
+  {%- set group = 'www-data' %}
+{%- endif %}
+
 ### ARVADOS
 arvados:
   config:
-    group: www-data
+    group: {{ group }}
 
 ### NGINX
 nginx:
@@ -23,7 +29,7 @@ nginx:
   servers:
     managed:
       ### DEFAULT
-      arvados_workbench_default:
+      arvados_workbench_default.conf:
         enabled: true
         overwrite: true
         config:
@@ -36,9 +42,11 @@ nginx:
             - location /:
               - return: '301 https://$host$request_uri'
 
-      arvados_workbench_ssl:
+      arvados_workbench_ssl.conf:
         enabled: true
         overwrite: true
+        requires:
+          file: nginx_snippet_arvados-snakeoil.conf
         config:
           - server:
             - server_name: workbench.__CLUSTER__.__DOMAIN__
@@ -54,11 +62,12 @@ nginx:
               - proxy_set_header: 'Host $http_host'
               - proxy_set_header: 'X-Real-IP $remote_addr'
               - proxy_set_header: 'X-Forwarded-For $proxy_add_x_forwarded_for'
-            - include: 'snippets/arvados-snakeoil.conf'
+            - include: snippets/ssl_hardening_default.conf
+            - include: snippets/arvados-snakeoil.conf
             - access_log: /var/log/nginx/workbench.__CLUSTER__.__DOMAIN__.access.log combined
             - error_log: /var/log/nginx/workbench.__CLUSTER__.__DOMAIN__.error.log
 
-      arvados_workbench_upstream:
+      arvados_workbench_upstream.conf:
         enabled: true
         overwrite: true
         config:
index 71e712cad3c278d3ac5bbedc2d3b36bd2fe59993..fda1545a05bcf8048b47f2838a058ccd7c542ffb 100644 (file)
@@ -5,11 +5,29 @@
 
 ### POSTGRESQL
 postgres:
-  use_upstream_repo: false
+  # CentOS 7's postgres package is too old, so we need to force using upstream's.
+  # This is not required for the Debian family, which already ships with PG 11+.
+  {%- if salt['grains.get']('os_family') == 'RedHat' %}
+  use_upstream_repo: true
+  version: '12'
+
+  pkgs_deps:
+    - libicu
+    - libxslt
+    - systemd-sysv
+
+  pkgs_extra:
+    - postgresql12-contrib
+
+  {%- else %}
   pkgs_extra:
     - postgresql-contrib
+  {%- endif %}
   postgresconf: |-
     listen_addresses = '*'  # listen on all interfaces
+    #ssl = on
+    #ssl_cert_file = '/etc/ssl/certs/arvados-snakeoil-cert.pem'
+    #ssl_key_file = '/etc/ssl/private/arvados-snakeoil-cert.key'
   acls:
     - ['local', 'all', 'postgres', 'peer']
     - ['local', 'all', 'all', 'peer']
index fb1473def250dea3405890a54de90070d248fae0..91617e4fa4765e5e3365a4269937ac6987a94d17 100644 (file)
@@ -1,15 +1,22 @@
 # Copyright (C) The Arvados Authors. All rights reserved.
 #
-# SPDX-License-Identifier: AGPL-3.0
+# SPDX-License-Identifier: Apache-2.0
 
 {%- set curr_tpldir = tpldir %}
 {%- set tpldir = 'arvados' %}
 {%- from "arvados/map.jinja" import arvados with context %}
 {%- set tpldir = curr_tpldir %}
 
-{%- set arvados_ca_cert_file = '/etc/ssl/certs/arvados-snakeoil-ca.pem' %}
+include:
+  - nginx.passenger
+  - nginx.config
+  - nginx.service
+
+# Debian uses different dirs for certs and keys, but since this is a snake-oil
+# example, we'll keep it simple here.
+{%- set arvados_ca_cert_file = '/etc/ssl/private/arvados-snakeoil-ca.pem' %}
 {%- set arvados_ca_key_file = '/etc/ssl/private/arvados-snakeoil-ca.key' %}
-{%- set arvados_cert_file = '/etc/ssl/certs/arvados-snakeoil-cert.pem' %}
+{%- set arvados_cert_file = '/etc/ssl/private/arvados-snakeoil-cert.pem' %}
 {%- set arvados_csr_file = '/etc/ssl/private/arvados-snakeoil-cert.csr' %}
 {%- set arvados_key_file = '/etc/ssl/private/arvados-snakeoil-cert.key' %}
 
@@ -30,7 +37,7 @@ arvados_test_salt_states_examples_single_host_snakeoil_certs_dependencies_pkg_in
       - ca-certificates
 
 arvados_test_salt_states_examples_single_host_snakeoil_certs_arvados_snake_oil_ca_cmd_run:
-  # Taken from https://github.com/arvados/arvados/blob/main/tools/arvbox/lib/arvbox/docker/service/certificate/run
+  # Taken from https://github.com/arvados/arvados/blob/master/tools/arvbox/lib/arvbox/docker/service/certificate/run
   cmd.run:
     - name: |
        # These dirs are not too CentOS-ish, but this is a helper script
@@ -121,6 +128,9 @@ arvados_test_salt_states_examples_single_host_snakeoil_certs_arvados_snake_oil_c
     - require:
       - pkg: arvados_test_salt_states_examples_single_host_snakeoil_certs_dependencies_pkg_installed
       - cmd: arvados_test_salt_states_examples_single_host_snakeoil_certs_arvados_snake_oil_ca_cmd_run
+    # We need this in place before we can add the nginx snippet
+    - require_in:
+      - file: nginx_snippet_arvados-snakeoil.conf
 
 {%- if grains.get('os_family') == 'Debian' %}
 arvados_test_salt_states_examples_single_host_snakeoil_certs_ssl_cert_pkg_installed:
@@ -130,29 +140,13 @@ arvados_test_salt_states_examples_single_host_snakeoil_certs_ssl_cert_pkg_instal
       - sls: postgres
 
 arvados_test_salt_states_examples_single_host_snakeoil_certs_certs_permissions_cmd_run:
-  cmd.run:
-    - name: |
-        chown root:ssl-cert {{ arvados_key_file }}
+  file.managed:
+    - name: {{ arvados_key_file }}
+    - owner: root
+    - group: ssl-cert
     - require:
       - cmd: arvados_test_salt_states_examples_single_host_snakeoil_certs_arvados_snake_oil_cert_cmd_run
       - pkg: arvados_test_salt_states_examples_single_host_snakeoil_certs_ssl_cert_pkg_installed
-{%- endif %}
-
-arvados_test_salt_states_examples_single_host_snakeoil_certs_nginx_snakeoil_file_managed:
-  file.managed:
-    - name: /etc/nginx/snippets/arvados-snakeoil.conf
-    - contents: |
-        ssl_certificate {{ arvados_cert_file }};
-        ssl_certificate_key {{ arvados_key_file }};
-    - watch_in:
-      - service: nginx_service
-    - require:
-      - pkg: passenger_install
-      - cmd: arvados_test_salt_states_examples_single_host_snakeoil_certs_certs_permissions_cmd_run
     - require_in:
-      - file: nginx_config
-      - service: nginx_service
-    - watch_in:
-      - service: nginx_service
-
-
+      - file: nginx_snippet_arvados-snakeoil.conf
+{%- endif %}
index 6ce75faa70c3d135076ffcf05d0b6dd2fcc76eef..a4d3c34f260e3cb5905830c40e19388f31561415 100644 (file)
@@ -3,19 +3,69 @@
 #
 # SPDX-License-Identifier: AGPL-3.0
 
+{%- set passenger_pkg = 'nginx-mod-http-passenger'
+                          if grains.osfinger in ('CentOS Linux-7',) else
+                        'libnginx-mod-http-passenger' %}
+{%- set passenger_mod = '/usr/lib64/nginx/modules/ngx_http_passenger_module.so'
+                          if grains.osfinger in ('CentOS Linux-7',) else
+                        '/usr/lib/nginx/modules/ngx_http_passenger_module.so' %}
+{%- set passenger_ruby = '/usr/local/rvm/rubies/ruby-2.7.2/bin/ruby'
+                           if grains.osfinger in ('CentOS Linux-7', 'Ubuntu-18.04',) else
+                         '/usr/bin/ruby' %}
+
 ### NGINX
 nginx:
   install_from_phusionpassenger: true
   lookup:
-    passenger_package: libnginx-mod-http-passenger
-    passenger_config_file: /etc/nginx/conf.d/mod-http-passenger.conf
+    passenger_package: {{ passenger_pkg }}
+  ### PASSENGER
+  passenger:
+    passenger_ruby: {{ passenger_ruby }}
 
   ### SERVER
   server:
     config:
-      include: 'modules-enabled/*.conf'
+      # This is required to get the passenger module loaded.
+      # On Debian it can be done with:
+      # include: 'modules-enabled/*.conf'
+      load_module: {{ passenger_mod }}
+
       worker_processes: 4
 
+  ### SNIPPETS
+  snippets:
+    # Based on https://ssl-config.mozilla.org/#server=nginx&version=1.14.2&config=intermediate&openssl=1.1.1d&guideline=5.4
+    ssl_hardening_default.conf:
+      - ssl_session_timeout: 1d
+      - ssl_session_cache: 'shared:arvadosSSL:10m'
+      - ssl_session_tickets: 'off'
+
+      # intermediate configuration
+      - ssl_protocols: TLSv1.2 TLSv1.3
+      - ssl_ciphers: ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384
+      - ssl_prefer_server_ciphers: 'off'
+
+      # HSTS (ngx_http_headers_module is required) (63072000 seconds)
+      - add_header: 'Strict-Transport-Security "max-age=63072000" always'
+
+      # OCSP stapling
+      # FIXME! Stapling does not work with self-signed certificates, so disabling for tests
+      # - ssl_stapling: 'on'
+      # - ssl_stapling_verify: 'on'
+
+      # verify chain of trust of OCSP response using Root CA and Intermediate certs
+      # - ssl_trusted_certificate /path/to/root_CA_cert_plus_intermediates
+
+      # curl https://ssl-config.mozilla.org/ffdhe2048.txt > /path/to/dhparam
+      # - ssl_dhparam: /path/to/dhparam
+
+      # replace with the IP address of your resolver
+      # - resolver: 127.0.0.1
+
+    arvados-snakeoil.conf:
+      - ssl_certificate: /etc/ssl/private/arvados-snakeoil-cert.pem
+      - ssl_certificate_key: /etc/ssl/private/arvados-snakeoil-cert.key
+
   ### SITES
   servers:
     managed:
index 130fb5e937affe145b06c9f75b0ec2f6540003c8..b6929fb887ba6827a0979872ccee415a01d22c94 100644 (file)
@@ -1,15 +1,22 @@
 # Copyright (C) The Arvados Authors. All rights reserved.
 #
-# SPDX-License-Identifier: AGPL-3.0
+# SPDX-License-Identifier: Apache-2.0
 
 {%- set curr_tpldir = tpldir %}
 {%- set tpldir = 'arvados' %}
 {%- from "arvados/map.jinja" import arvados with context %}
 {%- set tpldir = curr_tpldir %}
 
-{%- set arvados_ca_cert_file = '/etc/ssl/certs/arvados-snakeoil-ca.pem' %}
+include:
+  - nginx.passenger
+  - nginx.config
+  - nginx.service
+
+# Debian uses different dirs for certs and keys, but since this is a snake-oil
+# example, we'll keep it simple here.
+{%- set arvados_ca_cert_file = '/etc/ssl/private/arvados-snakeoil-ca.pem' %}
 {%- set arvados_ca_key_file = '/etc/ssl/private/arvados-snakeoil-ca.key' %}
-{%- set arvados_cert_file = '/etc/ssl/certs/arvados-snakeoil-cert.pem' %}
+{%- set arvados_cert_file = '/etc/ssl/private/arvados-snakeoil-cert.pem' %}
 {%- set arvados_csr_file = '/etc/ssl/private/arvados-snakeoil-cert.csr' %}
 {%- set arvados_key_file = '/etc/ssl/private/arvados-snakeoil-cert.key' %}
 
@@ -30,7 +37,7 @@ arvados_test_salt_states_examples_single_host_snakeoil_certs_dependencies_pkg_in
       - ca-certificates
 
 arvados_test_salt_states_examples_single_host_snakeoil_certs_arvados_snake_oil_ca_cmd_run:
-  # Taken from https://github.com/arvados/arvados/blob/main/tools/arvbox/lib/arvbox/docker/service/certificate/run
+  # Taken from https://github.com/arvados/arvados/blob/master/tools/arvbox/lib/arvbox/docker/service/certificate/run
   cmd.run:
     - name: |
        # These dirs are not too CentOS-ish, but this is a helper script
@@ -124,6 +131,9 @@ arvados_test_salt_states_examples_single_host_snakeoil_certs_arvados_snake_oil_c
     - require:
       - pkg: arvados_test_salt_states_examples_single_host_snakeoil_certs_dependencies_pkg_installed
       - cmd: arvados_test_salt_states_examples_single_host_snakeoil_certs_arvados_snake_oil_ca_cmd_run
+    # We need this in place before we can add the nginx snippet
+    - require_in:
+      - file: nginx_snippet_arvados-snakeoil.conf
 
 {%- if grains.get('os_family') == 'Debian' %}
 arvados_test_salt_states_examples_single_host_snakeoil_certs_ssl_cert_pkg_installed:
@@ -133,26 +143,13 @@ arvados_test_salt_states_examples_single_host_snakeoil_certs_ssl_cert_pkg_instal
       - sls: postgres
 
 arvados_test_salt_states_examples_single_host_snakeoil_certs_certs_permissions_cmd_run:
-  cmd.run:
-    - name: |
-        chown root:ssl-cert {{ arvados_key_file }}
+  file.managed:
+    - name: {{ arvados_key_file }}
+    - owner: root
+    - group: ssl-cert
     - require:
       - cmd: arvados_test_salt_states_examples_single_host_snakeoil_certs_arvados_snake_oil_cert_cmd_run
       - pkg: arvados_test_salt_states_examples_single_host_snakeoil_certs_ssl_cert_pkg_installed
-{%- endif %}
-
-arvados_test_salt_states_examples_single_host_snakeoil_certs_nginx_snakeoil_file_managed:
-  file.managed:
-    - name: /etc/nginx/snippets/arvados-snakeoil.conf
-    - contents: |
-        ssl_certificate {{ arvados_cert_file }};
-        ssl_certificate_key {{ arvados_key_file }};
-    - require:
-      - pkg: nginx_install
     - require_in:
-      - file: nginx_config
-      - service: nginx_service
-    - watch_in:
-      - service: nginx_service
-
-
+      - file: nginx_snippet_arvados-snakeoil.conf
+{%- endif %}
index 17b7b888846fca194a04f60af829dd5ee271a4e5..283c631ec5853d34b63ca5db28e1ebd003225579 100644 (file)
@@ -100,6 +100,6 @@ RELEASE="production"
 # ARVADOS_TAG="2.2.0"
 # POSTGRES_TAG="v0.41.6"
 # NGINX_TAG="temp-fix-missing-statements-in-pillar"
-# DOCKER_TAG="v1.0.0"
+# DOCKER_TAG="v2.0.7"
 # LOCALE_TAG="v0.3.4"
 # LETSENCRYPT_TAG="v2.1.0"
index ae54e7437a83db83b7373eaa6ef87d70aa31e8b5..e23634e8c4d6d2a9ec50593bdea3e328618dffd6 100644 (file)
@@ -72,6 +72,6 @@ RELEASE="production"
 # ARVADOS_TAG="2.2.0"
 # POSTGRES_TAG="v0.41.6"
 # NGINX_TAG="temp-fix-missing-statements-in-pillar"
-# DOCKER_TAG="v1.0.0"
+# DOCKER_TAG="v2.0.7"
 # LOCALE_TAG="v0.3.4"
 # LETSENCRYPT_TAG="v2.1.0"
index a35bd45bffc258d7c3a8dd4b59eb564bfc13c4b8..ae9804863f4a47dc179ed71efed6d038eb57010c 100644 (file)
@@ -81,6 +81,6 @@ RELEASE="production"
 # ARVADOS_TAG="2.2.0"
 # POSTGRES_TAG="v0.41.6"
 # NGINX_TAG="temp-fix-missing-statements-in-pillar"
-# DOCKER_TAG="v1.0.0"
+# DOCKER_TAG="v2.0.7"
 # LOCALE_TAG="v0.3.4"
 # LETSENCRYPT_TAG="v2.1.0"
index 7ac120e5fd89179f75fcf13608679edfaa2b45e5..b840d86c6f360d3440328bd676dd66656739be5b 100755 (executable)
@@ -1,4 +1,4 @@
-#!/usr/bin/env bash
+#!/bin/bash
 
 # Copyright (C) The Arvados Authors. All rights reserved.
 #
@@ -11,6 +11,7 @@
 # vagrant up
 
 set -o pipefail
+set -x
 
 # capture the directory that the script is running from
 SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
@@ -177,7 +178,7 @@ VERSION="latest"
 # Other formula versions we depend on
 POSTGRES_TAG="v0.41.6"
 NGINX_TAG="temp-fix-missing-statements-in-pillar"
-DOCKER_TAG="v1.0.0"
+DOCKER_TAG="v2.0.7"
 LOCALE_TAG="v0.3.4"
 LETSENCRYPT_TAG="v2.1.0"
 
@@ -232,8 +233,23 @@ fi
 if [ "${DUMP_CONFIG}" = "yes" ]; then
   echo "The provision installer will just dump a config under ${DUMP_SALT_CONFIG_DIR} and exit"
 else
-  apt-get update
-  apt-get install -y curl git jq
+  # Install a few dependency packages
+  # First, let's figure out the OS we're working on
+  OS_ID=$(grep ^ID= /etc/os-release |cut -f 2 -d=  |cut -f 2 -d \")
+  echo "Detected distro: ${OS_ID}"
+
+  case ${OS_ID} in
+    "centos")
+      echo "WARNING! Disabling SELinux, see https://dev.arvados.org/issues/18019"
+      sed -i 's/SELINUX=enforcing/SELINUX=permissive/' /etc/sysconfig/selinux
+      setenforce permissive
+      yum install -y curl git jq
+      ;;
+    "debian"|"ubuntu")
+      DEBIAN_FRONTEND=noninteractive apt update
+      DEBIAN_FRONTEND=noninteractive apt install -y curl git jq
+      ;;
+  esac
 
   if which salt-call; then
     echo "Salt already installed"
@@ -246,6 +262,8 @@ else
 
   # Set salt to masterless mode
   cat > /etc/salt/minion << EOFSM
+failhard: "True"
+
 file_client: local
 file_roots:
   base:
@@ -607,5 +625,10 @@ fi
 # Test that the installation finished correctly
 if [ "x${TEST}" = "xyes" ]; then
   cd ${T_DIR}
-  ./run-test.sh
+  # If we use RVM, we need to run this with it, or most ruby commands will fail
+  RVM_EXEC=""
+  if [ -x /usr/local/rvm/bin/rvm-exec ]; then
+    RVM_EXEC="/usr/local/rvm/bin/rvm-exec"
+  fi
+  ${RVM_EXEC} ./run-test.sh
 fi
index 53c51a2c5a097d2e8b45446ea26a7e2a26800f2d..020efa94e8f61303e06da5d087ecd712f9f1991f 100755 (executable)
@@ -55,13 +55,17 @@ echo "Activating user '__INITIAL_USER__'"
 arv user update --uuid "${user_uuid}" --user '{"is_active": true}'
 
 echo "Getting the user API TOKEN"
-user_api_token=$(arv api_client_authorization list --filters "[[\"owner_uuid\", \"=\", \"${user_uuid}\"],[\"kind\", \"==\", \"arvados#apiClientAuthorization\"]]" --limit=1 |jq -r .items[].api_token)
+user_api_token=$(arv api_client_authorization list | jq -r ".items[] | select( .owner_uuid == \"${user_uuid}\" ).api_token" | head -1)
 
 if [ "x${user_api_token}" = "x" ]; then
+  echo "No existing token found for user '__INITIAL_USER__' (user_uuid: '${user_uuid}'). Creating token"
   user_api_token=$(arv api_client_authorization create --api-client-authorization "{\"owner_uuid\": \"${user_uuid}\"}" | jq -r .api_token)
 fi
 
+echo "API TOKEN FOR user '__INITIAL_USER__': '${user_api_token}'."
+
 # Change to the user's token and run the workflow
+echo "Switching to user '__INITIAL_USER__'"
 export ARVADOS_API_TOKEN="${user_api_token}"
 
 echo "Running test CWL workflow"