Merge branch '18631-shell-login-sync'

author Javier Bértoli <jbertoli@curii.com>

Wed, 6 Apr 2022 13:50:51 +0000 (10:50 -0300)

committer Javier Bértoli <jbertoli@curii.com>

Wed, 6 Apr 2022 13:50:51 +0000 (10:50 -0300)
author Javier Bértoli <jbertoli@curii.com>
Wed, 6 Apr 2022 13:50:51 +0000 (10:50 -0300)
committer Javier Bértoli <jbertoli@curii.com>
Wed, 6 Apr 2022 13:50:51 +0000 (10:50 -0300)
diff --git a/.licenseignore b/.licenseignore

index 97ce38af93b89f2e69a60a152381574591a7d1c8..d13eee39015963c6d33ec6e48e6dbb52298258a8 100644 (file)
--- a/.licenseignore
+++ b/.licenseignore
@@ -88,4 +88,5 @@ sdk/python/tests/fed-migrate/*.cwlex
  doc/install/*.xlsx
  sdk/cwl/tests/wf/hello.txt
  sdk/cwl/tests/wf/indir1/hello2.txt
-sdk/cwl/tests/chipseq/data/Genomes/*
-\ No newline at end of file
+sdk/cwl/tests/chipseq/data/Genomes/*
+CITATION.cff
diff --git a/AUTHORS b/AUTHORS

index b8b75518ff08ad0f0d787f27f52ba8f58f2bece2..fa9fa86d34efe9aae853ca24d717974eb8513edc 100644 (file)
--- a/AUTHORS
+++ b/AUTHORS
@@ -18,7 +18,7 @@ President and Fellows of Harvard College <*@harvard.edu>
  Thomas Mooney <tmooney@genome.wustl.edu>
  Chen Chen <aflyhorse@gmail.com>
  Veritas Genetics, Inc. <*@veritasgenetics.com>
-Curii Corporation, Inc. <*@curii.com>
+Curii Corporation <*@curii.com>
  Dante Tsang <dante@dantetsang.com>
  Codex Genetics Ltd <info@codexgenetics.com>
  Bruno P. Kinoshita <brunodepaulak@yahoo.com.br>
diff --git a/CITATION.cff b/CITATION.cff

new file mode 100644 (file)

index 0000000..df3b35d
--- /dev/null
+++ b/CITATION.cff
@@ -0,0 +1,37 @@
+cff-version: 1.2.0
+message: "If you use this software, please cite it as below."
+authors:
+- name: "The Arvados Authors"
+- family-names: "Amstutz"
+  given-names: "Peter"
+  orcid: "https://orcid.org/0000-0003-3566-7705"
+- family-names: "Bértoli"
+  given-names: "Javier"
+  family-names: "César"
+  given-names: "Nico"
+- family-names: "Clegg"
+  given-names: "Tom"
+  orcid: "https://orcid.org/0000-0001-6751-2930"
+- family-names: "Di Pentima"
+  given-names: "Lucas"
+  orcid: "https://orcid.org/0000-0002-2807-6854"
+- family-names: "Kutyła"
+  given-names: "Daniel"
+- family-names: "Li"
+  given-names: "Jiayong"
+- family-names: "Smith"
+  given-names: "Stephen"
+- family-names: "Vandewege"
+  given-names: "Ward"
+  orcid: "https://orcid.org/0000-0002-2527-6949"
+- family-names: "Wait Zaranek"
+  given-names: "Alexander"
+  orcid: "https://orcid.org/0000-0002-0415-9655"
+- family-names: "Wait Zaranek"
+  given-names: "Sarah"
+  orcid: "https://orcid.org/0000-0003-4716-9121"
+title: "Arvados"
+abstract: "Arvados is an open source platform for managing, processing, and sharing genomic and other large scientific and biomedical data."
+type: software
+url: "https://github.com/arvados/arvados/"
+doi: 10.5281/zenodo.6382942
diff --git a/apps/workbench/app/controllers/application_controller.rb b/apps/workbench/app/controllers/application_controller.rb

index 5312e733f41eb992131752ccf782e112b8c5af2e..f8c8079a1edf4f5878c456d8f4ea0893e2574a32 100644 (file)
--- a/apps/workbench/app/controllers/application_controller.rb
+++ b/apps/workbench/app/controllers/application_controller.rb
@@ -172,7 +172,7 @@ class ApplicationController < ActionController::Base
  
    def find_objects_for_index
      @objects ||= model_class
-    @objects = @objects.filter(@filters).limit(@limit).offset(@offset)
+    @objects = @objects.filter(@filters).limit(@limit).offset(@offset).order(@order)
      @objects.fetch_multiple_pages(false)
    end
  
diff --git a/apps/workbench/config/arvados_config.rb b/apps/workbench/config/arvados_config.rb

index c5cc544b9b8717fc70b51c52c1c4b35ce03a16aa..7cc46d2983490896c36e96c9081bf3e74013efa2 100644 (file)
--- a/apps/workbench/config/arvados_config.rb
+++ b/apps/workbench/config/arvados_config.rb
@@ -199,4 +199,8 @@ ArvadosWorkbench::Application.configure do
      ConfigValidators.validate_wb2_url_config()
      ConfigValidators.validate_download_config()
    end
+  if Rails.configuration.Users.AnonymousUserToken and
+     !Rails.configuration.Users.AnonymousUserToken.starts_with?("v2/")
+    Rails.configuration.Users.AnonymousUserToken = "v2/#{clusterID}-gj3su-anonymouspublic/#{Rails.configuration.Users.AnonymousUserToken}"
+  end
  end
diff --git a/build/run-tests.sh b/build/run-tests.sh

index 3592efbdc2bee5f6c4e25251677c581dcf65c088..67c54c98b0244a72842bccfbb964ce3dbd164c0c 100755 (executable)
--- a/build/run-tests.sh
+++ b/build/run-tests.sh
@@ -556,6 +556,12 @@ setup_ruby_environment() {
              done
              "$bundle" version | tee /dev/stderr | grep -q 'version 2'
          ) || fatal 'install bundler'
+       if test -d /var/lib/arvados-arvbox/ ; then
+           # Inside arvbox, use bundler-installed binstubs.  The
+           # system bundler and rail's own bin/bundle refuse to work.
+           # I don't know why.
+           bundle=binstubs/bundle
+       fi
      fi
  }
  
diff --git a/doc/_includes/_container_scheduling_parameters.liquid b/doc/_includes/_container_scheduling_parameters.liquid

index be046173ad028b92fd9b26a4a2994f5b4c9fd295..636b6df59c455d5badac1bda9ded03434403207d 100644 (file)
--- a/doc/_includes/_container_scheduling_parameters.liquid
+++ b/doc/_includes/_container_scheduling_parameters.liquid
@@ -11,5 +11,5 @@ Parameters to be passed to the container scheduler (e.g., Slurm) when running a
  table(table table-bordered table-condensed).
  |_. Key|_. Type|_. Description|_. Notes|
  |partitions|array of strings|The names of one or more compute partitions that may run this container. If not provided, the system will choose where to run the container.|Optional.|
-|preemptible|boolean|If true, the dispatcher will ask for a preemptible cloud node instance (eg: AWS Spot Instance) to run this container.|Optional. Default is false.|
+|preemptible|boolean|If true, the dispatcher should use a preemptible cloud node instance (eg: AWS Spot Instance) to run this container.  Whether a preemptible instance is actually used "depends on cluster configuration.":{{site.baseurl}}/admin/spot-instances.html|Optional. Default is false.|
  |max_run_time|integer|Maximum running time (in seconds) that this container will be allowed to run before being cancelled.|Optional. Default is 0 (no limit).|
diff --git a/doc/admin/spot-instances.html.textile.liquid b/doc/admin/spot-instances.html.textile.liquid

index 7ca57df0ab0d1cee9ad6fc0e0b0e44f2455edc92..703e70fb8636afe4d99b2b356bc4a4326d0d46e7 100644 (file)
--- a/doc/admin/spot-instances.html.textile.liquid
+++ b/doc/admin/spot-instances.html.textile.liquid
@@ -16,31 +16,57 @@ Currently Arvados supports preemptible instances using AWS and Azure spot instan
  
  h2. Configuration
  
-First, ensure automatic selection of preemptible instances is not disabled in your configuration file (this is enabled by default, but can be disabled with @AlwaysUsePreemptibleInstances: false@), and add entries to @InstanceTypes@ that have @Preemptible: true@.  Typically you want to add both preemptible and non-preemptible entries for each cloud provider VM type.  The @Price@ for preemptible instances is the maximum bid price, the actual price paid is dynamic and will likely be lower.  For example:
+First, configure some @InstanceTypes@ that have @Preemptible: true@. For a preemptible instance, @Price@ determines the maximum bid price; the actual price paid is dynamic and will likely be lower.
+
+Typically you want to add both preemptible and non-preemptible entries for each cloud provider VM type. To do this automatically, use @PreemptiblePriceFactor@ to enable a preemptible version of each listed type, using the given factor to set the maximum bid price relative to the non-preemptible price. Alternatively, you can configure preemptible instance types explicitly. For example, the following two configurations are equivalent:
  
  <pre>
  Clusters:
-  ClusterID: 
+  ClusterID:
      Containers:
-      AlwaysUsePreemptibleInstances: true
+      PreemptiblePriceFactor: 0.8
      InstanceTypes:
        m4.large:
-        Preemptible: false
          ProviderType: m4.large
          VCPUs: 2
          RAM: 8GiB
          AddedScratch: 32GB
          Price: 0.1
-      m4.large.spot:
-        Preemptible: true
+</pre>
+
+<pre>
+Clusters:
+  ClusterID:
+    InstanceTypes:
+      m4.large:
          ProviderType: m4.large
          VCPUs: 2
          RAM: 8GiB
          AddedScratch: 32GB
          Price: 0.1
+      m4.large.preemptible:
+        Preemptible: true
+        ProviderType: m4.large
+        VCPUs: 2
+        RAM: 8GiB
+        AddedScratch: 32GB
+        Price: 0.08
  </pre>
  
-When @AlwaysUsePreemptibleInstances@ is enabled, child containers (workflow steps) will automatically be made preemptible.  Note that because preempting the workflow runner would cancel the entire workflow, the workflow runner runs in a reserved (non-preemptible) instance.
+Next, you can choose to enable automatic use of preemptible instances:
+
+<pre>
+Clusters:
+  ClusterID:
+    Containers:
+      AlwaysUsePreemptibleInstances: true
+</pre>
+
+If @AlwaysUsePreemptibleInstances@ is "true", child containers (workflow steps) will always select preemptible instances, regardless of user option.
+
+If @AlwaysUsePreemptibleInstances@ is "false" (the default) or unspecified, preemptible instance are "used when requested by the user.":{{site.baseurl}}/user/cwl/cwl-run-options.html#preemptible
+
+Note that regardless of the value of @AlwaysUsePreemptibleInstances@, the top level workflow runner container always runs in a reserved (non-preemptible) instance, to avoid situations where the workflow runner is killed requiring the entire to be restarted.
  
  No additional configuration is required, "arvados-dispatch-cloud":{{site.baseurl}}/install/crunch2-cloud/install-dispatch-cloud.html will now start preemptible instances where appropriate.
  
diff --git a/doc/admin/upgrading.html.textile.liquid b/doc/admin/upgrading.html.textile.liquid

index abaa190c8c781e26529fe795be0c06a303a8ace0..7e36088c636bad036dae7a05b139dcfd91535181 100644 (file)
--- a/doc/admin/upgrading.html.textile.liquid
+++ b/doc/admin/upgrading.html.textile.liquid
@@ -28,9 +28,15 @@ TODO: extract this information based on git commit messages and generate changel
  <div class="releasenotes">
  </notextile>
  
-h2(#main). development main (as of 2022-03-08)
+h2(#main). development main (as of 2022-03-??)
  
-"previous: Upgrading to 2.3.0":#v2_3_0
+h2(#v2_4_0). v2.4.0 (2022-04-??)
+
+"previous: Upgrading to 2.3.1":#v2_3_1
+
+h3. Default result order changed
+
+When requesting a list of objects without an explicit @order@ parameter, the default order has changed from @modified_at desc, uuid asc@ to @modified_at desc, uuid desc@.  This means that if two objects have identical @modified_at@ timestamps, the tiebreaker will now be based on @uuid@ in decending order where previously it would be ascending order. The practical effect of this should be minor; with microsecond precision it is unusual to have two records with exactly the same timestamp, and order-sensitive queries should already provide an explicit @order@ parameter.
  
  h3. Ubuntu 18.04 Arvados Python packages now depend on python-3.8
  
@@ -44,27 +50,35 @@ The minimum supported Ruby version is now 2.6.  If you are running Arvados on De
  
  h3. Anonymous token changes
  
-The anonymous token configured in @Users.AnonymousUserToken@ must now be 32 characters or longer. This was already the suggestion in the documentation, now it is enforced. The @script/get_anonymous_user_token.rb@ script that was needed to register the anonymous user token in the database has been removed. Registration of the anonymous token is no longer necessary. If the anonymous token in @config.yml@ is specified as a full V2 token, that will now generate a warning - it should be updated to list just the secret (i.e. the part after the last forward slash).
+The anonymous token configured in @Users.AnonymousUserToken@ must now be 32 characters or longer. This was already the suggestion in the documentation, now it is enforced. The @script/get_anonymous_user_token.rb@ script that was needed to register the anonymous user token in the database has been removed. Registration of the anonymous token is no longer necessary.
  
-h3. Preemptible instance types are used automatically, if any are configured
+h3. Preemptible instance support changes
  
-The default behavior for selecting "preemptible instances":{{site.baseurl}}/admin/spot-instances.html has changed. If your configuration lists any instance types with @Preemptible: true@, all child (non-top-level) containers will automatically be scheduled on preemptible instances. To avoid using preemptible instances except when explicitly requested by clients, add @AlwaysUsePreemptibleInstances: false@ in the @Containers@ config section. (Previously, preemptible instance types were never used unless the configuration specified @UsePreemptibleInstances: true@. That flag has been removed.)
+The @Containers.UsePreemptibleInstances@ option has been renamed to @Containers.AlwaysUsePreemptibleInstances@ and has the same behavior when @true@ and one or more preemptible instances are configured.  However, a value of @false@ no longer disables support for preemptible instances, instead users can now enable use of preemptible instances at the level of an individual workflow or workflow step.
  
-h3. Role groups are visible to all users by default
-
-The permission model has changed such that all role groups are visible to all active users. This enables users to share objects with groups they don't belong to. To preserve the previous behavior, where role groups are only visible to members and admins, add @RoleGroupsVisibleToAll: false@ to the @Users@ section of your configuration file.
+In addition, there is a new configuration option @Containers.PreemptiblePriceFactor@ will automatically add a preemptible instance type corresponding to each regular instance type.  See "Using Preemptible instances":spot-instances.html for details.
  
  h3. Default LSF arguments have changed
  
  If you use LSF and your configuration specifies @Containers.LSF.BsubArgumentsList@, you should update it to include the new arguments (@"-R", "select[mem>=%MMB]", ...@, see "configuration reference":{{site.baseurl}}/admin/config.html). Otherwise, containers that are too big to run on any LSF host will remain in the LSF queue instead of being cancelled.
  
-h3. Previously trashed role groups will be deleted
+h3. Support for NVIDIA CUDA GPUs
  
-Due to a bug in previous versions, the @DELETE@ operation on a role group caused the group to be flagged as trash in the database, but continue to grant permissions regardless. After upgrading, any role groups that had been trashed this way will be deleted. This might surprise some users if they were relying on permissions that were still in effect due to this bug. Future @DELETE@ operations on a role group will immediately delete the group and revoke the associated permissions.
+Arvados now supports requesting NVIDIA CUDA GPUs for cloud and LSF (Slurm is currently not supported).  To be able to request GPU nodes, some additional configuration is needed:
  
-h3. Users are visible to other users by default
+"Including GPU support in cloud compute node image":{{site.baseurl}}/install/crunch2-cloud/install-compute-node.html#nvidia
  
-When a new user is set up (either via @AutoSetupNewUsers@ config or via Workbench admin interface) the user immediately becomes visible to other users. To revert to the previous behavior, where the administrator must add two users to the same group using the Workbench admin interface in order for the users to see each other, change the new @Users.ActivatedUsersAreVisibleToOthers@ config to @false@.
+"Configure cloud dispatcher for GPU support":{{site.baseurl}}/install/crunch2-cloud/install-dispatch-cloud.html#GPUsupport
+
+"LSF GPU configuration":{{site.baseurl}}/install/crunch2-lsf/install-dispatch.html
+
+h3. Role groups are visible to all users by default
+
+The permission model has changed such that all role groups are visible to all active users. This enables users to share objects with groups they don't belong to. To preserve the previous behavior, where role groups are only visible to members and admins, add @RoleGroupsVisibleToAll: false@ to the @Users@ section of your configuration file.
+
+h3. Previously trashed role groups will be deleted
+
+Due to a bug in previous versions, the @DELETE@ operation on a role group caused the group to be flagged as trash in the database, but continue to grant permissions regardless. After upgrading, any role groups that had been trashed this way will be deleted. This might surprise some users if they were relying on permissions that were still in effect due to this bug. Future @DELETE@ operations on a role group will immediately delete the group and revoke the associated permissions.
  
  h3. Dedicated keepstore process for each container
  
@@ -73,6 +87,14 @@ When Arvados runs a container via @arvados-dispatch-cloud@, the @crunch-run@ sup
  * If you already have a robust permanent keepstore infrastructure, you can set @Containers.LocalKeepBlobBuffersPerVCPU@ to 0 to disable this feature and preserve the previous behavior of sending container I/O traffic to your separately provisioned keepstore servers.
  * This feature is enabled only if no volumes use @AccessViaHosts@, and no volumes have underlying @Replication@ less than @Collections.DefaultReplication@. If the feature is configured but cannot be enabled due to an incompatible volume configuration, this will be noted in the @crunch-run.txt@ file in the container log.
  
+h2(#v2_3_1). v2.3.1 (2021-11-24)
+
+"previous: Upgrading to 2.3.0":#v2_3_0
+
+h3. Users are visible to other users by default
+
+When a new user is set up (either via @AutoSetupNewUsers@ config or via Workbench admin interface) the user immediately becomes visible to other users. To revert to the previous behavior, where the administrator must add two users to the same group using the Workbench admin interface in order for the users to see each other, change the new @Users.ActivatedUsersAreVisibleToOthers@ config to @false@.
+
  h3. Backend support for vocabulary checking
  
  If your installation uses the vocabulary feature on Workbench2, you will need to update the cluster configuration by moving the vocabulary definition file to the node where @controller@ runs, and set the @API.VocabularyPath@ configuration parameter to the local path where the file was placed.
diff --git a/doc/api/methods/collections.html.textile.liquid b/doc/api/methods/collections.html.textile.liquid

index 5ff8d529f8376ceb8b5372f8a94873e921c0961e..a2a6a77e19f6350c1be3c770a1560f42c9cc6402 100644 (file)
--- a/doc/api/methods/collections.html.textile.liquid
+++ b/doc/api/methods/collections.html.textile.liquid
@@ -109,6 +109,36 @@ table(table table-bordered table-condensed).
  
  Note: Because adding access tokens to manifests can be computationally expensive, the @manifest_text@ field is not included in results by default.  If you need it, pass a @select@ parameter that includes @manifest_text@.
  
+h4. Searching Collections for names of file or directories
+
+You can search collections for specific file or directory names (whole or part) using the following filter in a @list@ query.
+
+<pre>
+filters: [["file_names", "ilike", "%sample1234.fastq%"]]
+</pre>
+
+Note: @file_names@ is a hidden field used for indexing.  It is not returned by any API call.  On the client, you can programmatically enumerate all the files in a collection using @arv-ls@, the Python SDK @Collection@ class, Go SDK @FileSystem@ struct, the WebDAV API, or the S3-compatible API.
+
+As of this writing (Arvados 2.4), you can also search for directory paths, but _not_ complete file paths.
+
+In other words, this will work (when @dir3@ is a directory):
+
+<pre>
+filters: [["file_names", "ilike", "%dir1/dir2/dir3%"]]
+</pre>
+
+However, this will _not_ return the desired results (where @sample1234.fastq@ is a file):
+
+<pre>
+filters: [["file_names", "ilike", "%dir1/dir2/dir3/sample1234.fastq%"]]
+</pre>
+
+As a workaround, you can search for both the directory path and file name separately, and then filter on the client side.
+
+<pre>
+filters: [["file_names", "ilike", "%dir1/dir2/dir3%"], ["file_names", "ilike", "%sample1234.fastq%"]]
+</pre>
+
  h3. update
  
  Update attributes of an existing Collection.
diff --git a/doc/api/methods/groups.html.textile.liquid b/doc/api/methods/groups.html.textile.liquid

index e4b8594dd195b7e7c4240d9628cc83065572f0f5..2a762d92480955c0a55ed3e2e4fe11d7139b6664 100644 (file)
--- a/doc/api/methods/groups.html.textile.liquid
+++ b/doc/api/methods/groups.html.textile.liquid
@@ -34,6 +34,17 @@ table(table table-bordered table-condensed).
  |trash_at|datetime|If @trash_at@ is non-null and in the past, this group and all objects directly or indirectly owned by the group will be hidden from API calls.  May be untrashed.||
  |delete_at|datetime|If @delete_at@ is non-null and in the past, the group and all objects directly or indirectly owned by the group may be permanently deleted.||
  |is_trashed|datetime|True if @trash_at@ is in the past, false if not.||
+|frozen_by_uuid|string|For a frozen project, indicates the user who froze the project; null in all other cases. When a project is frozen, no further changes can be made to the project or its contents, even by admins. Attempting to add new items or modify, rename, move, trash, or delete the project or its contents, including any subprojects, will return an error.||
+
+h3. Frozen projects
+
+A user with @manage@ permission can set the @frozen_by_uuid@ attribute of a @project@ group to their own user UUID. Once this is done, no further changes can be made to the project or its contents, including subprojects.
+
+The @frozen_by_uuid@ attribute can be cleared by an admin user. It can also be cleared by a user with @manage@ permission, unless the @API.UnfreezeProjectRequiresAdmin@ configuration setting is active.
+
+The optional @API.FreezeProjectRequiresDescription@ and @API.FreezeProjectRequiresProperties@ configuration settings can be used to prevent users from freezing projects that have empty @description@ and/or specified @properties@ entries.
+
+h3. Filter groups
  
  @filter@ groups are virtual groups; they can not own other objects. Filter groups have a special @properties@ field named @filters@, which must be an array of filter conditions. See "list method filters":{{site.baseurl}}/api/methods.html#filters for details on the syntax of valid filters, but keep in mind that the attributes must include the object type (@collections@, @container_requests@, @groups@, @workflows@), separated with a dot from the field to be filtered on.
  
diff --git a/doc/install/crunch2-cloud/install-dispatch-cloud.html.textile.liquid b/doc/install/crunch2-cloud/install-dispatch-cloud.html.textile.liquid

index ee71d7a3f61b6b96577b34ff9d060509f0ee0b93..0ed7a599fc79df23528e78fd0691d9a14c2bf0d7 100644 (file)
--- a/doc/install/crunch2-cloud/install-dispatch-cloud.html.textile.liquid
+++ b/doc/install/crunch2-cloud/install-dispatch-cloud.html.textile.liquid
@@ -74,7 +74,7 @@ Add or update the following portions of your cluster configuration file, @config
  </code></pre>
  </notextile>
  
-h4. NVIDIA GPU support
+h4(#GPUsupport). NVIDIA GPU support
  
  To specify instance types with NVIDIA GPUs, you must include an additional @CUDA@ section:
  
diff --git a/doc/install/install-postgresql.html.textile.liquid b/doc/install/install-postgresql.html.textile.liquid

index 1413890cde7dbd5f6be60f64f7ffe9e32a552525..a9614b9be58eba7afb2f9fae5bab6f79458e487d 100644 (file)
--- a/doc/install/install-postgresql.html.textile.liquid
+++ b/doc/install/install-postgresql.html.textile.liquid
@@ -9,7 +9,7 @@ Copyright (C) The Arvados Authors. All rights reserved.
  SPDX-License-Identifier: CC-BY-SA-3.0
  {% endcomment %}
  
-Arvados requires at least version *9.4* of PostgreSQL.
+Arvados requires at least version *9.4* of PostgreSQL. We recommend using version 10 or newer.
  
  * "AWS":#aws
  * "CentOS 7":#centos7
diff --git a/doc/sdk/cli/subcommands.html.textile.liquid b/doc/sdk/cli/subcommands.html.textile.liquid

index 50d5d89871a612b368c0c46e966ed5717faa7a6a..5dda77ab5ee65cdf3700be3404f53455c0c25f28 100644 (file)
--- a/doc/sdk/cli/subcommands.html.textile.liquid
+++ b/doc/sdk/cli/subcommands.html.textile.liquid
@@ -212,9 +212,10 @@ This is a frontend to @arv-get@.
  <notextile>
  <pre>
  $ <code class="userinput">arv keep get --help</code>
-usage: arv-get [-h] [--retries RETRIES]
+usage: arv-get [-h] [--retries RETRIES] [--version]
                 [--progress | --no-progress | --batch-progress]
-               [--hash HASH | --md5sum] [-n] [-r] [-f | --skip-existing]
+               [--hash HASH | --md5sum] [-n] [-r]
+               [-f | -v | --skip-existing | --strip-manifest] [--threads N]
                 locator [destination]
  
  Copy data from Keep to a local file or pipe.
@@ -223,19 +224,20 @@ positional arguments:
    locator            Collection locator, optionally with a file path or
                       prefix.
    destination        Local file or directory where the data is to be written.
-                     Default: /dev/stdout.
+                     Default: stdout.
  
  optional arguments:
    -h, --help         show this help message and exit
    --retries RETRIES  Maximum number of times to retry server requests that
-                     encounter temporary failures (e.g., server down). Default
-                     3.
+                     encounter temporary failures (e.g., server down).
+                     Default 3.
+  --version          Print version and exit.
    --progress         Display human-readable progress on stderr (bytes and, if
                       possible, percentage of total data size). This is the
                       default behavior when it is not expected to interfere
                       with the output: specifically, stderr is a tty _and_
-                     either stdout is not a tty, or output is being written to
-                     named files rather than stdout.
+                     either stdout is not a tty, or output is being written
+                     to named files rather than stdout.
    --no-progress      Do not display human-readable progress on stderr.
    --batch-progress   Display machine-readable progress on stderr (bytes and,
                       if known, total data size).
@@ -252,11 +254,19 @@ optional arguments:
    -f                 Overwrite existing files while writing. The default
                       behavior is to refuse to write *anything* if any of the
                       output files already exist. As a special case, -f is not
-                     needed to write to /dev/stdout.
-  --skip-existing    Skip files that already exist. The default behavior is to
-                     refuse to write *anything* if any files exist that would
-                     have to be overwritten. This option causes even devices,
-                     sockets, and fifos to be skipped.
+                     needed to write to stdout.
+  -v                 Once for verbose mode, twice for debug mode.
+  --skip-existing    Skip files that already exist. The default behavior is
+                     to refuse to write *anything* if any files exist that
+                     would have to be overwritten. This option causes even
+                     devices, sockets, and fifos to be skipped.
+  --strip-manifest   When getting a collection manifest, strip its access
+                     tokens before writing it.
+  --threads N        Set the number of download threads to be used. Take into
+                     account that using lots of threads will increase the RAM
+                     requirements. Default is to use 4 threads. On high
+                     latency installations, using a greater number will
+                     improve overall throughput.
  </pre>
  </notextile>
  
diff --git a/doc/user/cwl/cwl-extensions.html.textile.liquid b/doc/user/cwl/cwl-extensions.html.textile.liquid

index dd78e989fd52afe4ddd24940a00d76634f546a2d..d6148d7eee1f3d2992a9d43bcb9c2ca44b3b68b5 100644 (file)
--- a/doc/user/cwl/cwl-extensions.html.textile.liquid
+++ b/doc/user/cwl/cwl-extensions.html.textile.liquid
@@ -63,6 +63,9 @@ hints:
      cudaComputeCapabilityMin: "9.0"
      deviceCountMin: 1
      deviceCountMax: 1
+
+  arv:UsePreemptible:
+    usePreemptible: true
  {% endcodeblock %}
  
  h2(#RunInSingleContainer). arv:RunInSingleContainer
@@ -164,6 +167,14 @@ table(table table-bordered table-condensed).
  |deviceCountMin|integer|Minimum number of GPU devices to allocate on a single node. Required.|
  |deviceCountMax|integer|Maximum number of GPU devices to allocate on a single node. Optional.  If not specified, same as @minDeviceCount@.|
  
+h2(#UsePreemptible). arv:UsePreemptible
+
+Specify whether a workflow step should request preemptible (e.g. AWS Spot market) instances.  Such instances are generally cheaper, but can be taken back by the cloud provider at any time (preempted) causing the step to fail.  When this happens, Arvados will automatically re-try the step, up to the configuration value of @Containers.MaxRetryAttempts@ (default 3) times.
+
+table(table table-bordered table-condensed).
+|_. Field |_. Type |_. Description |
+|usePreemptible|boolean|Required, true to opt-in to using preemptible instances, false to opt-out.|
+
  h2. arv:dockerCollectionPDH
  
  This is an optional extension field appearing on the standard @DockerRequirement@.  It specifies the portable data hash of the Arvados collection containing the Docker image.  If present, it takes precedence over @dockerPull@ or @dockerImageId@.
diff --git a/doc/user/cwl/cwl-run-options.html.textile.liquid b/doc/user/cwl/cwl-run-options.html.textile.liquid

index d331dad871570a7f03a6604a4bfcbe7598f1bacc..94e46ae1bc3487179dc1a72167d2d8bf94235085 100644 (file)
--- a/doc/user/cwl/cwl-run-options.html.textile.liquid
+++ b/doc/user/cwl/cwl-run-options.html.textile.liquid
@@ -63,6 +63,9 @@ table(table table-bordered table-condensed).
  |==--priority== PRIORITY|Workflow priority (range 1..1000, higher has precedence over lower)|
  |==--thread-count== THREAD_COUNT|Number of threads to use for container submit and output collection.|
  |==--http-timeout== HTTP_TIMEOUT|API request timeout in seconds. Default is 300 seconds (5 minutes).|
+|==--enable-preemptible==|Use preemptible instances. Control individual steps with "arv:UsePreemptible":cwl-extensions.html#UsePreemptible hint.|
+|==--disable-preemptible==|Don't use preemptible instances.|
+|==--skip-schemas==|Skip loading of extension schemas (the $schemas section).|
  |==--trash-intermediate==|Immediately trash intermediate outputs on workflow success.|
  |==--no-trash-intermediate==|Do not trash intermediate outputs (default).|
  
@@ -143,3 +146,19 @@ Using @--intermediate-output-ttl@ without @--trash-intermediate@ means that inte
  h3(#federation). Run workflow on a remote federated cluster
  
  By default, the workflow runner will run on the local (home) cluster.  Using @--submit-runner-cluster@ you can specify that the runner should be submitted to a remote federated cluster.  When doing this, @--project-uuid@ should specify a project on that cluster.  Steps making up the workflow will be submitted to the remote federated cluster by default, but the behavior of @arv:ClusterTarget@ is unchanged.  Note: when using this option, any resources that need to be uploaded in order to run the workflow (such as files or Docker images) will be uploaded to the local (home) cluster, and streamed to the federated cluster on demand.
+
+h3(#preemptible). Using preemptible (spot) instances
+
+Preemptible instances typically offer lower cost computation with a tradeoff of lower service guarantees.  If a compute node is preempted, Arvados will restart the computation on a new instance.
+
+If the sitewide configuration @Containers.AlwaysUsePreemptibleInstances@ is true, workflow steps will always select preemptible instances, regardless of user option.
+
+If @Containers.AlwaysUsePreemptibleInstances@ is false, you can request preemptible instances for a specific run with the @arvados-cwl-runner --enable-preemptible@ option.
+
+Within the workflow, you can control whether individual steps should be preemptible with the "arv:UsePreemptible":cwl-extensions.html#UsePreemptible hint.
+
+If a workflow requests preemptible instances with "arv:UsePreemptible":cwl-extensions.html#UsePreemptible , but you _do not_ want to use preemptible instances, you can override it for a specific run with the @arvados-cwl-runner --disable-preemptible@ option.
+
+h3(#gpu). Use CUDA GPU instances
+
+See "cwltool:CUDARequirement":cwl-extensions.html#CUDARequirement .
diff --git a/doc/user/cwl/cwl-style.html.textile.liquid b/doc/user/cwl/cwl-style.html.textile.liquid

index 853ed3b3e2be241d7e7c7dad9ae2c64312636449..303ae37e9e94b98d5694cd8de5c71930ca6196ce 100644 (file)
--- a/doc/user/cwl/cwl-style.html.textile.liquid
+++ b/doc/user/cwl/cwl-style.html.textile.liquid
@@ -13,7 +13,15 @@ h2(#performance). Performance
  
  To get the best perfomance from your workflows, be aware of the following Arvados features, behaviors, and best practices.
  
-Does your application support NVIDIA GPU acceleration?  Use "cwltool:CUDARequirement":cwl-extensions.html#CUDARequirement to request nodes with GPUs.
+h3. Does your application support NVIDIA GPU acceleration?
+
+Use "cwltool:CUDARequirement":cwl-extensions.html#CUDARequirement to request nodes with GPUs.
+
+h3. Trying to reduce costs?
+
+Try "using preemptible (spot) instances":cwl-run-options.html#preemptible .
+
+h3. You have a sequence of short-running steps
  
  If you have a sequence of short-running steps (less than 1-2 minutes each), use the Arvados extension "arv:RunInSingleContainer":cwl-extensions.html#RunInSingleContainer to avoid scheduling and data transfer overhead by running all the steps together in the same container on the same node.  To use this feature, @cwltool@ must be installed in the container image.  Example:
  
@@ -42,10 +50,16 @@ steps:
      run: subworkflow-with-short-steps.cwl
  {% endcodeblock %}
  
+h3. Avoid declaring @InlineJavascriptRequirement@ or @ShellCommandRequirement@
+
  Avoid declaring @InlineJavascriptRequirement@ or @ShellCommandRequirement@ unless you specifically need them.  Don't include them "just in case" because they change the default behavior and may add extra overhead.
  
+h3. Prefer text substitution to Javascript
+
  When combining a parameter value with a string, such as adding a filename extension, write @$(inputs.file.basename).ext@ instead of @$(inputs.file.basename + 'ext')@.  The first form is evaluated as a simple text substitution, the second form (using the @+@ operator) is evaluated as an arbitrary Javascript expression and requires that you declare @InlineJavascriptRequirement@.
  
+h3. Use @ExpressionTool@ to efficiently rearrange input files
+
  Use @ExpressionTool@ to efficiently rearrange input files between steps of a Workflow.  For example, the following expression accepts a directory containing files paired by @_R1_@ and @_R2_@ and produces an array of Directories containing each pair.
  
  {% codeblock as yaml %}
@@ -80,9 +94,13 @@ expression: |
    }
  {% endcodeblock %}
  
-Available compute nodes types vary over time and across different cloud providers, so try to limit the RAM requirement to what the program actually needs.  However, if you need to target a specific compute node type, see this discussion on "calculating RAM request and choosing instance type for containers.":{{site.baseurl}}/api/execution.html#RAM
+h3. Limit RAM requests to what you really need
+
+Available compute nodes types vary over time and across different cloud providers, so it is important to limit the RAM requirement to what the program actually needs.  However, if you need to target a specific compute node type, see this discussion on "calculating RAM request and choosing instance type for containers.":{{site.baseurl}}/api/execution.html#RAM
  
-Instead of scattering separate steps, prefer to scatter over a subworkflow.
+h3. Avoid scattering by step by step
+
+Instead of a scatter step that feeds into another scatter step, prefer to scatter over a subworkflow.
  
  With the following pattern, @step1@ has to wait for all samples to complete before @step2@ can start computing on any samples.  This means a single long-running sample can prevent the rest of the workflow from moving on:
  
@@ -148,10 +166,16 @@ h2. Portability
  
  To write workflows that are easy to modify and portable across CWL runners (in the event you need to share your workflow with others), there are several best practices to follow:
  
+h3. Always provide @DockerRequirement@
+
  Workflows should always provide @DockerRequirement@ in the @hints@ or @requirements@ section.
  
+h3. Build a reusable library of components
+
  Build a reusable library of components.  Share tool wrappers and subworkflows between projects.  Make use of and contribute to "community maintained workflows and tools":https://github.com/common-workflow-library and tool registries such as "Dockstore":http://dockstore.org .
  
+h3. Supply scripts as input parameters
+
  CommandLineTools wrapping custom scripts should represent the script as an input parameter with the script file as a default value.  Use @secondaryFiles@ for scripts that consist of multiple files.  For example:
  
  {% codeblock as yaml %}
@@ -180,22 +204,21 @@ outputs:
        glob: "*.fastq"
  {% endcodeblock %}
  
+h3. Getting the temporary and output directories
+
  You can get the designated temporary directory using @$(runtime.tmpdir)@ in your CWL file, or from the @$TMPDIR@ environment variable in your script.
  
  Similarly, you can get the designated output directory using $(runtime.outdir), or from the @HOME@ environment variable in your script.
  
-Avoid specifying resource requirements in CommandLineTool.  Prefer to specify them in the workflow.  You can provide a default resource requirement in the top level @hints@ section, and individual steps can override it with their own resource requirement.
+h3. Specifying @ResourceRequirement@
+
+Avoid specifying resources in the @requirements@ section of a @CommandLineTool@, put it in the @hints@ section instead.  This enables you to override the tool resource hint with a workflow step level requirement:
  
  {% codeblock as yaml %}
  cwlVersion: v1.0
  class: Workflow
  inputs:
    inp: File
-hints:
-  ResourceRequirement:
-    ramMin: 1000
-    coresMin: 1
-    tmpdirMin: 45000
  steps:
    step1:
      in: {inp: inp}
@@ -205,7 +228,7 @@ steps:
      in: {inp: step1/inp}
      out: [out]
      run: tool2.cwl
-    hints:
+    requirements:
        ResourceRequirement:
          ramMin: 2000
          coresMin: 2
diff --git a/lib/config/config.default.yml b/lib/config/config.default.yml

index 9800be70473fc8bf20f56ae5013bac099c80d9a5..6512389815dd4e3d1d786b09cf1cd5b3eedd929d 100644 (file)
--- a/lib/config/config.default.yml
+++ b/lib/config/config.default.yml
@@ -240,6 +240,18 @@ Clusters:
        # https://doc.arvados.org/admin/metadata-vocabulary.html
        VocabularyPath: ""
  
+      # If true, a project must have a non-empty description field in
+      # order to be frozen.
+      FreezeProjectRequiresDescription: false
+
+      # Project properties that must have non-empty values in order to
+      # freeze a project. Example: {"property_name": true}
+      FreezeProjectRequiresProperties: {}
+
+      # If true, only an admin user can un-freeze a project. If false,
+      # any user with "manage" permission can un-freeze.
+      UnfreezeProjectRequiresAdmin: false
+
      Users:
        # Config parameters to automatically setup new users.  If enabled,
        # this users will be able to self-activate.  Enable this if you want
@@ -903,14 +915,18 @@ Clusters:
        # If false, containers are scheduled on preemptible instances
        # only when requested by the submitter.
        #
-      # Note that arvados-cwl-runner does not currently offer a
-      # feature to request preemptible instances, so this value
-      # effectively acts as a cluster-wide decision about whether to
-      # use preemptible instances.
-      #
        # This flag is ignored if no preemptible instance types are
        # configured, and has no effect on top-level containers.
-      AlwaysUsePreemptibleInstances: true
+      AlwaysUsePreemptibleInstances: false
+
+      # Automatically add a preemptible variant for every
+      # non-preemptible entry in InstanceTypes below. The maximum bid
+      # price for the preemptible variant will be the non-preemptible
+      # price multiplied by PreemptiblePriceFactor. If 0, preemptible
+      # variants are not added automatically.
+      #
+      # A price factor of 1.0 is a reasonable starting point.
+      PreemptiblePriceFactor: 0
  
        # PEM encoded SSH key (RSA, DSA, or ECDSA) used by the
        # cloud dispatcher for executing containers on worker VMs.
diff --git a/lib/config/export.go b/lib/config/export.go

index 4e903a8b3d39398b80faf08199b77d9f146165b6..dae749c87470ecb5f4ab7c496ddf001c11abcdf3 100644 (file)
--- a/lib/config/export.go
+++ b/lib/config/export.go
@@ -62,6 +62,8 @@ var whitelist = map[string]bool{
         "API":                                      true,
         "API.AsyncPermissionsUpdateInterval":       false,
         "API.DisabledAPIs":                         false,
+       "API.FreezeProjectRequiresDescription":     true,
+       "API.FreezeProjectRequiresProperties":      true,
         "API.KeepServiceRequestTimeout":            false,
         "API.MaxConcurrentRequests":                false,
         "API.MaxIndexDatabaseRead":                 false,
@@ -72,6 +74,7 @@ var whitelist = map[string]bool{
         "API.MaxTokenLifetime":                     false,
         "API.RequestTimeout":                       true,
         "API.SendTimeout":                          true,
+       "API.UnfreezeProjectRequiresAdmin":         true,
         "API.VocabularyPath":                       false,
         "API.WebsocketClientEventQueue":            false,
         "API.WebsocketServerEventQueue":            false,
@@ -130,6 +133,7 @@ var whitelist = map[string]bool{
         "Containers.MaxDispatchAttempts":           false,
         "Containers.MaxRetryAttempts":              true,
         "Containers.MinRetryPeriod":                true,
+       "Containers.PreemptiblePriceFactor":        false,
         "Containers.ReserveExtraRAM":               true,
         "Containers.RuntimeEngine":                 true,
         "Containers.ShellAccess":                   true,
diff --git a/lib/config/load.go b/lib/config/load.go

index 8d498af170f2180881fac496c82900b1bd764d7f..5afb51c5adcfd6dce89d069670b9ce3aadde2a2e 100644 (file)
--- a/lib/config/load.go
+++ b/lib/config/load.go
@@ -285,6 +285,19 @@ func (ldr *Loader) Load() (*arvados.Config, error) {
                 }
         }
  
+       // Preprocess/automate some configs
+       for id, cc := range cfg.Clusters {
+               ldr.autofillPreemptible("Clusters."+id, &cc)
+
+               if strings.Count(cc.Users.AnonymousUserToken, "/") == 3 {
+                       // V2 token, strip it to just a secret
+                       tmp := strings.Split(cc.Users.AnonymousUserToken, "/")
+                       cc.Users.AnonymousUserToken = tmp[2]
+               }
+
+               cfg.Clusters[id] = cc
+       }
+
         // Check for known mistakes
         for id, cc := range cfg.Clusters {
                 for remote := range cc.RemoteClusters {
@@ -316,11 +329,6 @@ func (ldr *Loader) Load() (*arvados.Config, error) {
                                 return nil, err
                         }
                 }
-               if strings.Count(cc.Users.AnonymousUserToken, "/") == 3 {
-                       // V2 token, strip it to just a secret
-                       tmp := strings.Split(cc.Users.AnonymousUserToken, "/")
-                       cc.Users.AnonymousUserToken = tmp[2]
-               }
         }
         return &cfg, nil
  }
@@ -361,10 +369,12 @@ func (ldr *Loader) checkToken(label, token string, mandatory bool, acceptV2 bool
                 if !strings.HasPrefix(token, "v2/") {
                         return fmt.Errorf("%s: unacceptable characters in token (only a-z, A-Z, 0-9 are acceptable)", label)
                 }
-               ldr.Logger.Warnf("%s: token is a full V2 token, should just be a secret (remove everything up to and including the last forward slash)", label)
                 if !acceptableTokenRe.MatchString(tmp[2]) {
                         return fmt.Errorf("%s: unacceptable characters in V2 token secret (only a-z, A-Z, 0-9 are acceptable)", label)
                 }
+               if len(tmp[2]) < acceptableTokenLength {
+                       ldr.Logger.Warnf("%s: secret is too short (should be at least %d characters)", label, acceptableTokenLength)
+               }
         } else if len(token) < acceptableTokenLength {
                 if ldr.Logger != nil {
                         ldr.Logger.Warnf("%s: token is too short (should be at least %d characters)", label, acceptableTokenLength)
@@ -527,3 +537,21 @@ func (ldr *Loader) logExtraKeys(expected, supplied map[string]interface{}, prefi
                 }
         }
  }
+
+func (ldr *Loader) autofillPreemptible(label string, cc *arvados.Cluster) {
+       if factor := cc.Containers.PreemptiblePriceFactor; factor > 0 {
+               for name, it := range cc.InstanceTypes {
+                       if !it.Preemptible {
+                               it.Preemptible = true
+                               it.Price = it.Price * factor
+                               it.Name = name + ".preemptible"
+                               if it2, exists := cc.InstanceTypes[it.Name]; exists && it2 != it {
+                                       ldr.Logger.Warnf("%s.InstanceTypes[%s]: already exists, so not automatically adding a preemptible variant of %s", label, it.Name, name)
+                                       continue
+                               }
+                               cc.InstanceTypes[it.Name] = it
+                       }
+               }
+       }
+
+}
diff --git a/lib/config/load_test.go b/lib/config/load_test.go

index 1ede805b0085a668b40169af458243a304c12e93..5270dcccce8b9d95b5b5fdb1e6fe9ad15f2e0426 100644 (file)
--- a/lib/config/load_test.go
+++ b/lib/config/load_test.go
@@ -305,8 +305,6 @@ func (s *LoadSuite) TestNoUnrecognizedKeysInDefaultConfig(c *check.C) {
  
  func (s *LoadSuite) TestNoWarningsForDumpedConfig(c *check.C) {
         var logbuf bytes.Buffer
-       logger := logrus.New()
-       logger.Out = &logbuf
         cfg, err := testLoader(c, `
  Clusters:
   zzzzz:
@@ -695,3 +693,72 @@ Clusters:
         _, err = ldr.Load()
         c.Assert(err, check.ErrorMatches, `there is no default storage class.*`)
  }
+
+func (s *LoadSuite) TestPreemptiblePriceFactor(c *check.C) {
+       yaml := `
+Clusters:
+ z1111:
+  InstanceTypes:
+   Type1:
+    RAM: 12345M
+    VCPUs: 8
+    Price: 1.23
+ z2222:
+  Containers:
+   PreemptiblePriceFactor: 0.5
+  InstanceTypes:
+   Type1:
+    RAM: 12345M
+    VCPUs: 8
+    Price: 1.23
+ z3333:
+  Containers:
+   PreemptiblePriceFactor: 0.5
+  InstanceTypes:
+   Type1:
+    RAM: 12345M
+    VCPUs: 8
+    Price: 1.23
+   Type1.preemptible: # higher price than the auto-added variant would use -- should generate warning
+    ProviderType: Type1
+    RAM: 12345M
+    VCPUs: 8
+    Price: 1.23
+    Preemptible: true
+   Type2:
+    RAM: 23456M
+    VCPUs: 16
+    Price: 2.46
+   Type2.preemptible: # identical to the auto-added variant -- so no warning
+    ProviderType: Type2
+    RAM: 23456M
+    VCPUs: 16
+    Price: 1.23
+    Preemptible: true
+`
+       var logbuf bytes.Buffer
+       cfg, err := testLoader(c, yaml, &logbuf).Load()
+       c.Assert(err, check.IsNil)
+       cc, err := cfg.GetCluster("z1111")
+       c.Assert(err, check.IsNil)
+       c.Check(cc.InstanceTypes["Type1"].Price, check.Equals, 1.23)
+       c.Check(cc.InstanceTypes, check.HasLen, 1)
+
+       cc, err = cfg.GetCluster("z2222")
+       c.Assert(err, check.IsNil)
+       c.Check(cc.InstanceTypes["Type1"].Preemptible, check.Equals, false)
+       c.Check(cc.InstanceTypes["Type1"].Price, check.Equals, 1.23)
+       c.Check(cc.InstanceTypes["Type1.preemptible"].Preemptible, check.Equals, true)
+       c.Check(cc.InstanceTypes["Type1.preemptible"].Price, check.Equals, 1.23/2)
+       c.Check(cc.InstanceTypes["Type1.preemptible"].ProviderType, check.Equals, "Type1")
+       c.Check(cc.InstanceTypes, check.HasLen, 2)
+
+       cc, err = cfg.GetCluster("z3333")
+       c.Assert(err, check.IsNil)
+       // Don't overwrite the explicitly configured preemptible variant
+       c.Check(cc.InstanceTypes["Type1.preemptible"].Price, check.Equals, 1.23)
+       c.Check(cc.InstanceTypes, check.HasLen, 4)
+       c.Check(logbuf.String(), check.Matches, `(?ms).*Clusters\.z3333\.InstanceTypes\[Type1\.preemptible\]: already exists, so not automatically adding a preemptible variant of Type1.*`)
+       c.Check(logbuf.String(), check.Not(check.Matches), `(?ms).*Type2\.preemptible.*`)
+       c.Check(logbuf.String(), check.Not(check.Matches), `(?ms).*(z1111|z2222)[^\n]*InstanceTypes.*`)
+}
diff --git a/lib/controller/federation/conn.go b/lib/controller/federation/conn.go

index d3819f6262df8f7df4134753d0359e1d04e12950..1b8ec9e64a6e01138a1bfc58a599a78eb2f0e44b 100644 (file)
--- a/lib/controller/federation/conn.go
+++ b/lib/controller/federation/conn.go
@@ -69,14 +69,17 @@ func saltedTokenProvider(cluster *arvados.Cluster, local backend, remoteID strin
                         return nil, errors.New("no token provided")
                 }
                 for _, token := range incoming.Tokens {
-                       if strings.HasPrefix(token, "v2/"+cluster.ClusterID+"-") && remoteID == cluster.Login.LoginCluster {
-                               // If we did this, the login cluster
-                               // would call back to us and then
-                               // reject our response because the
-                               // user UUID prefix (i.e., the
-                               // LoginCluster prefix) won't match
-                               // the token UUID prefix (i.e., our
-                               // prefix).
+                       if strings.HasPrefix(token, "v2/"+cluster.ClusterID+"-") &&
+                               !strings.HasPrefix(token, "v2/"+cluster.ClusterID+"-gj3su-anonymouspublic/") &&
+                               remoteID == cluster.Login.LoginCluster {
+                               // If we did this, the login cluster would call back to us and then
+                               // reject our response because the user UUID prefix (i.e., the
+                               // LoginCluster prefix) won't match the token UUID prefix (i.e., our
+                               // prefix). The anonymous token is OK to forward, because (unlike other
+                               // local tokens for real users) the validation callback will return the
+                               // locally issued anonymous user ID instead of a login-cluster user ID.
+                               // That anonymous user ID gets mapped to the local anonymous user
+                               // automatically on the login cluster.
                                 return nil, httpErrorf(http.StatusUnauthorized, "cannot use a locally issued token to forward a request to our login cluster (%s)", remoteID)
                         }
                         salted, err := auth.SaltToken(token, remoteID)
diff --git a/lib/controller/handler_test.go b/lib/controller/handler_test.go

index 723e1011f9de62ac871566ec21c4488d1f55b360..5e467cb0588607d3deaa06c1d92326ed18f8f09c 100644 (file)
--- a/lib/controller/handler_test.go
+++ b/lib/controller/handler_test.go
@@ -5,9 +5,11 @@
  package controller
  
  import (
+       "bytes"
         "context"
         "crypto/tls"
         "encoding/json"
+       "io"
         "io/ioutil"
         "net/http"
         "net/http/httptest"
@@ -36,13 +38,15 @@ var _ = check.Suite(&HandlerSuite{})
  type HandlerSuite struct {
         cluster *arvados.Cluster
         handler *Handler
+       logbuf  *bytes.Buffer
         ctx     context.Context
         cancel  context.CancelFunc
  }
  
  func (s *HandlerSuite) SetUpTest(c *check.C) {
+       s.logbuf = &bytes.Buffer{}
         s.ctx, s.cancel = context.WithCancel(context.Background())
-       s.ctx = ctxlog.Context(s.ctx, ctxlog.New(os.Stderr, "json", "debug"))
+       s.ctx = ctxlog.Context(s.ctx, ctxlog.New(io.MultiWriter(os.Stderr, s.logbuf), "json", "debug"))
         s.cluster = &arvados.Cluster{
                 ClusterID:  "zzzzz",
                 PostgreSQL: integrationTestCluster().PostgreSQL,
@@ -317,6 +321,16 @@ func (s *HandlerSuite) TestValidateRemoteToken(c *check.C) {
         }
  }
  
+func (s *HandlerSuite) TestLogTokenUUID(c *check.C) {
+       req := httptest.NewRequest("GET", "https://0.0.0.0/arvados/v1/users/current", nil)
+       req.Header.Set("Authorization", "Bearer "+arvadostest.ActiveTokenV2)
+       req = req.WithContext(s.ctx)
+       resp := httptest.NewRecorder()
+       httpserver.LogRequests(s.handler).ServeHTTP(resp, req)
+       c.Check(resp.Code, check.Equals, http.StatusOK)
+       c.Check(s.logbuf.String(), check.Matches, `(?ms).*"tokenUUIDs":\["`+strings.Split(arvadostest.ActiveTokenV2, "/")[1]+`"\].*`)
+}
+
  func (s *HandlerSuite) TestCreateAPIToken(c *check.C) {
         req := httptest.NewRequest("GET", "/arvados/v1/users/current", nil)
         auth, err := s.handler.createAPItoken(req, arvadostest.ActiveUserUUID, nil)
@@ -367,16 +381,14 @@ func (s *HandlerSuite) CheckObjectType(c *check.C, url string, token string, ski
         for k := range direct {
                 if _, ok := skippedFields[k]; ok {
                         continue
-               } else if val, ok := proxied[k]; ok {
-                       if direct["kind"] == "arvados#collection" && k == "manifest_text" {
-                               // Tokens differ from request to request
-                               c.Check(strings.Split(val.(string), "+A")[0], check.Equals, strings.Split(direct[k].(string), "+A")[0])
-                       } else {
-                               c.Check(val, check.DeepEquals, direct[k],
-                                       check.Commentf("RailsAPI %s key %q's value %q differs from controller's %q.", direct["kind"], k, direct[k], val))
-                       }
-               } else {
+               } else if val, ok := proxied[k]; !ok {
                         c.Errorf("%s's key %q missing on controller's response.", direct["kind"], k)
+               } else if direct["kind"] == "arvados#collection" && k == "manifest_text" {
+                       // Tokens differ from request to request
+                       c.Check(strings.Split(val.(string), "+A")[0], check.Equals, strings.Split(direct[k].(string), "+A")[0])
+               } else {
+                       c.Check(val, check.DeepEquals, direct[k],
+                               check.Commentf("RailsAPI %s key %q's value %q differs from controller's %q.", direct["kind"], k, direct[k], val))
                 }
         }
  }
diff --git a/lib/controller/integration_test.go b/lib/controller/integration_test.go

index 9f5d12598b70e31728fe29fb28ae4fa8ce58ec4e..b71c4afb55ba9a5ac9d3fb57efdb050d55c7a34b 100644 (file)
--- a/lib/controller/integration_test.go
+++ b/lib/controller/integration_test.go
@@ -379,6 +379,56 @@ func (s *IntegrationSuite) TestGetCollectionAsAnonymous(c *check.C) {
         c.Check(coll.PortableDataHash, check.Equals, pdh)
  }
  
+// z3333 should forward the locally-issued anonymous user token to its login
+// cluster z1111. That is no problem because the login cluster controller will
+// map any anonymous user token to its local anonymous user.
+//
+// This needs to work because wb1 has a tendency to slap the local anonymous
+// user token on every request as a reader_token, which gets folded into the
+// request token list controller.
+//
+// Use a z1111 user token and the anonymous token from z3333 passed in as a
+// reader_token to do a request on z3333, asking for the z1111 anonymous user
+// object. The request will be forwarded to the z1111 cluster. The presence of
+// the z3333 anonymous user token should not prohibit the request from being
+// forwarded.
+func (s *IntegrationSuite) TestForwardAnonymousTokenToLoginCluster(c *check.C) {
+       conn1 := s.testClusters["z1111"].Conn()
+       s.testClusters["z3333"].Conn()
+
+       rootctx1, _, _ := s.testClusters["z1111"].RootClients()
+       _, anonac3, _ := s.testClusters["z3333"].AnonymousClients()
+
+       // Make a user connection to z3333 (using a z1111 user, because that's the login cluster)
+       _, userac1, _, _ := s.testClusters["z3333"].UserClients(rootctx1, c, conn1, "user@example.com", true)
+
+       // Get the anonymous user token for z3333
+       var anon3Auth arvados.APIClientAuthorization
+       err := anonac3.RequestAndDecode(&anon3Auth, "GET", "/arvados/v1/api_client_authorizations/current", nil, nil)
+       c.Check(err, check.IsNil)
+
+       var userList arvados.UserList
+       where := make(map[string]string)
+       where["uuid"] = "z1111-tpzed-anonymouspublic"
+       err = userac1.RequestAndDecode(&userList, "GET", "/arvados/v1/users", nil,
+               map[string]interface{}{
+                       "reader_tokens": []string{anon3Auth.TokenV2()},
+                       "where":         where,
+               },
+       )
+       // The local z3333 anonymous token must be allowed to be forwarded to the login cluster
+       c.Check(err, check.IsNil)
+
+       userac1.AuthToken = "v2/z1111-gj3su-asdfasdfasdfasd/this-token-does-not-validate-so-anonymous-token-will-be-used-instead"
+       err = userac1.RequestAndDecode(&userList, "GET", "/arvados/v1/users", nil,
+               map[string]interface{}{
+                       "reader_tokens": []string{anon3Auth.TokenV2()},
+                       "where":         where,
+               },
+       )
+       c.Check(err, check.IsNil)
+}
+
  // Get a token from the login cluster (z1111), use it to submit a
  // container request on z2222.
  func (s *IntegrationSuite) TestCreateContainerRequestWithFedToken(c *check.C) {
diff --git a/lib/controller/router/response.go b/lib/controller/router/response.go

index c0c599be8bcd4f1acb6fbbc5ed90e6541cf08dfe..42b34355935db015e72d7de3e8b7a6398ffc0680 100644 (file)
--- a/lib/controller/router/response.go
+++ b/lib/controller/router/response.go
@@ -208,7 +208,7 @@ func (rtr *router) mungeItemFields(tmp map[string]interface{}) {
                 // they appear in responses as null, rather than a
                 // zero value.
                 switch k {
-               case "output_uuid", "output_name", "log_uuid", "description", "requesting_container_uuid", "container_uuid":
+               case "output_uuid", "output_name", "log_uuid", "description", "requesting_container_uuid", "container_uuid", "modified_by_client_uuid", "frozen_by_uuid":
                         if v == "" {
                                 tmp[k] = nil
                         }
diff --git a/lib/controller/router/router.go b/lib/controller/router/router.go

index 2cfcc4fc28287c8ee44166277ecaffe23d42e293..05bdb4754f0a860ac552867d42bf6e30af9eb4d6 100644 (file)
--- a/lib/controller/router/router.go
+++ b/lib/controller/router/router.go
@@ -588,6 +588,23 @@ func (rtr *router) addRoute(endpoint arvados.APIEndpoint, defaultOpts func() int
                         "apiOptsType": fmt.Sprintf("%T", opts),
                         "apiOpts":     opts,
                 }).Debug("exec")
+               // Extract the token UUIDs (or a placeholder for v1 tokens)
+               var tokenUUIDs []string
+               for _, t := range creds.Tokens {
+                       if strings.HasPrefix(t, "v2/") {
+                               tokenParts := strings.Split(t, "/")
+                               if len(tokenParts) >= 3 {
+                                       tokenUUIDs = append(tokenUUIDs, tokenParts[1])
+                               }
+                       } else {
+                               end := t
+                               if len(t) > 5 {
+                                       end = t[len(t)-5:]
+                               }
+                               tokenUUIDs = append(tokenUUIDs, "v1 token ending in "+end)
+                       }
+               }
+               httpserver.SetResponseLogFields(req.Context(), logrus.Fields{"tokenUUIDs": tokenUUIDs})
                 resp, err := exec(ctx, opts)
                 if err != nil {
                         logger.WithError(err).Debugf("returning error type %T", err)
diff --git a/lib/crunchrun/crunchrun.go b/lib/crunchrun/crunchrun.go

index 4fa3f26ab54dc4fb5a0fba4cdc1a12fe4a3a94d0..65f43e96440aa57508cb7e2a80af99e455420120 100644 (file)
--- a/lib/crunchrun/crunchrun.go
+++ b/lib/crunchrun/crunchrun.go
@@ -19,6 +19,7 @@ import (
         "os"
         "os/exec"
         "os/signal"
+       "os/user"
         "path"
         "path/filepath"
         "regexp"
@@ -1475,6 +1476,7 @@ func (runner *ContainerRunner) NewArvLogWriter(name string) (io.WriteCloser, err
  // Run the full container lifecycle.
  func (runner *ContainerRunner) Run() (err error) {
         runner.CrunchLog.Printf("crunch-run %s started", cmd.Version.String())
+       runner.CrunchLog.Printf("%s", currentUserAndGroups())
         runner.CrunchLog.Printf("Executing container '%s' using %s runtime", runner.Container.UUID, runner.executor.Runtime())
  
         hostname, hosterr := os.Hostname()
@@ -2045,3 +2047,30 @@ func startLocalKeepstore(configData ConfigData, logbuf io.Writer) (*exec.Cmd, er
         os.Setenv("ARVADOS_KEEP_SERVICES", url)
         return cmd, nil
  }
+
+// return current uid, gid, groups in a format suitable for logging:
+// "crunch-run process has uid=1234(arvados) gid=1234(arvados)
+// groups=1234(arvados),114(fuse)"
+func currentUserAndGroups() string {
+       u, err := user.Current()
+       if err != nil {
+               return fmt.Sprintf("error getting current user ID: %s", err)
+       }
+       s := fmt.Sprintf("crunch-run process has uid=%s(%s) gid=%s", u.Uid, u.Username, u.Gid)
+       if g, err := user.LookupGroupId(u.Gid); err == nil {
+               s += fmt.Sprintf("(%s)", g.Name)
+       }
+       s += " groups="
+       if gids, err := u.GroupIds(); err == nil {
+               for i, gid := range gids {
+                       if i > 0 {
+                               s += ","
+                       }
+                       s += gid
+                       if g, err := user.LookupGroupId(gid); err == nil {
+                               s += fmt.Sprintf("(%s)", g.Name)
+                       }
+               }
+       }
+       return s
+}
diff --git a/lib/crunchrun/crunchrun_test.go b/lib/crunchrun/crunchrun_test.go

index 26f78d2bf7d537c7a44f0b8dd57eda4355211b5b..62df0032b40800e9b2c2eb59ed4c42e639f9e9a1 100644 (file)
--- a/lib/crunchrun/crunchrun_test.go
+++ b/lib/crunchrun/crunchrun_test.go
@@ -885,6 +885,7 @@ func (s *TestSuite) TestLogVersionAndRuntime(c *C) {
  
         c.Assert(s.api.Logs["crunch-run"], NotNil)
         c.Check(s.api.Logs["crunch-run"].String(), Matches, `(?ms).*crunch-run \S+ \(go\S+\) start.*`)
+       c.Check(s.api.Logs["crunch-run"].String(), Matches, `(?ms).*crunch-run process has uid=\d+\(.+\) gid=\d+\(.+\) groups=\d+\(.+\)(,\d+\(.+\))*\n.*`)
         c.Check(s.api.Logs["crunch-run"].String(), Matches, `(?ms).*Executing container 'zzzzz-zzzzz-zzzzzzzzzzzzzzz' using stub runtime.*`)
  }
  
diff --git a/sdk/cwl/arvados_cwl/__init__.py b/sdk/cwl/arvados_cwl/__init__.py

index 826467cc09397342c8d0fa32bfe3b4ed8dd10124..c73b358eccfb19211ce5a077d56ac995d30a40c0 100644 (file)
--- a/sdk/cwl/arvados_cwl/__init__.py
+++ b/sdk/cwl/arvados_cwl/__init__.py
@@ -213,6 +213,10 @@ def arg_parser():  # type: () -> argparse.ArgumentParser
      parser.add_argument("--http-timeout", type=int,
                          default=5*60, dest="http_timeout", help="API request timeout in seconds. Default is 300 seconds (5 minutes).")
  
+    exgroup = parser.add_mutually_exclusive_group()
+    exgroup.add_argument("--enable-preemptible", dest="enable_preemptible", default=None, action="store_true", help="Use preemptible instances. Control individual steps with arv:UsePreemptible hint.")
+    exgroup.add_argument("--disable-preemptible", dest="enable_preemptible", default=None, action="store_false", help="Don't use preemptible instances.")
+
      parser.add_argument(
          "--skip-schemas",
          action="store_true",
@@ -255,7 +259,8 @@ def add_arv_hints():
          "http://arvados.org/cwl#ClusterTarget",
          "http://arvados.org/cwl#OutputStorageClass",
          "http://arvados.org/cwl#ProcessProperties",
-        "http://commonwl.org/cwltool#CUDARequirement"
+        "http://commonwl.org/cwltool#CUDARequirement",
+        "http://arvados.org/cwl#UsePreemptible",
      ])
  
  def exit_signal_handler(sigcode, frame):
diff --git a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.0.yml b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.0.yml

index 6e2d4f1d92ab9471dd1bcc441b360eed12cc6a2c..af75481431b6cad88a3b346f180544617654045a 100644 (file)
--- a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.0.yml
+++ b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.0.yml
@@ -385,3 +385,18 @@ $graph:
        doc: |
          Maximum number of GPU devices to request.  If not specified,
          same as `cudaDeviceCountMin`.
+
+- name: UsePreemptible
+  type: record
+  extends: cwl:ProcessRequirement
+  inVocab: false
+  doc: |
+    Specify a workflow step should opt-in or opt-out of using preemptible (spot) instances.
+  fields:
+    class:
+      type: string
+      doc: "Always 'arv:UsePreemptible"
+      jsonldPredicate:
+        _id: "@type"
+        _type: "@vocab"
+    usePreemptible: boolean
diff --git a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.1.yml b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.1.yml

index 0e81347d72869253c9bdadae70ae93f498cb7d25..0ae451ccaac78e8173e0278d9745038d66b47b0e 100644 (file)
--- a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.1.yml
+++ b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.1.yml
@@ -328,3 +328,18 @@ $graph:
        doc: |
          Maximum number of GPU devices to request.  If not specified,
          same as `cudaDeviceCountMin`.
+
+- name: UsePreemptible
+  type: record
+  extends: cwl:ProcessRequirement
+  inVocab: false
+  doc: |
+    Specify a workflow step should opt-in or opt-out of using preemptible (spot) instances.
+  fields:
+    class:
+      type: string
+      doc: "Always 'arv:UsePreemptible"
+      jsonldPredicate:
+        _id: "@type"
+        _type: "@vocab"
+    usePreemptible: boolean
diff --git a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.2.yml b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.2.yml

index e9f70bf1cf63616408e78f7d4278ae6b3e4e4c9f..de5e55ca01164fa3d86454cdcf1d249a66018e2e 100644 (file)
--- a/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.2.yml
+++ b/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.2.yml
@@ -330,3 +330,18 @@ $graph:
        doc: |
          Maximum number of GPU devices to request.  If not specified,
          same as `cudaDeviceCountMin`.
+
+- name: UsePreemptible
+  type: record
+  extends: cwl:ProcessRequirement
+  inVocab: false
+  doc: |
+    Specify a workflow step should opt-in or opt-out of using preemptible (spot) instances.
+  fields:
+    class:
+      type: string
+      doc: "Always 'arv:UsePreemptible"
+      jsonldPredicate:
+        _id: "@type"
+        _type: "@vocab"
+    usePreemptible: boolean
diff --git a/sdk/cwl/arvados_cwl/arvcontainer.py b/sdk/cwl/arvados_cwl/arvcontainer.py

index 2a5ff3a13a834861c846769af9407b8b1de10c2a..8c468dd22d09046bdff1b1f2152197ebdbe5c3ed 100644 (file)
--- a/sdk/cwl/arvados_cwl/arvcontainer.py
+++ b/sdk/cwl/arvados_cwl/arvcontainer.py
@@ -300,6 +300,17 @@ class ArvadosContainer(JobBase):
                  "hardware_capability": aslist(cuda_req["cudaComputeCapability"])[0]
              }
  
+        if runtimeContext.enable_preemptible is False:
+            scheduling_parameters["preemptible"] = False
+        else:
+            preemptible_req, _ = self.get_requirement("http://arvados.org/cwl#UsePreemptible")
+            if preemptible_req:
+                scheduling_parameters["preemptible"] = preemptible_req["usePreemptible"]
+            elif runtimeContext.enable_preemptible is True:
+                scheduling_parameters["preemptible"] = True
+            elif runtimeContext.enable_preemptible is None:
+                pass
+
          if self.timelimit is not None and self.timelimit > 0:
              scheduling_parameters["max_run_time"] = self.timelimit
  
@@ -550,6 +561,12 @@ class RunnerContainer(Runner):
          if self.enable_dev:
              command.append("--enable-dev")
  
+        if runtimeContext.enable_preemptible is True:
+            command.append("--enable-preemptible")
+
+        if runtimeContext.enable_preemptible is False:
+            command.append("--disable-preemptible")
+
          command.extend([workflowpath, "/var/lib/cwl/cwl.input.json"])
  
          container_req["command"] = command
diff --git a/sdk/cwl/arvados_cwl/context.py b/sdk/cwl/arvados_cwl/context.py

index 4239dd3b51fea8e51d3cdaf33116595cc2748f34..316250106b09cdd248d0ddf7b292cbfc1881a700 100644 (file)
--- a/sdk/cwl/arvados_cwl/context.py
+++ b/sdk/cwl/arvados_cwl/context.py
@@ -37,6 +37,7 @@ class ArvRuntimeContext(RuntimeContext):
          self.always_submit_runner = False
          self.collection_cache_size = 256
          self.match_local_docker = False
+        self.enable_preemptible = None
  
          super(ArvRuntimeContext, self).__init__(kwargs)
  
diff --git a/sdk/cwl/arvados_cwl/runner.py b/sdk/cwl/arvados_cwl/runner.py

index ad17950a2fb6bab90376ecb0fe688af7a80f2b94..38e2c4d806f36ed80f9dac70da8cf37006f3de19 100644 (file)
--- a/sdk/cwl/arvados_cwl/runner.py
+++ b/sdk/cwl/arvados_cwl/runner.py
@@ -40,7 +40,7 @@ import schema_salad.validate as validate
  
  import arvados.collection
  from .util import collectionUUID
-import ruamel.yaml as yaml
+from ruamel.yaml import YAML
  from ruamel.yaml.comments import CommentedMap, CommentedSeq
  
  import arvados_cwl.arvdocker
@@ -265,7 +265,8 @@ def upload_dependencies(arvrunner, name, document_loader,
                  textIO = StringIO(text.decode('utf-8'))
              else:
                  textIO = StringIO(text)
-            return yaml.safe_load(textIO)
+            yamlloader = YAML(typ='safe', pure=True)
+            return yamlloader.load(textIO)
          else:
              return {}
  
diff --git a/sdk/cwl/test_with_arvbox.sh b/sdk/cwl/test_with_arvbox.sh

index 0021bc8d906c5531b70c79a87d9be169658b5c57..d38414fc811d433acb5751613c9974f1bbc0b0c5 100755 (executable)
--- a/sdk/cwl/test_with_arvbox.sh
+++ b/sdk/cwl/test_with_arvbox.sh
@@ -118,14 +118,15 @@ elif [[ "$suite" =~ conformance-(.*) ]] ; then
       git clone https://github.com/common-workflow-language/cwl-\${version}.git
     fi
     cd cwl-\${version}
+   git checkout \${version}.0
  elif [[ "$suite" != "integration" ]] ; then
     echo "ERROR: unknown suite '$suite'"
     exit 1
  fi
  
-if [[ "$suite" != "integration" ]] ; then
-  git pull
-fi
+#if [[ "$suite" != "integration" ]] ; then
+#  git pull
+#fi
  
  export ARVADOS_API_HOST=localhost:8000
  export ARVADOS_API_HOST_INSECURE=1
@@ -154,18 +155,6 @@ else
    arv-keepdocker arvados/jobs latest
  fi
  
-cat >/tmp/cwltest/arv-cwl-jobs <<EOF2
-#!/bin/sh
-exec arvados-cwl-runner --api=jobs \\\$@
-EOF2
-chmod +x /tmp/cwltest/arv-cwl-jobs
-
-cat >/tmp/cwltest/arv-cwl-containers <<EOF2
-#!/bin/sh
-exec arvados-cwl-runner --api=containers \\\$@
-EOF2
-chmod +x /tmp/cwltest/arv-cwl-containers
-
  EXTRA=--compute-checksum
  
  if [[ $devcwl -eq 1 ]] ; then
@@ -176,8 +165,10 @@ env
  if [[ "$suite" = "integration" ]] ; then
     cd /usr/src/arvados/sdk/cwl/tests
     exec ./arvados-tests.sh $@
+elif [[ "$suite" = "conformance-v1.2" ]] ; then
+   exec cwltest --tool arvados-cwl-runner --test conformance_tests.yaml -N307 $@ -- \$EXTRA
  else
-   exec ./run_test.sh RUNNER=/tmp/cwltest/arv-cwl-${runapi} EXTRA="\$EXTRA" $@
+   exec ./run_test.sh RUNNER=arvados-cwl-runner EXTRA="\$EXTRA" $@
  fi
  EOF
  
diff --git a/sdk/cwl/tests/test_container.py b/sdk/cwl/tests/test_container.py

index 3de90c8d8810766399b08934c5c2db312926408d..798c5af289322ce2ae23edc26ca7d8a863d50186 100644 (file)
--- a/sdk/cwl/tests/test_container.py
+++ b/sdk/cwl/tests/test_container.py
@@ -1234,6 +1234,103 @@ class TestContainer(unittest.TestCase):
                  body=JsonDiffMatcher(container_request))
  
  
+    # The test passes no builder.resources
+    # Hence the default resources will apply: {'cores': 1, 'ram': 1024, 'outdirSize': 1024, 'tmpdirSize': 1024}
+    @mock.patch("arvados.commands.keepdocker.list_images_in_arv")
+    def test_run_preemptible_hint(self, keepdocker):
+        arvados_cwl.add_arv_hints()
+        for enable_preemptible in (None, True, False):
+            for preemptible_hint in (None, True, False):
+                arv_docker_clear_cache()
+
+                runner = mock.MagicMock()
+                runner.ignore_docker_for_reuse = False
+                runner.intermediate_output_ttl = 0
+                runner.secret_store = cwltool.secrets.SecretStore()
+                runner.api._rootDesc = {"revision": "20210628"}
+
+                keepdocker.return_value = [("zzzzz-4zz18-zzzzzzzzzzzzzz3", "")]
+                runner.api.collections().get().execute.return_value = {
+                    "portable_data_hash": "99999999999999999999999999999993+99"}
+
+                if preemptible_hint is not None:
+                    hints = [{
+                        "class": "http://arvados.org/cwl#UsePreemptible",
+                        "usePreemptible": preemptible_hint
+                    }]
+                else:
+                    hints = []
+
+                tool = cmap({
+                    "inputs": [],
+                    "outputs": [],
+                    "baseCommand": "ls",
+                    "arguments": [{"valueFrom": "$(runtime.outdir)"}],
+                    "id": "",
+                    "class": "CommandLineTool",
+                    "cwlVersion": "v1.2",
+                    "hints": hints
+                })
+
+                loadingContext, runtimeContext = self.helper(runner)
+
+                runtimeContext.name = 'test_run_enable_preemptible_'+str(enable_preemptible)+str(preemptible_hint)
+                runtimeContext.enable_preemptible = enable_preemptible
+
+                arvtool = cwltool.load_tool.load_tool(tool, loadingContext)
+                arvtool.formatgraph = None
+
+                # Test the interactions between --enable/disable-preemptible
+                # and UsePreemptible hint
+
+                if enable_preemptible is None:
+                    if preemptible_hint is None:
+                        sched = {}
+                    else:
+                        sched = {'preemptible': preemptible_hint}
+                else:
+                    if preemptible_hint is None:
+                        sched = {'preemptible': enable_preemptible}
+                    else:
+                        sched = {'preemptible': enable_preemptible and preemptible_hint}
+
+                for j in arvtool.job({}, mock.MagicMock(), runtimeContext):
+                    j.run(runtimeContext)
+                    runner.api.container_requests().create.assert_called_with(
+                        body=JsonDiffMatcher({
+                            'environment': {
+                                'HOME': '/var/spool/cwl',
+                                'TMPDIR': '/tmp'
+                            },
+                            'name': runtimeContext.name,
+                            'runtime_constraints': {
+                                'vcpus': 1,
+                                'ram': 268435456
+                            },
+                            'use_existing': True,
+                            'priority': 500,
+                            'mounts': {
+                                '/tmp': {'kind': 'tmp',
+                                         "capacity": 1073741824
+                                     },
+                                '/var/spool/cwl': {'kind': 'tmp',
+                                                   "capacity": 1073741824 }
+                            },
+                            'state': 'Committed',
+                            'output_name': 'Output for step '+runtimeContext.name,
+                            'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
+                            'output_path': '/var/spool/cwl',
+                            'output_ttl': 0,
+                            'container_image': '99999999999999999999999999999993+99',
+                            'command': ['ls', '/var/spool/cwl'],
+                            'cwd': '/var/spool/cwl',
+                            'scheduling_parameters': sched,
+                            'properties': {},
+                            'secret_mounts': {},
+                            'output_storage_classes': ["default"]
+                        }))
+
+
  
  class TestWorkflow(unittest.TestCase):
      def setUp(self):
diff --git a/sdk/cwl/tests/test_submit.py b/sdk/cwl/tests/test_submit.py

index 10443359b99bf02a51163d8eb38924579d14154a..61892bf2a447395708359a7da7e3bf4798603626 100644 (file)
--- a/sdk/cwl/tests/test_submit.py
+++ b/sdk/cwl/tests/test_submit.py
@@ -1468,6 +1468,49 @@ class TestSubmit(unittest.TestCase):
          self.assertEqual(exited, 0)
  
  
+    @stubs
+    def test_submit_enable_preemptible(self, stubs):
+        exited = arvados_cwl.main(
+            ["--submit", "--no-wait", "--api=containers", "--debug", "--enable-preemptible",
+                "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
+            stubs.capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
+
+        expect_container = copy.deepcopy(stubs.expect_container_spec)
+        expect_container['command'] = ['arvados-cwl-runner', '--local', '--api=containers',
+                        '--no-log-timestamps', '--disable-validate', '--disable-color',
+                        '--eval-timeout=20', '--thread-count=0',
+                        '--enable-reuse', "--collection-cache-size=256", '--debug', '--on-error=continue',
+                                       '--enable-preemptible',
+                        '/var/lib/cwl/workflow.json#main', '/var/lib/cwl/cwl.input.json']
+
+        stubs.api.container_requests().create.assert_called_with(
+            body=JsonDiffMatcher(expect_container))
+        self.assertEqual(stubs.capture_stdout.getvalue(),
+                         stubs.expect_container_request_uuid + '\n')
+        self.assertEqual(exited, 0)
+
+    @stubs
+    def test_submit_disable_preemptible(self, stubs):
+        exited = arvados_cwl.main(
+            ["--submit", "--no-wait", "--api=containers", "--debug", "--disable-preemptible",
+                "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
+            stubs.capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
+
+        expect_container = copy.deepcopy(stubs.expect_container_spec)
+        expect_container['command'] = ['arvados-cwl-runner', '--local', '--api=containers',
+                        '--no-log-timestamps', '--disable-validate', '--disable-color',
+                        '--eval-timeout=20', '--thread-count=0',
+                        '--enable-reuse', "--collection-cache-size=256", '--debug', '--on-error=continue',
+                                       '--disable-preemptible',
+                        '/var/lib/cwl/workflow.json#main', '/var/lib/cwl/cwl.input.json']
+
+        stubs.api.container_requests().create.assert_called_with(
+            body=JsonDiffMatcher(expect_container))
+        self.assertEqual(stubs.capture_stdout.getvalue(),
+                         stubs.expect_container_request_uuid + '\n')
+        self.assertEqual(exited, 0)
+
+
  class TestCreateWorkflow(unittest.TestCase):
      existing_workflow_uuid = "zzzzz-7fd4e-validworkfloyml"
      expect_workflow = StripYAMLComments(
diff --git a/sdk/go/arvados/client.go b/sdk/go/arvados/client.go

index 5ec828667fc940ace2c3f59b6cdc643139ae3b14..24d5ac3e335c824f5ea4a444c6066ce37f3cc86f 100644 (file)
--- a/sdk/go/arvados/client.go
+++ b/sdk/go/arvados/client.go
@@ -12,6 +12,7 @@ import (
         "errors"
         "fmt"
         "io"
+       "io/fs"
         "io/ioutil"
         "log"
         "net/http"
@@ -102,11 +103,60 @@ func NewClientFromConfig(cluster *Cluster) (*Client, error) {
  }
  
  // NewClientFromEnv creates a new Client that uses the default HTTP
-// client with the API endpoint and credentials given by the
-// ARVADOS_API_* environment variables.
+// client, and loads API endpoint and credentials from ARVADOS_*
+// environment variables (if set) and
+// $HOME/.config/arvados/settings.conf (if readable).
+//
+// If a config exists in both locations, the environment variable is
+// used.
+//
+// If there is an error (other than ENOENT) reading settings.conf,
+// NewClientFromEnv logs the error to log.Default(), then proceeds as
+// if settings.conf did not exist.
+//
+// Space characters are trimmed when reading the settings file, so
+// these are equivalent:
+//
+//   ARVADOS_API_HOST=localhost\n
+//   ARVADOS_API_HOST=localhost\r\n
+//   ARVADOS_API_HOST = localhost \n
+//   \tARVADOS_API_HOST = localhost\n
  func NewClientFromEnv() *Client {
+       vars := map[string]string{}
+       home := os.Getenv("HOME")
+       conffile := home + "/.config/arvados/settings.conf"
+       if home == "" {
+               // no $HOME => just use env vars
+       } else if settings, err := os.ReadFile(conffile); errors.Is(err, fs.ErrNotExist) {
+               // no config file => just use env vars
+       } else if err != nil {
+               // config file unreadable => log message, then use env vars
+               log.Printf("continuing without loading %s: %s", conffile, err)
+       } else {
+               for _, line := range bytes.Split(settings, []byte{'\n'}) {
+                       kv := bytes.SplitN(line, []byte{'='}, 2)
+                       k := string(bytes.TrimSpace(kv[0]))
+                       if len(kv) != 2 || !strings.HasPrefix(k, "ARVADOS_") {
+                               // Same behavior as python sdk:
+                               // silently skip leading # (comments),
+                               // blank lines, typos, and non-Arvados
+                               // vars.
+                               continue
+                       }
+                       vars[k] = string(bytes.TrimSpace(kv[1]))
+               }
+       }
+       for _, env := range os.Environ() {
+               if !strings.HasPrefix(env, "ARVADOS_") {
+                       continue
+               }
+               kv := strings.SplitN(env, "=", 2)
+               if len(kv) == 2 {
+                       vars[kv[0]] = kv[1]
+               }
+       }
         var svcs []string
-       for _, s := range strings.Split(os.Getenv("ARVADOS_KEEP_SERVICES"), " ") {
+       for _, s := range strings.Split(vars["ARVADOS_KEEP_SERVICES"], " ") {
                 if s == "" {
                         continue
                 } else if u, err := url.Parse(s); err != nil {
@@ -118,13 +168,13 @@ func NewClientFromEnv() *Client {
                 }
         }
         var insecure bool
-       if s := strings.ToLower(os.Getenv("ARVADOS_API_HOST_INSECURE")); s == "1" || s == "yes" || s == "true" {
+       if s := strings.ToLower(vars["ARVADOS_API_HOST_INSECURE"]); s == "1" || s == "yes" || s == "true" {
                 insecure = true
         }
         return &Client{
                 Scheme:          "https",
-               APIHost:         os.Getenv("ARVADOS_API_HOST"),
-               AuthToken:       os.Getenv("ARVADOS_API_TOKEN"),
+               APIHost:         vars["ARVADOS_API_HOST"],
+               AuthToken:       vars["ARVADOS_API_TOKEN"],
                 Insecure:        insecure,
                 KeepServiceURIs: svcs,
                 Timeout:         5 * time.Minute,
diff --git a/sdk/go/arvados/client_test.go b/sdk/go/arvados/client_test.go

index df938008d49756b850ca6e5ce5abee8a0510e2a3..2363803cab1de157f4074d3a2770f2cc0c9201ca 100644 (file)
--- a/sdk/go/arvados/client_test.go
+++ b/sdk/go/arvados/client_test.go
@@ -10,9 +10,12 @@ import (
         "io/ioutil"
         "net/http"
         "net/url"
+       "os"
+       "strings"
         "sync"
-       "testing"
         "testing/iotest"
+
+       check "gopkg.in/check.v1"
  )
  
  type stubTransport struct {
@@ -68,43 +71,36 @@ func (stub *timeoutTransport) RoundTrip(req *http.Request) (*http.Response, erro
         }, nil
  }
  
-func TestCurrentUser(t *testing.T) {
-       t.Parallel()
+var _ = check.Suite(&clientSuite{})
+
+type clientSuite struct{}
+
+func (*clientSuite) TestCurrentUser(c *check.C) {
         stub := &stubTransport{
                 Responses: map[string]string{
                         "/arvados/v1/users/current": `{"uuid":"zzzzz-abcde-012340123401234"}`,
                 },
         }
-       c := &Client{
+       client := &Client{
                 Client: &http.Client{
                         Transport: stub,
                 },
                 APIHost:   "zzzzz.arvadosapi.com",
                 AuthToken: "xyzzy",
         }
-       u, err := c.CurrentUser()
-       if err != nil {
-               t.Fatal(err)
-       }
-       if x := "zzzzz-abcde-012340123401234"; u.UUID != x {
-               t.Errorf("got uuid %q, expected %q", u.UUID, x)
-       }
-       if len(stub.Requests) < 1 {
-               t.Fatal("empty stub.Requests")
-       }
+       u, err := client.CurrentUser()
+       c.Check(err, check.IsNil)
+       c.Check(u.UUID, check.Equals, "zzzzz-abcde-012340123401234")
+       c.Check(stub.Requests, check.Not(check.HasLen), 0)
         hdr := stub.Requests[len(stub.Requests)-1].Header
-       if hdr.Get("Authorization") != "OAuth2 xyzzy" {
-               t.Errorf("got headers %+q, expected Authorization header", hdr)
-       }
+       c.Check(hdr.Get("Authorization"), check.Equals, "OAuth2 xyzzy")
  
-       c.Client.Transport = &errorTransport{}
-       u, err = c.CurrentUser()
-       if err == nil {
-               t.Errorf("got nil error, expected something awful")
-       }
+       client.Client.Transport = &errorTransport{}
+       u, err = client.CurrentUser()
+       c.Check(err, check.NotNil)
  }
  
-func TestAnythingToValues(t *testing.T) {
+func (*clientSuite) TestAnythingToValues(c *check.C) {
         type testCase struct {
                 in interface{}
                 // ok==nil means anythingToValues should return an
@@ -158,17 +154,66 @@ func TestAnythingToValues(t *testing.T) {
                         ok: nil,
                 },
         } {
-               t.Logf("%#v", tc.in)
+               c.Logf("%#v", tc.in)
                 out, err := anythingToValues(tc.in)
-               switch {
-               case tc.ok == nil:
-                       if err == nil {
-                               t.Errorf("got %#v, expected error", out)
-                       }
-               case err != nil:
-                       t.Errorf("got err %#v, expected nil", err)
-               case !tc.ok(out):
-                       t.Errorf("got %#v but tc.ok() says that is wrong", out)
+               if tc.ok == nil {
+                       c.Check(err, check.NotNil)
+                       continue
                 }
+               c.Check(err, check.IsNil)
+               c.Check(tc.ok(out), check.Equals, true)
         }
  }
+
+func (*clientSuite) TestLoadConfig(c *check.C) {
+       oldenv := os.Environ()
+       defer func() {
+               os.Clearenv()
+               for _, s := range oldenv {
+                       i := strings.IndexRune(s, '=')
+                       os.Setenv(s[:i], s[i+1:])
+               }
+       }()
+
+       tmp := c.MkDir()
+       os.Setenv("HOME", tmp)
+       for _, s := range os.Environ() {
+               if strings.HasPrefix(s, "ARVADOS_") {
+                       i := strings.IndexRune(s, '=')
+                       os.Unsetenv(s[:i])
+               }
+       }
+       os.Mkdir(tmp+"/.config", 0777)
+       os.Mkdir(tmp+"/.config/arvados", 0777)
+
+       // Use $HOME/.config/arvados/settings.conf if no env vars are
+       // set
+       os.WriteFile(tmp+"/.config/arvados/settings.conf", []byte(`
+               ARVADOS_API_HOST = localhost:1
+               ARVADOS_API_TOKEN = token_from_settings_file1
+       `), 0777)
+       client := NewClientFromEnv()
+       c.Check(client.AuthToken, check.Equals, "token_from_settings_file1")
+       c.Check(client.APIHost, check.Equals, "localhost:1")
+       c.Check(client.Insecure, check.Equals, false)
+
+       // ..._INSECURE=true, comments, ignored lines in settings.conf
+       os.WriteFile(tmp+"/.config/arvados/settings.conf", []byte(`
+               (ignored) = (ignored)
+               #ARVADOS_API_HOST = localhost:2
+               ARVADOS_API_TOKEN = token_from_settings_file2
+               ARVADOS_API_HOST_INSECURE = true
+       `), 0777)
+       client = NewClientFromEnv()
+       c.Check(client.AuthToken, check.Equals, "token_from_settings_file2")
+       c.Check(client.APIHost, check.Equals, "")
+       c.Check(client.Insecure, check.Equals, true)
+
+       // Environment variables override settings.conf
+       os.Setenv("ARVADOS_API_HOST", "[::]:3")
+       os.Setenv("ARVADOS_API_HOST_INSECURE", "0")
+       client = NewClientFromEnv()
+       c.Check(client.AuthToken, check.Equals, "token_from_settings_file2")
+       c.Check(client.APIHost, check.Equals, "[::]:3")
+       c.Check(client.Insecure, check.Equals, false)
+}
diff --git a/sdk/go/arvados/config.go b/sdk/go/arvados/config.go

index b8c8269f12acba74feb00edc07ec7949e0db5fc4..6c9324e478a273b68e83dcd7fb4e46150b8e0da1 100644 (file)
--- a/sdk/go/arvados/config.go
+++ b/sdk/go/arvados/config.go
@@ -94,21 +94,24 @@ type Cluster struct {
         PostgreSQL      PostgreSQL
  
         API struct {
-               AsyncPermissionsUpdateInterval Duration
-               DisabledAPIs                   StringSet
-               MaxIndexDatabaseRead           int
-               MaxItemsPerResponse            int
-               MaxConcurrentRequests          int
-               MaxKeepBlobBuffers             int
-               MaxRequestAmplification        int
-               MaxRequestSize                 int
-               MaxTokenLifetime               Duration
-               RequestTimeout                 Duration
-               SendTimeout                    Duration
-               WebsocketClientEventQueue      int
-               WebsocketServerEventQueue      int
-               KeepServiceRequestTimeout      Duration
-               VocabularyPath                 string
+               AsyncPermissionsUpdateInterval   Duration
+               DisabledAPIs                     StringSet
+               MaxIndexDatabaseRead             int
+               MaxItemsPerResponse              int
+               MaxConcurrentRequests            int
+               MaxKeepBlobBuffers               int
+               MaxRequestAmplification          int
+               MaxRequestSize                   int
+               MaxTokenLifetime                 Duration
+               RequestTimeout                   Duration
+               SendTimeout                      Duration
+               WebsocketClientEventQueue        int
+               WebsocketServerEventQueue        int
+               KeepServiceRequestTimeout        Duration
+               VocabularyPath                   string
+               FreezeProjectRequiresDescription bool
+               FreezeProjectRequiresProperties  StringSet
+               UnfreezeProjectRequiresAdmin     bool
         }
         AuditLogs struct {
                 MaxAge             Duration
@@ -445,6 +448,7 @@ type ContainersConfig struct {
         StaleLockTimeout              Duration
         SupportedDockerImageFormats   StringSet
         AlwaysUsePreemptibleInstances bool
+       PreemptiblePriceFactor        float64
         RuntimeEngine                 string
         LocalKeepBlobBuffersPerVCPU   int
         LocalKeepLogsToContainerLog   string
diff --git a/sdk/go/arvados/group.go b/sdk/go/arvados/group.go

index d5243dc170708afac1e51556adbf145c900c4f43..ad7ac1ee2b74a0d972b2adb4570a6ba417d455b6 100644 (file)
--- a/sdk/go/arvados/group.go
+++ b/sdk/go/arvados/group.go
@@ -26,6 +26,7 @@ type Group struct {
         Properties           map[string]interface{} `json:"properties"`
         WritableBy           []string               `json:"writable_by,omitempty"`
         Description          string                 `json:"description"`
+       FrozenByUUID         string                 `json:"frozen_by_uuid"`
  }
  
  // GroupList is an arvados#groupList resource.
diff --git a/sdk/go/httpserver/logger.go b/sdk/go/httpserver/logger.go

index 7eb7f0f03d57b571e314f8d87ca6714cf7d6563f..5a46635e9102365bbfd01c9c9c120bd8e23a7026 100644 (file)
--- a/sdk/go/httpserver/logger.go
+++ b/sdk/go/httpserver/logger.go
@@ -9,6 +9,7 @@ import (
         "context"
         "net"
         "net/http"
+       "sync"
         "time"
  
         "git.arvados.org/arvados.git/sdk/go/ctxlog"
@@ -21,7 +22,9 @@ type contextKey struct {
  }
  
  var (
-       requestTimeContextKey = contextKey{"requestTime"}
+       requestTimeContextKey       = contextKey{"requestTime"}
+       responseLogFieldsContextKey = contextKey{"responseLogFields"}
+       mutexContextKey             = contextKey{"mutex"}
  )
  
  type hijacker interface {
@@ -64,6 +67,19 @@ func HandlerWithDeadline(timeout time.Duration, next http.Handler) http.Handler
         })
  }
  
+func SetResponseLogFields(ctx context.Context, fields logrus.Fields) {
+       m, _ := ctx.Value(&mutexContextKey).(*sync.Mutex)
+       c, _ := ctx.Value(&responseLogFieldsContextKey).(logrus.Fields)
+       if m == nil || c == nil {
+               return
+       }
+       m.Lock()
+       defer m.Unlock()
+       for k, v := range fields {
+               c[k] = v
+       }
+}
+
  // LogRequests wraps an http.Handler, logging each request and
  // response.
  func LogRequests(h http.Handler) http.Handler {
@@ -81,6 +97,8 @@ func LogRequests(h http.Handler) http.Handler {
                 })
                 ctx := req.Context()
                 ctx = context.WithValue(ctx, &requestTimeContextKey, time.Now())
+               ctx = context.WithValue(ctx, &responseLogFieldsContextKey, logrus.Fields{})
+               ctx = context.WithValue(ctx, &mutexContextKey, &sync.Mutex{})
                 ctx = ctxlog.Context(ctx, lgr)
                 req = req.WithContext(ctx)
  
@@ -124,6 +142,9 @@ func logResponse(w *responseTimer, req *http.Request, lgr *logrus.Entry) {
                         "timeWriteBody": stats.Duration(tDone.Sub(writeTime)),
                 })
         }
+       if responseLogFields, ok := req.Context().Value(&responseLogFieldsContextKey).(logrus.Fields); ok {
+               lgr = lgr.WithFields(responseLogFields)
+       }
         respCode := w.WroteStatus()
         if respCode == 0 {
                 respCode = http.StatusOK
diff --git a/sdk/python/arvados/arvfile.py b/sdk/python/arvados/arvfile.py

index 0fcdc1e6334957f27a5ff1f10fbdedcf2716609a..2ce0e46b30bd67ad948f832183ab091865c2ea53 100644 (file)
--- a/sdk/python/arvados/arvfile.py
+++ b/sdk/python/arvados/arvfile.py
@@ -481,7 +481,7 @@ class _BlockManager(object):
      DEFAULT_PUT_THREADS = 2
      DEFAULT_GET_THREADS = 2
  
-    def __init__(self, keep, copies=None, put_threads=None, num_retries=None, storage_classes_func=None):
+    def __init__(self, keep, copies=None, put_threads=None, num_retries=None, storage_classes_func=None, get_threads=None):
          """keep: KeepClient object to use"""
          self._keep = keep
          self._bufferblocks = collections.OrderedDict()
@@ -492,7 +492,7 @@ class _BlockManager(object):
          self.lock = threading.Lock()
          self.prefetch_enabled = True
          self.num_put_threads = put_threads or _BlockManager.DEFAULT_PUT_THREADS
-        self.num_get_threads = _BlockManager.DEFAULT_GET_THREADS
+        self.num_get_threads = get_threads or _BlockManager.DEFAULT_GET_THREADS
          self.copies = copies
          self.storage_classes = storage_classes_func or (lambda: [])
          self._pending_write_size = 0
@@ -593,7 +593,7 @@ class _BlockManager(object):
                  b = self._prefetch_queue.get()
                  if b is None:
                      return
-                self._keep.get(b)
+                self._keep.get(b, prefetch=True)
              except Exception:
                  _logger.exception("Exception doing block prefetch")
  
@@ -841,9 +841,6 @@ class _BlockManager(object):
          if not self.prefetch_enabled:
              return
  
-        if self._keep.get_from_cache(locator) is not None:
-            return
-
          with self.lock:
              if locator in self._bufferblocks:
                  return
@@ -1099,7 +1096,7 @@ class ArvadosFile(object):
              if size == 0 or offset >= self.size():
                  return b''
              readsegs = locators_and_ranges(self._segments, offset, size)
-            prefetch = locators_and_ranges(self._segments, offset + size, config.KEEP_BLOCK_SIZE, limit=32)
+            prefetch = locators_and_ranges(self._segments, offset + size, config.KEEP_BLOCK_SIZE * self.parent._my_block_manager().num_get_threads, limit=32)
  
          locs = set()
          data = []
diff --git a/sdk/python/arvados/collection.py b/sdk/python/arvados/collection.py

index a076de6baf622f560f92859db68e7e8cdafc65f9..a44d42b6ac7cd7a4156ab3b8bc4f72f86060e3a0 100644 (file)
--- a/sdk/python/arvados/collection.py
+++ b/sdk/python/arvados/collection.py
@@ -1262,7 +1262,8 @@ class Collection(RichCollectionBase):
                   block_manager=None,
                   replication_desired=None,
                   storage_classes_desired=None,
-                 put_threads=None):
+                 put_threads=None,
+                 get_threads=None):
          """Collection constructor.
  
          :manifest_locator_or_text:
@@ -1311,6 +1312,7 @@ class Collection(RichCollectionBase):
          self.replication_desired = replication_desired
          self._storage_classes_desired = storage_classes_desired
          self.put_threads = put_threads
+        self.get_threads = get_threads
  
          if apiconfig:
              self._config = apiconfig
@@ -1424,7 +1426,12 @@ class Collection(RichCollectionBase):
              copies = (self.replication_desired or
                        self._my_api()._rootDesc.get('defaultCollectionReplication',
                                                     2))
-            self._block_manager = _BlockManager(self._my_keep(), copies=copies, put_threads=self.put_threads, num_retries=self.num_retries, storage_classes_func=self.storage_classes_desired)
+            self._block_manager = _BlockManager(self._my_keep(),
+                                                copies=copies,
+                                                put_threads=self.put_threads,
+                                                num_retries=self.num_retries,
+                                                storage_classes_func=self.storage_classes_desired,
+                                                get_threads=self.get_threads,)
          return self._block_manager
  
      def _remember_api_response(self, response):
diff --git a/sdk/python/arvados/commands/get.py b/sdk/python/arvados/commands/get.py

index eb682976253b66a3c0c6c0ebd0a9dc5ca306d499..c061c70f0eebbac2ed2025fdecd27865c27139b8 100755 (executable)
--- a/sdk/python/arvados/commands/get.py
+++ b/sdk/python/arvados/commands/get.py
@@ -98,6 +98,15 @@ When getting a collection manifest, strip its access tokens before writing
  it.
  """)
  
+parser.add_argument('--threads', type=int, metavar='N', default=4,
+                    help="""
+Set the number of download threads to be used. Take into account that
+using lots of threads will increase the RAM requirements. Default is
+to use 4 threads.
+On high latency installations, using a greater number will improve
+overall throughput.
+""")
+
  def parse_arguments(arguments, stdout, stderr):
      args = parser.parse_args(arguments)
  
@@ -191,7 +200,9 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr):
  
      try:
          reader = arvados.CollectionReader(
-            col_loc, api_client=api_client, num_retries=args.retries)
+            col_loc, api_client=api_client, num_retries=args.retries,
+            keep_client=arvados.keep.KeepClient(block_cache=arvados.keep.KeepBlockCache((args.threads+1)*64 * 1024 * 1024)),
+            get_threads=args.threads)
      except Exception as error:
          logger.error("failed to read collection: {}".format(error))
          return 1
diff --git a/sdk/python/arvados/keep.py b/sdk/python/arvados/keep.py

index 1a83eae944c59f8dde5e3a7c63de8bbe9c62a9c9..7c05cc0a6a2c72ca818686b6eea5c6f0a4874d3d 100644 (file)
--- a/sdk/python/arvados/keep.py
+++ b/sdk/python/arvados/keep.py
@@ -1036,9 +1036,10 @@ class KeepClient(object):
          else:
              return None
  
-    def get_from_cache(self, loc):
+    def get_from_cache(self, loc_s):
          """Fetch a block only if is in the cache, otherwise return None."""
-        slot = self.block_cache.get(loc)
+        locator = KeepLocator(loc_s)
+        slot = self.block_cache.get(locator.md5sum)
          if slot is not None and slot.ready.is_set():
              return slot.get()
          else:
@@ -1057,7 +1058,7 @@ class KeepClient(object):
      def get(self, loc_s, **kwargs):
          return self._get_or_head(loc_s, method="GET", **kwargs)
  
-    def _get_or_head(self, loc_s, method="GET", num_retries=None, request_id=None, headers=None):
+    def _get_or_head(self, loc_s, method="GET", num_retries=None, request_id=None, headers=None, prefetch=False):
          """Get data from Keep.
  
          This method fetches one or more blocks of data from Keep.  It
@@ -1096,6 +1097,13 @@ class KeepClient(object):
              if method == "GET":
                  slot, first = self.block_cache.reserve_cache(locator.md5sum)
                  if not first:
+                    if prefetch:
+                        # this is request for a prefetch, if it is
+                        # already in flight, return immediately.
+                        # clear 'slot' to prevent finally block from
+                        # calling slot.set()
+                        slot = None
+                        return None
                      self.hits_counter.add(1)
                      blob = slot.get()
                      if blob is None:
@@ -1332,6 +1340,3 @@ class KeepClient(object):
              return True
          if os.path.exists(os.path.join(self.local_store, locator.md5sum)):
              return True
-
-    def is_cached(self, locator):
-        return self.block_cache.reserve_cache(expect_hash)
diff --git a/sdk/python/arvados/util.py b/sdk/python/arvados/util.py

index be8a03fc314d2cf599c16d5f44b1ab61cc9e885d..c383d529e8087da579fcf4ae6814f76a57044e29 100644 (file)
--- a/sdk/python/arvados/util.py
+++ b/sdk/python/arvados/util.py
@@ -392,7 +392,8 @@ def keyset_list_all(fn, order_key="created_at", num_retries=0, ascending=True, *
      pagesize = 1000
      kwargs["limit"] = pagesize
      kwargs["count"] = 'none'
-    kwargs["order"] = ["%s %s" % (order_key, "asc" if ascending else "desc"), "uuid asc"]
+    asc = "asc" if ascending else "desc"
+    kwargs["order"] = ["%s %s" % (order_key, asc), "uuid %s" % asc]
      other_filters = kwargs.get("filters", [])
  
      if "select" in kwargs and "uuid" not in kwargs["select"]:
@@ -436,7 +437,7 @@ def keyset_list_all(fn, order_key="created_at", num_retries=0, ascending=True, *
          if firstitem[order_key] == lastitem[order_key]:
              # Got a page where every item has the same order key.
              # Switch to using uuid for paging.
-            nextpage = [[order_key, "=", lastitem[order_key]], ["uuid", ">", lastitem["uuid"]]]
+            nextpage = [[order_key, "=", lastitem[order_key]], ["uuid", ">" if ascending else "<", lastitem["uuid"]]]
              prev_page_all_same_order_key = True
          else:
              # Start from the last order key seen, but skip the last
diff --git a/sdk/python/tests/test_arvfile.py b/sdk/python/tests/test_arvfile.py

index 0b8e7c8f8bf2d4615209e0dbdbfd81e6e54b32af..b45a592ecd0fbd1b1d4722bd63f6e1e0b25514dd 100644 (file)
--- a/sdk/python/tests/test_arvfile.py
+++ b/sdk/python/tests/test_arvfile.py
@@ -27,7 +27,7 @@ class ArvadosFileWriterTestCase(unittest.TestCase):
          def __init__(self, blocks):
              self.blocks = blocks
              self.requests = []
-        def get(self, locator, num_retries=0):
+        def get(self, locator, num_retries=0, prefetch=False):
              self.requests.append(locator)
              return self.blocks.get(locator)
          def get_from_cache(self, locator):
@@ -627,6 +627,7 @@ class ArvadosFileReaderTestCase(StreamFileReaderTestCase):
              def __init__(self, blocks, nocache):
                  self.blocks = blocks
                  self.nocache = nocache
+                self.num_get_threads = 1
  
              def block_prefetch(self, loc):
                  pass
diff --git a/sdk/python/tests/test_collections.py b/sdk/python/tests/test_collections.py

index a43e0d40dfe7ed48f5477689d3623afefe952ba3..5cf4993b2f3804d22209ae16db41fc7bc505efd8 100644 (file)
--- a/sdk/python/tests/test_collections.py
+++ b/sdk/python/tests/test_collections.py
@@ -320,7 +320,7 @@ class ArvadosCollectionsTest(run_test_server.TestCaseWithServers,
          def __init__(self, content, num_retries=0):
              self.content = content
  
-        def get(self, locator, num_retries=0):
+        def get(self, locator, num_retries=0, prefetch=False):
              return self.content[locator]
  
      def test_stream_reader(self):
diff --git a/sdk/python/tests/test_util.py b/sdk/python/tests/test_util.py

index 1c0e437b41196bf1e56e2048e11029725b01da2e..4dba9ce3dc7a5105533a526fe3ee304ed60d784c 100644 (file)
--- a/sdk/python/tests/test_util.py
+++ b/sdk/python/tests/test_util.py
@@ -166,10 +166,10 @@ class KeysetListAllTestCase(unittest.TestCase):
  
      def test_onepage_desc(self):
          ks = KeysetTestHelper([[
-            {"limit": 1000, "count": "none", "order": ["created_at desc", "uuid asc"], "filters": []},
+            {"limit": 1000, "count": "none", "order": ["created_at desc", "uuid desc"], "filters": []},
              {"items": [{"created_at": "2", "uuid": "2"}, {"created_at": "1", "uuid": "1"}]}
          ], [
-            {"limit": 1000, "count": "none", "order": ["created_at desc", "uuid asc"], "filters": [["created_at", "<=", "1"], ["uuid", "!=", "1"]]},
+            {"limit": 1000, "count": "none", "order": ["created_at desc", "uuid desc"], "filters": [["created_at", "<=", "1"], ["uuid", "!=", "1"]]},
              {"items": []}
          ]])
  
diff --git a/services/api/app/controllers/arvados/v1/links_controller.rb b/services/api/app/controllers/arvados/v1/links_controller.rb

index 384ffd64b7080190c21a4ec46f2ab7ea31d97c83..7716a3d5cffd9f6ac44d3bb691d06c98ddf7acf2 100644 (file)
--- a/services/api/app/controllers/arvados/v1/links_controller.rb
+++ b/services/api/app/controllers/arvados/v1/links_controller.rb
@@ -26,8 +26,8 @@ class Arvados::V1::LinksController < ApplicationController
    def get_permissions
      if current_user.andand.can?(manage: @object)
        # find all links and return them
-      @objects = Link.where(link_class: "permission",
-                            head_uuid: params[:uuid])
+      @objects = Link.unscoped.where(link_class: "permission",
+                                     head_uuid: params[:uuid])
        @offset = 0
        @limit = @objects.count
        render_list
@@ -39,14 +39,37 @@ class Arvados::V1::LinksController < ApplicationController
    protected
  
    def find_object_by_uuid
+    if params[:id] && params[:id].match(/\D/)
+      params[:uuid] = params.delete :id
+    end
      if action_name == 'get_permissions'
        # get_permissions accepts a UUID for any kind of object.
        @object = ArvadosModel::resource_class_for_uuid(params[:uuid])
          .readable_by(*@read_users)
          .where(uuid: params[:uuid])
          .first
-    else
+    elsif !current_user
        super
+    else
+      # The usual permission-filtering index query is unnecessarily
+      # inefficient, and doesn't match all permission links that
+      # should be visible (see #18865).  Instead, we look up the link
+      # by UUID, then check whether (a) its tail_uuid is the current
+      # user or (b) its head_uuid is an object the current_user
+      # can_manage.
+      link = Link.unscoped.where(uuid: params[:uuid]).first
+      if link && link.link_class != 'permission'
+        # Not a permission link. Re-fetch using generic
+        # permission-filtering query.
+        super
+      elsif link && (current_user.uuid == link.tail_uuid ||
+                     current_user.can?(manage: link.head_uuid))
+        # Permission granted.
+        @object = link
+      else
+        # Permission denied, i.e., link is invisible => 404.
+        @object = nil
+      end
      end
    end
  
@@ -86,6 +109,32 @@ class Arvados::V1::LinksController < ApplicationController
          k
        end
      end
+
+    # If the provided filters are enough to limit the results to
+    # permission links with specific head_uuids or
+    # tail_uuid=current_user, bypass the normal readable_by query
+    # (which doesn't match all can_manage-able items, see #18865) --
+    # just ensure the current user actually has can_manage permission
+    # for the provided head_uuids, removing any that don't. At that
+    # point the caller's filters are an effective permission filter.
+    if @filters.include?(['link_class', '=', 'permission'])
+      @filters.map do |k|
+        if k[0] == 'tail_uuid' && k[1] == '=' && k[2] == current_user.uuid
+          @objects = Link.unscoped
+        elsif k[0] == 'head_uuid'
+          if k[1] == '=' && current_user.can?(manage: k[2])
+            @objects = Link.unscoped
+          elsif k[1] == 'in'
+            # Modify the filter operand element (k[2]) in place,
+            # removing any non-permitted UUIDs.
+            k[2].select! do |head_uuid|
+              current_user.can?(manage: head_uuid)
+            end
+            @objects = Link.unscoped
+          end
+        end
+      end
+    end
    end
  
  end
diff --git a/services/api/app/models/api_client_authorization.rb b/services/api/app/models/api_client_authorization.rb

index 993a49e5b75e7ecfb782a306df16c74b37fbed4a..52922d32b1868fdb53d8bcd3f1197d149a93bb63 100644 (file)
--- a/services/api/app/models/api_client_authorization.rb
+++ b/services/api/app/models/api_client_authorization.rb
@@ -116,7 +116,7 @@ class ApiClientAuthorization < ArvadosModel
      clnt
    end
  
-  def self.check_anonymous_user_token token
+  def self.check_anonymous_user_token(token:, remote:)
      case token[0..2]
      when 'v2/'
        _, token_uuid, secret, optional = token.split('/')
@@ -130,11 +130,16 @@ class ApiClientAuthorization < ArvadosModel
        secret = token
      end
  
+    # Usually, the secret is salted
+    salted_secret = OpenSSL::HMAC.hexdigest('sha1', Rails.configuration.Users.AnonymousUserToken, remote)
+
+    # The anonymous token could be specified as a full v2 token in the config,
+    # but the config loader strips it down to the secret part.
      # The anonymous token content and minimum length is verified in lib/config
-    if secret.length >= 0 && secret == Rails.configuration.Users.AnonymousUserToken
+    if secret.length >= 0 && (secret == Rails.configuration.Users.AnonymousUserToken || secret == salted_secret)
        return ApiClientAuthorization.new(user: User.find_by_uuid(anonymous_user_uuid),
                                          uuid: Rails.configuration.ClusterID+"-gj3su-anonymouspublic",
-                                        api_token: token,
+                                        api_token: secret,
                                          api_client: anonymous_user_token_api_client,
                                          scopes: ['GET /'])
      else
@@ -157,7 +162,7 @@ class ApiClientAuthorization < ArvadosModel
      return nil if token.nil? or token.empty?
      remote ||= Rails.configuration.ClusterID
  
-    auth = self.check_anonymous_user_token(token)
+    auth = self.check_anonymous_user_token(token: token, remote: remote)
      if !auth.nil?
        return auth
      end
diff --git a/services/api/app/models/arvados_model.rb b/services/api/app/models/arvados_model.rb

index 1c842c48d6bf1d283ae08608dc242967b0cc2b56..07a31d81a8a129dc67acb4aa1a7fa8f39e253ea1 100644 (file)
--- a/services/api/app/models/arvados_model.rb
+++ b/services/api/app/models/arvados_model.rb
@@ -220,7 +220,7 @@ class ArvadosModel < ApplicationRecord
    end
  
    def self.default_orders
-    ["#{table_name}.modified_at desc", "#{table_name}.uuid"]
+    ["#{table_name}.modified_at desc", "#{table_name}.uuid desc"]
    end
  
    def self.unique_columns
@@ -246,6 +246,15 @@ class ArvadosModel < ApplicationRecord
    # If current user cannot write this object, just return
    # [self.owner_uuid].
    def writable_by
+    # Return [] if this is a frozen project and the current user can't
+    # unfreeze
+    return [] if respond_to?(:frozen_by_uuid) && frozen_by_uuid &&
+                 (Rails.configuration.API.UnfreezeProjectRequiresAdmin ?
+                    !current_user.andand.is_admin :
+                    !current_user.can?(manage: uuid))
+    # Return [] if nobody can write because this object is inside a
+    # frozen project
+    return [] if FrozenGroup.where(uuid: owner_uuid).any?
      return [owner_uuid] if not current_user
      unless (owner_uuid == current_user.uuid or
              current_user.is_admin or
@@ -288,26 +297,41 @@ class ArvadosModel < ApplicationRecord
      user_uuids = users_list.map { |u| u.uuid }
      all_user_uuids = []
  
+    admin = users_list.select { |u| u.is_admin }.any?
+
      # For details on how the trashed_groups table is constructed, see
      # see db/migrate/20200501150153_permission_table.rb
  
-    exclude_trashed_records = ""
-    if !include_trash and (sql_table == "groups" or sql_table == "collections") then
-      # Only include records that are not trashed
-      exclude_trashed_records = "AND (#{sql_table}.trash_at is NULL or #{sql_table}.trash_at > statement_timestamp())"
-    end
+    # excluded_trash is a SQL expression that determines whether a row
+    # should be excluded from the results due to being trashed.
+    # Trashed items inside frozen projects are invisible to regular
+    # (non-admin) users even when using include_trash, so we have:
+    #
+    # (item_trashed || item_inside_trashed_project)
+    # &&
+    # (!caller_requests_include_trash ||
+    #  (item_inside_frozen_project && caller_is_not_admin))
+    if (admin && include_trash) || sql_table == "api_client_authorizations"
+      excluded_trash = "false"
+    else
+      excluded_trash = "(#{sql_table}.owner_uuid IN (SELECT group_uuid FROM #{TRASHED_GROUPS} " +
+                       "WHERE trash_at <= statement_timestamp()))"
+      if sql_table == "groups" || sql_table == "collections"
+        excluded_trash = "(#{excluded_trash} OR #{sql_table}.trash_at <= statement_timestamp() IS TRUE)"
+      end
  
-    trashed_check = ""
-    if !include_trash && sql_table != "api_client_authorizations"
-      trashed_check = "#{sql_table}.owner_uuid NOT IN (SELECT group_uuid FROM #{TRASHED_GROUPS} " +
-                      "where trash_at <= statement_timestamp()) #{exclude_trashed_records}"
+      if include_trash
+        # Exclude trash inside frozen projects
+        excluded_trash = "(#{excluded_trash} AND #{sql_table}.owner_uuid IN (SELECT uuid FROM #{FROZEN_GROUPS}))"
+      end
      end
  
-    if users_list.select { |u| u.is_admin }.any?
-      # Admin skips most permission checks, but still want to filter on trashed items.
+    if admin
+      # Admin skips most permission checks, but still want to filter
+      # on trashed items.
        if !include_trash && sql_table != "api_client_authorizations"
          # Only include records where the owner is not trashed
-        sql_conds = trashed_check
+        sql_conds = "NOT (#{excluded_trash})"
        end
      else
        # The core of the permission check is a join against the
@@ -407,7 +431,7 @@ class ArvadosModel < ApplicationRecord
                       "    WHERE user_uuid IN (#{user_uuids_subquery}) AND perm_level >= 3))) "
        end
  
-      sql_conds = "(#{owner_check} #{direct_check} #{links_cond}) #{trashed_check.empty? ? "" : "AND"} #{trashed_check}"
+      sql_conds = "(#{owner_check} #{direct_check} #{links_cond}) AND NOT (#{excluded_trash})"
  
      end
  
@@ -614,8 +638,12 @@ class ArvadosModel < ApplicationRecord
          if check_uuid.nil?
            # old_owner_uuid is nil? New record, no need to check.
          elsif !current_user.can?(write: check_uuid)
-          logger.warn "User #{current_user.uuid} tried to set ownership of #{self.class.to_s} #{self.uuid} but does not have permission to write #{which} owner_uuid #{check_uuid}"
-          errors.add :owner_uuid, "cannot be set or changed without write permission on #{which} owner"
+          if FrozenGroup.where(uuid: check_uuid).any?
+            errors.add :owner_uuid, "cannot be set or changed because #{which} owner is frozen"
+          else
+            logger.warn "User #{current_user.uuid} tried to set ownership of #{self.class.to_s} #{self.uuid} but does not have permission to write #{which} owner_uuid #{check_uuid}"
+            errors.add :owner_uuid, "cannot be set or changed without write permission on #{which} owner"
+          end
            raise PermissionDeniedError
          elsif rsc_class == Group && Group.find_by_uuid(owner_uuid).group_class != "project"
            errors.add :owner_uuid, "must be a project"
@@ -625,8 +653,12 @@ class ArvadosModel < ApplicationRecord
      else
        # If the object already existed and we're not changing
        # owner_uuid, we only need write permission on the object
-      # itself.
-      if !current_user.can?(write: self.uuid)
+      # itself. (If we're in the act of unfreezing, we only need
+      # :unfreeze permission, which means "what write permission would
+      # be if target weren't frozen")
+      unless ((respond_to?(:frozen_by_uuid) && frozen_by_uuid_was && !frozen_by_uuid) ?
+                current_user.can?(unfreeze: uuid) :
+                current_user.can?(write: uuid))
          logger.warn "User #{current_user.uuid} tried to modify #{self.class.to_s} #{self.uuid} without write permission"
          errors.add :uuid, " #{uuid} is not writable by #{current_user.uuid}"
          raise PermissionDeniedError
@@ -643,7 +675,7 @@ class ArvadosModel < ApplicationRecord
    end
  
    def permission_to_create
-    current_user.andand.is_active
+    return current_user.andand.is_active
    end
  
    def permission_to_update
diff --git a/services/api/app/models/frozen_group.rb b/services/api/app/models/frozen_group.rb

new file mode 100644 (file)

index 0000000..bf4ee5d
--- /dev/null
+++ b/services/api/app/models/frozen_group.rb
@@ -0,0 +1,6 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+class FrozenGroup < ApplicationRecord
+end
diff --git a/services/api/app/models/group.rb b/services/api/app/models/group.rb

index 8565b2a417efc0a28d611c2ff1ed873acfc39a8c..b1b2e942c60c4bc79c13dbe731ad9bc1112684d2 100644 (file)
--- a/services/api/app/models/group.rb
+++ b/services/api/app/models/group.rb
@@ -19,9 +19,11 @@ class Group < ArvadosModel
    validate :ensure_filesystem_compatible_name
    validate :check_group_class
    validate :check_filter_group_filters
+  validate :check_frozen_state_change_allowed
    before_create :assign_name
    after_create :after_ownership_change
    after_create :update_trash
+  after_create :update_frozen
  
    before_update :before_ownership_change
    after_update :after_ownership_change
@@ -29,7 +31,8 @@ class Group < ArvadosModel
    after_create :add_role_manage_link
  
    after_update :update_trash
-  before_destroy :clear_permissions_and_trash
+  after_update :update_frozen
+  before_destroy :clear_permissions_trash_frozen
  
    api_accessible :user, extend: :common do |t|
      t.add :name
@@ -40,6 +43,7 @@ class Group < ArvadosModel
      t.add :trash_at
      t.add :is_trashed
      t.add :properties
+    t.add :frozen_by_uuid
    end
  
    def ensure_filesystem_compatible_name
@@ -92,37 +96,104 @@ class Group < ArvadosModel
      end
    end
  
+  def check_frozen_state_change_allowed
+    if frozen_by_uuid == ""
+      self.frozen_by_uuid = nil
+    end
+    if frozen_by_uuid_changed? || (new_record? && frozen_by_uuid)
+      if group_class != "project"
+        errors.add(:frozen_by_uuid, "cannot be modified on a non-project group")
+        return
+      end
+      if frozen_by_uuid_was && Rails.configuration.API.UnfreezeProjectRequiresAdmin && !current_user.is_admin
+        errors.add(:frozen_by_uuid, "can only be changed by an admin user, once set")
+        return
+      end
+      if frozen_by_uuid && frozen_by_uuid != current_user.uuid
+        errors.add(:frozen_by_uuid, "can only be set to the current user's UUID")
+        return
+      end
+      if !new_record? && !current_user.can?(manage: uuid)
+        raise PermissionDeniedError
+      end
+      if trash_at || delete_at || (!new_record? && TrashedGroup.where(group_uuid: uuid).any?)
+        errors.add(:frozen_by_uuid, "cannot be set on a trashed project")
+      end
+      if frozen_by_uuid_was.nil?
+        if Rails.configuration.API.FreezeProjectRequiresDescription && !attribute_present?(:description)
+          errors.add(:frozen_by_uuid, "can only be set if description is non-empty")
+        end
+        Rails.configuration.API.FreezeProjectRequiresProperties.andand.each do |key, _|
+          key = key.to_s
+          if !properties[key] || properties[key] == ""
+            errors.add(:frozen_by_uuid, "can only be set if properties[#{key}] value is non-empty")
+          end
+        end
+      end
+    end
+  end
+
    def update_trash
-    if saved_change_to_trash_at? or saved_change_to_owner_uuid?
-      # The group was added or removed from the trash.
-      #
-      # Strategy:
-      #   Compute project subtree, propagating trash_at to subprojects
-      #   Remove groups that don't belong from trash
-      #   Add/update groups that do belong in the trash
-
-      temptable = "group_subtree_#{rand(2**64).to_s(10)}"
-      ActiveRecord::Base.connection.exec_query %{
-create temporary table #{temptable} on commit drop
-as select * from project_subtree_with_trash_at($1, LEAST($2, $3)::timestamp)
-},
-                                               'Group.update_trash.select',
-                                               [[nil, self.uuid],
-                                                [nil, TrashedGroup.find_by_group_uuid(self.owner_uuid).andand.trash_at],
-                                                [nil, self.trash_at]]
-
-      ActiveRecord::Base.connection.exec_delete %{
-delete from trashed_groups where group_uuid in (select target_uuid from #{temptable} where trash_at is NULL);
-},
-                                            "Group.update_trash.delete"
-
-      ActiveRecord::Base.connection.exec_query %{
-insert into trashed_groups (group_uuid, trash_at)
-  select target_uuid as group_uuid, trash_at from #{temptable} where trash_at is not NULL
-on conflict (group_uuid) do update set trash_at=EXCLUDED.trash_at;
-},
-                                            "Group.update_trash.insert"
+    return unless saved_change_to_trash_at? || saved_change_to_owner_uuid?
+
+    # The group was added or removed from the trash.
+    #
+    # Strategy:
+    #   Compute project subtree, propagating trash_at to subprojects
+    #   Ensure none of the newly trashed descendants were frozen (if so, bail out)
+    #   Remove groups that don't belong from trash
+    #   Add/update groups that do belong in the trash
+
+    temptable = "group_subtree_#{rand(2**64).to_s(10)}"
+    ActiveRecord::Base.connection.exec_query(
+      "create temporary table #{temptable} on commit drop " +
+      "as select * from project_subtree_with_trash_at($1, LEAST($2, $3)::timestamp)",
+      "Group.update_trash.select",
+      [[nil, self.uuid],
+       [nil, TrashedGroup.find_by_group_uuid(self.owner_uuid).andand.trash_at],
+       [nil, self.trash_at]])
+    frozen_descendants = ActiveRecord::Base.connection.exec_query(
+      "select uuid from frozen_groups, #{temptable} where uuid = target_uuid",
+      "Group.update_trash.check_frozen")
+    if frozen_descendants.any?
+      raise ArgumentError.new("cannot trash project containing frozen project #{frozen_descendants[0]["uuid"]}")
+    end
+    ActiveRecord::Base.connection.exec_delete(
+      "delete from trashed_groups where group_uuid in (select target_uuid from #{temptable} where trash_at is NULL)",
+      "Group.update_trash.delete")
+    ActiveRecord::Base.connection.exec_query(
+      "insert into trashed_groups (group_uuid, trash_at) "+
+      "select target_uuid as group_uuid, trash_at from #{temptable} where trash_at is not NULL " +
+      "on conflict (group_uuid) do update set trash_at=EXCLUDED.trash_at",
+      "Group.update_trash.insert")
+  end
+
+  def update_frozen
+    return unless saved_change_to_frozen_by_uuid? || saved_change_to_owner_uuid?
+    temptable = "group_subtree_#{rand(2**64).to_s(10)}"
+    ActiveRecord::Base.connection.exec_query(
+      "create temporary table #{temptable} on commit drop as select * from project_subtree_with_is_frozen($1,$2)",
+      "Group.update_frozen.select",
+      [[nil, self.uuid],
+       [nil, !self.frozen_by_uuid.nil?]])
+    if frozen_by_uuid
+      rows = ActiveRecord::Base.connection.exec_query(
+        "select cr.uuid, cr.state from container_requests cr, #{temptable} frozen " +
+        "where cr.owner_uuid = frozen.uuid and frozen.is_frozen " +
+        "and cr.state not in ($1, $2) limit 1",
+        "Group.update_frozen.check_container_requests",
+        [[nil, ContainerRequest::Uncommitted],
+         [nil, ContainerRequest::Final]])
+      if rows.any?
+        raise ArgumentError.new("cannot freeze project containing container request #{rows.first['uuid']} with state = #{rows.first['state']}")
+      end
      end
+    ActiveRecord::Base.connection.exec_delete(
+      "delete from frozen_groups where uuid in (select uuid from #{temptable} where not is_frozen)",
+      "Group.update_frozen.delete")
+    ActiveRecord::Base.connection.exec_query(
+      "insert into frozen_groups (uuid) select uuid from #{temptable} where is_frozen on conflict do nothing",
+      "Group.update_frozen.insert")
    end
  
    def before_ownership_change
@@ -138,12 +209,16 @@ on conflict (group_uuid) do update set trash_at=EXCLUDED.trash_at;
      end
    end
  
-  def clear_permissions_and_trash
+  def clear_permissions_trash_frozen
      MaterializedPermission.where(target_uuid: uuid).delete_all
-    ActiveRecord::Base.connection.exec_delete %{
-delete from trashed_groups where group_uuid=$1
-}, "Group.clear_permissions_and_trash", [[nil, self.uuid]]
-
+    ActiveRecord::Base.connection.exec_delete(
+      "delete from trashed_groups where group_uuid=$1",
+      "Group.clear_permissions_trash_frozen",
+      [[nil, self.uuid]])
+    ActiveRecord::Base.connection.exec_delete(
+      "delete from frozen_groups where uuid=$1",
+      "Group.clear_permissions_trash_frozen",
+      [[nil, self.uuid]])
    end
  
    def assign_name
@@ -181,4 +256,19 @@ delete from trashed_groups where group_uuid=$1
        end
      end
    end
+
+  def permission_to_update
+    if !super
+      return false
+    elsif frozen_by_uuid && frozen_by_uuid_was
+      errors.add :uuid, "#{uuid} is frozen and cannot be modified"
+      return false
+    else
+      return true
+    end
+  end
+
+  def self.full_text_searchable_columns
+    super - ["frozen_by_uuid"]
+  end
  end
diff --git a/services/api/app/models/user.rb b/services/api/app/models/user.rb

index 811cd89758d9ef8642be5382c9bac54b4f649134..bbb2378f5c56becac22646212beb343549da5170 100644 (file)
--- a/services/api/app/models/user.rb
+++ b/services/api/app/models/user.rb
@@ -87,6 +87,7 @@ class User < ArvadosModel
    VAL_FOR_PERM =
      {:read => 1,
       :write => 2,
+     :unfreeze => 3,
       :manage => 3}
  
  
@@ -141,6 +142,23 @@ SELECT 1 FROM #{PERMISSION_VIEW}
                    ).any?
          return false
        end
+
+      if action == :write
+        if FrozenGroup.where(uuid: [target_uuid, target_owner_uuid]).any?
+          # self or parent is frozen
+          return false
+        end
+      elsif action == :unfreeze
+        # "unfreeze" permission means "can write, but only if
+        # explicitly un-freezing at the same time" (see
+        # ArvadosModel#ensure_owner_uuid_is_permitted). If the
+        # permission query above passed the permission level of
+        # :unfreeze (which is the same as :manage), and the parent
+        # isn't also frozen, then un-freeze is allowed.
+        if FrozenGroup.where(uuid: target_owner_uuid).any?
+          return false
+        end
+      end
      end
      true
    end
diff --git a/services/api/db/migrate/20220224203102_add_frozen_by_uuid_to_groups.rb b/services/api/db/migrate/20220224203102_add_frozen_by_uuid_to_groups.rb

new file mode 100644 (file)

index 0000000..d730b25
--- /dev/null
+++ b/services/api/db/migrate/20220224203102_add_frozen_by_uuid_to_groups.rb
@@ -0,0 +1,9 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+class AddFrozenByUuidToGroups < ActiveRecord::Migration[5.2]
+  def change
+    add_column :groups, :frozen_by_uuid, :string
+  end
+end
diff --git a/services/api/db/migrate/20220301155729_frozen_groups.rb b/services/api/db/migrate/20220301155729_frozen_groups.rb

new file mode 100644 (file)

index 0000000..730d051
--- /dev/null
+++ b/services/api/db/migrate/20220301155729_frozen_groups.rb
@@ -0,0 +1,39 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+require '20200501150153_permission_table_constants'
+
+class FrozenGroups < ActiveRecord::Migration[5.0]
+  def up
+    create_table :frozen_groups, :id => false do |t|
+      t.string :uuid
+    end
+    add_index :frozen_groups, :uuid, :unique => true
+
+    ActiveRecord::Base.connection.execute %{
+create or replace function project_subtree_with_is_frozen (starting_uuid varchar(27), starting_is_frozen boolean)
+returns table (uuid varchar(27), is_frozen boolean)
+STABLE
+language SQL
+as $$
+WITH RECURSIVE
+  project_subtree(uuid, is_frozen) as (
+    values (starting_uuid, starting_is_frozen)
+    union
+    select groups.uuid, project_subtree.is_frozen or groups.frozen_by_uuid is not null
+      from groups join project_subtree on project_subtree.uuid = groups.owner_uuid
+  )
+  select uuid, is_frozen from project_subtree;
+$$;
+}
+
+    # Initialize the table. After this, it is updated incrementally.
+    # See app/models/group.rb#update_frozen_groups
+    refresh_frozen
+  end
+
+  def down
+    drop_table :frozen_groups
+  end
+end
diff --git a/services/api/db/migrate/20220303204419_add_frozen_by_uuid_to_group_search_index.rb b/services/api/db/migrate/20220303204419_add_frozen_by_uuid_to_group_search_index.rb

new file mode 100644 (file)

index 0000000..52fb16b
--- /dev/null
+++ b/services/api/db/migrate/20220303204419_add_frozen_by_uuid_to_group_search_index.rb
@@ -0,0 +1,17 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+class AddFrozenByUuidToGroupSearchIndex < ActiveRecord::Migration[5.0]
+  disable_ddl_transaction!
+
+  def up
+    remove_index :groups, :name => 'groups_search_index'
+    add_index :groups, ["uuid", "owner_uuid", "modified_by_client_uuid", "modified_by_user_uuid", "name", "group_class", "frozen_by_uuid"], name: 'groups_search_index', algorithm: :concurrently
+  end
+
+  def down
+    remove_index :groups, :name => 'groups_search_index'
+    add_index :groups, ["uuid", "owner_uuid", "modified_by_client_uuid", "modified_by_user_uuid", "name", "group_class"], name: 'groups_search_index', algorithm: :concurrently
+  end
+end
diff --git a/services/api/db/migrate/20220401153101_fix_created_at_indexes.rb b/services/api/db/migrate/20220401153101_fix_created_at_indexes.rb

new file mode 100644 (file)

index 0000000..590e841
--- /dev/null
+++ b/services/api/db/migrate/20220401153101_fix_created_at_indexes.rb
@@ -0,0 +1,28 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+class FixCreatedAtIndexes < ActiveRecord::Migration[5.2]
+  @@idxtables = [:collections, :container_requests, :groups, :links, :repositories, :users, :virtual_machines, :workflows, :logs]
+
+  def up
+    @@idxtables.each do |table|
+      ActiveRecord::Base.connection.execute("DROP INDEX IF EXISTS index_#{table.to_s}_on_created_at")
+      ActiveRecord::Base.connection.execute("DROP INDEX IF EXISTS index_#{table.to_s}_on_created_at_uuid")
+      ActiveRecord::Base.connection.execute("DROP INDEX IF EXISTS index_#{table.to_s}_on_created_at_and_uuid")
+      ActiveRecord::Base.connection.execute("DROP INDEX IF EXISTS index_#{table.to_s}_on_modified_at")
+      ActiveRecord::Base.connection.execute("DROP INDEX IF EXISTS index_#{table.to_s}_on_modified_at_uuid")
+      ActiveRecord::Base.connection.execute("DROP INDEX IF EXISTS index_#{table.to_s}_on_modified_at_and_uuid")
+
+      ActiveRecord::Base.connection.execute("CREATE INDEX IF NOT EXISTS index_#{table.to_s}_on_created_at_and_uuid ON #{table.to_s} USING btree (created_at, uuid)")
+      ActiveRecord::Base.connection.execute("CREATE INDEX IF NOT EXISTS index_#{table.to_s}_on_modified_at_and_uuid ON #{table.to_s} USING btree (modified_at, uuid)")
+    end
+  end
+
+  def down
+    @@idxtables.each do |table|
+      ActiveRecord::Base.connection.execute("DROP INDEX IF EXISTS index_#{table.to_s}_on_modified_at_and_uuid")
+      ActiveRecord::Base.connection.execute("CREATE INDEX IF NOT EXISTS index_#{table.to_s}_on_modified_at_uuid ON #{table.to_s} USING btree (modified_at desc, uuid asc)")
+    end
+  end
+end
diff --git a/services/api/db/structure.sql b/services/api/db/structure.sql

index da9959593c0a28853831d3cc6bd464be28bdbe07..e6bba676257118d542333120a5815549d2cb1f64 100644 (file)
--- a/services/api/db/structure.sql
+++ b/services/api/db/structure.sql
@@ -190,6 +190,24 @@ case (edges.edge_id = perm_edge_id)
  $$;
  
  
+--
+-- Name: project_subtree_with_is_frozen(character varying, boolean); Type: FUNCTION; Schema: public; Owner: -
+--
+
+CREATE FUNCTION public.project_subtree_with_is_frozen(starting_uuid character varying, starting_is_frozen boolean) RETURNS TABLE(uuid character varying, is_frozen boolean)
+    LANGUAGE sql STABLE
+    AS $$
+WITH RECURSIVE
+  project_subtree(uuid, is_frozen) as (
+    values (starting_uuid, starting_is_frozen)
+    union
+    select groups.uuid, project_subtree.is_frozen or groups.frozen_by_uuid is not null
+      from groups join project_subtree on (groups.owner_uuid = project_subtree.uuid)
+  )
+  select uuid, is_frozen from project_subtree;
+$$;
+
+
  --
  -- Name: project_subtree_with_trash_at(character varying, timestamp without time zone); Type: FUNCTION; Schema: public; Owner: -
  --
@@ -548,6 +566,15 @@ CREATE SEQUENCE public.containers_id_seq
  ALTER SEQUENCE public.containers_id_seq OWNED BY public.containers.id;
  
  
+--
+-- Name: frozen_groups; Type: TABLE; Schema: public; Owner: -
+--
+
+CREATE TABLE public.frozen_groups (
+    uuid character varying
+);
+
+
  --
  -- Name: groups; Type: TABLE; Schema: public; Owner: -
  --
@@ -567,7 +594,8 @@ CREATE TABLE public.groups (
      trash_at timestamp without time zone,
      is_trashed boolean DEFAULT false NOT NULL,
      delete_at timestamp without time zone,
-    properties jsonb DEFAULT '{}'::jsonb
+    properties jsonb DEFAULT '{}'::jsonb,
+    frozen_by_uuid character varying
  );
  
  
@@ -1775,7 +1803,7 @@ CREATE INDEX group_index_on_properties ON public.groups USING gin (properties);
  -- Name: groups_search_index; Type: INDEX; Schema: public; Owner: -
  --
  
-CREATE INDEX groups_search_index ON public.groups USING btree (uuid, owner_uuid, modified_by_client_uuid, modified_by_user_uuid, name, group_class);
+CREATE INDEX groups_search_index ON public.groups USING btree (uuid, owner_uuid, modified_by_client_uuid, modified_by_user_uuid, name, group_class, frozen_by_uuid);
  
  
  --
@@ -1877,10 +1905,10 @@ CREATE UNIQUE INDEX index_authorized_keys_on_uuid ON public.authorized_keys USIN
  
  
  --
--- Name: index_collections_on_created_at; Type: INDEX; Schema: public; Owner: -
+-- Name: index_collections_on_created_at_and_uuid; Type: INDEX; Schema: public; Owner: -
  --
  
-CREATE INDEX index_collections_on_created_at ON public.collections USING btree (created_at);
+CREATE INDEX index_collections_on_created_at_and_uuid ON public.collections USING btree (created_at, uuid);
  
  
  --
@@ -1905,17 +1933,10 @@ CREATE INDEX index_collections_on_is_trashed ON public.collections USING btree (
  
  
  --
--- Name: index_collections_on_modified_at; Type: INDEX; Schema: public; Owner: -
+-- Name: index_collections_on_modified_at_and_uuid; Type: INDEX; Schema: public; Owner: -
  --
  
-CREATE INDEX index_collections_on_modified_at ON public.collections USING btree (modified_at);
-
-
---
--- Name: index_collections_on_modified_at_uuid; Type: INDEX; Schema: public; Owner: -
---
-
-CREATE INDEX index_collections_on_modified_at_uuid ON public.collections USING btree (modified_at DESC, uuid);
+CREATE INDEX index_collections_on_modified_at_and_uuid ON public.collections USING btree (modified_at, uuid);
  
  
  --
@@ -1961,10 +1982,17 @@ CREATE INDEX index_container_requests_on_container_uuid ON public.container_requ
  
  
  --
--- Name: index_container_requests_on_modified_at_uuid; Type: INDEX; Schema: public; Owner: -
+-- Name: index_container_requests_on_created_at_and_uuid; Type: INDEX; Schema: public; Owner: -
+--
+
+CREATE INDEX index_container_requests_on_created_at_and_uuid ON public.container_requests USING btree (created_at, uuid);
+
+
+--
+-- Name: index_container_requests_on_modified_at_and_uuid; Type: INDEX; Schema: public; Owner: -
  --
  
-CREATE INDEX index_container_requests_on_modified_at_uuid ON public.container_requests USING btree (modified_at DESC, uuid);
+CREATE INDEX index_container_requests_on_modified_at_and_uuid ON public.container_requests USING btree (modified_at, uuid);
  
  
  --
@@ -2059,10 +2087,17 @@ CREATE UNIQUE INDEX index_containers_on_uuid ON public.containers USING btree (u
  
  
  --
--- Name: index_groups_on_created_at; Type: INDEX; Schema: public; Owner: -
+-- Name: index_frozen_groups_on_uuid; Type: INDEX; Schema: public; Owner: -
+--
+
+CREATE UNIQUE INDEX index_frozen_groups_on_uuid ON public.frozen_groups USING btree (uuid);
+
+
+--
+-- Name: index_groups_on_created_at_and_uuid; Type: INDEX; Schema: public; Owner: -
  --
  
-CREATE INDEX index_groups_on_created_at ON public.groups USING btree (created_at);
+CREATE INDEX index_groups_on_created_at_and_uuid ON public.groups USING btree (created_at, uuid);
  
  
  --
@@ -2087,17 +2122,10 @@ CREATE INDEX index_groups_on_is_trashed ON public.groups USING btree (is_trashed
  
  
  --
--- Name: index_groups_on_modified_at; Type: INDEX; Schema: public; Owner: -
---
-
-CREATE INDEX index_groups_on_modified_at ON public.groups USING btree (modified_at);
-
-
---
--- Name: index_groups_on_modified_at_uuid; Type: INDEX; Schema: public; Owner: -
+-- Name: index_groups_on_modified_at_and_uuid; Type: INDEX; Schema: public; Owner: -
  --
  
-CREATE INDEX index_groups_on_modified_at_uuid ON public.groups USING btree (modified_at DESC, uuid);
+CREATE INDEX index_groups_on_modified_at_and_uuid ON public.groups USING btree (modified_at, uuid);
  
  
  --
@@ -2325,10 +2353,10 @@ CREATE UNIQUE INDEX index_keep_services_on_uuid ON public.keep_services USING bt
  
  
  --
--- Name: index_links_on_created_at; Type: INDEX; Schema: public; Owner: -
+-- Name: index_links_on_created_at_and_uuid; Type: INDEX; Schema: public; Owner: -
  --
  
-CREATE INDEX index_links_on_created_at ON public.links USING btree (created_at);
+CREATE INDEX index_links_on_created_at_and_uuid ON public.links USING btree (created_at, uuid);
  
  
  --
@@ -2339,17 +2367,10 @@ CREATE INDEX index_links_on_head_uuid ON public.links USING btree (head_uuid);
  
  
  --
--- Name: index_links_on_modified_at; Type: INDEX; Schema: public; Owner: -
+-- Name: index_links_on_modified_at_and_uuid; Type: INDEX; Schema: public; Owner: -
  --
  
-CREATE INDEX index_links_on_modified_at ON public.links USING btree (modified_at);
-
-
---
--- Name: index_links_on_modified_at_uuid; Type: INDEX; Schema: public; Owner: -
---
-
-CREATE INDEX index_links_on_modified_at_uuid ON public.links USING btree (modified_at DESC, uuid);
+CREATE INDEX index_links_on_modified_at_and_uuid ON public.links USING btree (modified_at, uuid);
  
  
  --
@@ -2388,10 +2409,10 @@ CREATE UNIQUE INDEX index_links_on_uuid ON public.links USING btree (uuid);
  
  
  --
--- Name: index_logs_on_created_at; Type: INDEX; Schema: public; Owner: -
+-- Name: index_logs_on_created_at_and_uuid; Type: INDEX; Schema: public; Owner: -
  --
  
-CREATE INDEX index_logs_on_created_at ON public.logs USING btree (created_at);
+CREATE INDEX index_logs_on_created_at_and_uuid ON public.logs USING btree (created_at, uuid);
  
  
  --
@@ -2409,17 +2430,10 @@ CREATE INDEX index_logs_on_event_type ON public.logs USING btree (event_type);
  
  
  --
--- Name: index_logs_on_modified_at; Type: INDEX; Schema: public; Owner: -
+-- Name: index_logs_on_modified_at_and_uuid; Type: INDEX; Schema: public; Owner: -
  --
  
-CREATE INDEX index_logs_on_modified_at ON public.logs USING btree (modified_at);
-
-
---
--- Name: index_logs_on_modified_at_uuid; Type: INDEX; Schema: public; Owner: -
---
-
-CREATE INDEX index_logs_on_modified_at_uuid ON public.logs USING btree (modified_at DESC, uuid);
+CREATE INDEX index_logs_on_modified_at_and_uuid ON public.logs USING btree (modified_at, uuid);
  
  
  --
@@ -2570,10 +2584,17 @@ CREATE UNIQUE INDEX index_pipeline_templates_on_uuid ON public.pipeline_template
  
  
  --
--- Name: index_repositories_on_modified_at_uuid; Type: INDEX; Schema: public; Owner: -
+-- Name: index_repositories_on_created_at_and_uuid; Type: INDEX; Schema: public; Owner: -
  --
  
-CREATE INDEX index_repositories_on_modified_at_uuid ON public.repositories USING btree (modified_at DESC, uuid);
+CREATE INDEX index_repositories_on_created_at_and_uuid ON public.repositories USING btree (created_at, uuid);
+
+
+--
+-- Name: index_repositories_on_modified_at_and_uuid; Type: INDEX; Schema: public; Owner: -
+--
+
+CREATE INDEX index_repositories_on_modified_at_and_uuid ON public.repositories USING btree (modified_at, uuid);
  
  
  --
@@ -2654,10 +2675,10 @@ CREATE UNIQUE INDEX index_trashed_groups_on_group_uuid ON public.trashed_groups
  
  
  --
--- Name: index_users_on_created_at; Type: INDEX; Schema: public; Owner: -
+-- Name: index_users_on_created_at_and_uuid; Type: INDEX; Schema: public; Owner: -
  --
  
-CREATE INDEX index_users_on_created_at ON public.users USING btree (created_at);
+CREATE INDEX index_users_on_created_at_and_uuid ON public.users USING btree (created_at, uuid);
  
  
  --
@@ -2668,17 +2689,10 @@ CREATE UNIQUE INDEX index_users_on_identity_url ON public.users USING btree (ide
  
  
  --
--- Name: index_users_on_modified_at; Type: INDEX; Schema: public; Owner: -
---
-
-CREATE INDEX index_users_on_modified_at ON public.users USING btree (modified_at);
-
-
---
--- Name: index_users_on_modified_at_uuid; Type: INDEX; Schema: public; Owner: -
+-- Name: index_users_on_modified_at_and_uuid; Type: INDEX; Schema: public; Owner: -
  --
  
-CREATE INDEX index_users_on_modified_at_uuid ON public.users USING btree (modified_at DESC, uuid);
+CREATE INDEX index_users_on_modified_at_and_uuid ON public.users USING btree (modified_at, uuid);
  
  
  --
@@ -2702,6 +2716,13 @@ CREATE UNIQUE INDEX index_users_on_username ON public.users USING btree (usernam
  CREATE UNIQUE INDEX index_users_on_uuid ON public.users USING btree (uuid);
  
  
+--
+-- Name: index_virtual_machines_on_created_at_and_uuid; Type: INDEX; Schema: public; Owner: -
+--
+
+CREATE INDEX index_virtual_machines_on_created_at_and_uuid ON public.virtual_machines USING btree (created_at, uuid);
+
+
  --
  -- Name: index_virtual_machines_on_hostname; Type: INDEX; Schema: public; Owner: -
  --
@@ -2710,10 +2731,10 @@ CREATE INDEX index_virtual_machines_on_hostname ON public.virtual_machines USING
  
  
  --
--- Name: index_virtual_machines_on_modified_at_uuid; Type: INDEX; Schema: public; Owner: -
+-- Name: index_virtual_machines_on_modified_at_and_uuid; Type: INDEX; Schema: public; Owner: -
  --
  
-CREATE INDEX index_virtual_machines_on_modified_at_uuid ON public.virtual_machines USING btree (modified_at DESC, uuid);
+CREATE INDEX index_virtual_machines_on_modified_at_and_uuid ON public.virtual_machines USING btree (modified_at, uuid);
  
  
  --
@@ -2731,10 +2752,17 @@ CREATE UNIQUE INDEX index_virtual_machines_on_uuid ON public.virtual_machines US
  
  
  --
--- Name: index_workflows_on_modified_at_uuid; Type: INDEX; Schema: public; Owner: -
+-- Name: index_workflows_on_created_at_and_uuid; Type: INDEX; Schema: public; Owner: -
+--
+
+CREATE INDEX index_workflows_on_created_at_and_uuid ON public.workflows USING btree (created_at, uuid);
+
+
+--
+-- Name: index_workflows_on_modified_at_and_uuid; Type: INDEX; Schema: public; Owner: -
  --
  
-CREATE INDEX index_workflows_on_modified_at_uuid ON public.workflows USING btree (modified_at DESC, uuid);
+CREATE INDEX index_workflows_on_modified_at_and_uuid ON public.workflows USING btree (modified_at, uuid);
  
  
  --
@@ -3147,6 +3175,10 @@ INSERT INTO "schema_migrations" (version) VALUES
  ('20210126183521'),
  ('20210621204455'),
  ('20210816191509'),
-('20211027154300');
+('20211027154300'),
+('20220224203102'),
+('20220301155729'),
+('20220303204419'),
+('20220401153101');
  
  
diff --git a/services/api/lib/20200501150153_permission_table_constants.rb b/services/api/lib/20200501150153_permission_table_constants.rb

index 74c15bc2e9381a13ae57021386d9393b35d86543..7ee5039368a282e3fabf8ec2d62f6b7f0d28c943 100644 (file)
--- a/services/api/lib/20200501150153_permission_table_constants.rb
+++ b/services/api/lib/20200501150153_permission_table_constants.rb
@@ -15,8 +15,8 @@
  # update_permissions reference that file instead.
  
  PERMISSION_VIEW = "materialized_permissions"
-
  TRASHED_GROUPS = "trashed_groups"
+FROZEN_GROUPS = "frozen_groups"
  
  # We need to use this parameterized query in a few different places,
  # including as a subquery in a larger query.
@@ -83,3 +83,21 @@ INSERT INTO materialized_permissions
  }, "refresh_permission_view.do"
    end
  end
+
+def refresh_frozen
+  ActiveRecord::Base.transaction do
+    ActiveRecord::Base.connection.execute("LOCK TABLE #{FROZEN_GROUPS}")
+    ActiveRecord::Base.connection.execute("DELETE FROM #{FROZEN_GROUPS}")
+
+    # Compute entire frozen_groups table, starting with top-level
+    # projects (i.e., project groups owned by a user).
+    ActiveRecord::Base.connection.execute(%{
+INSERT INTO #{FROZEN_GROUPS}
+select ps.uuid from groups,
+  lateral project_subtree_with_is_frozen(groups.uuid, groups.frozen_by_uuid is not null) ps
+  where groups.owner_uuid like '_____-tpzed-_______________'
+    and group_class = 'project'
+    and ps.is_frozen
+})
+  end
+end
diff --git a/services/api/test/fixtures/jobs.yml b/services/api/test/fixtures/jobs.yml

index ab76417902214162506707d3e642f93539ffe7ed..9280aeab935e19748f25ea347592d3d704161f02 100644 (file)
--- a/services/api/test/fixtures/jobs.yml
+++ b/services/api/test/fixtures/jobs.yml
@@ -8,8 +8,8 @@ running:
    cancelled_at: ~
    cancelled_by_user_uuid: ~
    cancelled_by_client_uuid: ~
-  created_at: <%= 3.minute.ago.to_s(:db) %>
-  started_at: <%= 3.minute.ago.to_s(:db) %>
+  created_at: <%= 2.7.minute.ago.to_s(:db) %>
+  started_at: <%= 2.7.minute.ago.to_s(:db) %>
    finished_at: ~
    script: hash
    repository: active/foo
diff --git a/services/api/test/fixtures/pipeline_instances.yml b/services/api/test/fixtures/pipeline_instances.yml

index 9621b3effc1c74f0b832c021b3c9d2b99ef11586..a504c9fadd790cb21e2410cb95cefd082c32cfbf 100644 (file)
--- a/services/api/test/fixtures/pipeline_instances.yml
+++ b/services/api/test/fixtures/pipeline_instances.yml
@@ -97,7 +97,7 @@ has_job:
    state: Ready
    uuid: zzzzz-d1hrv-1yfj6xkidf2muk3
    owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
-  created_at: <%= 3.1.minute.ago.to_s(:db) %>
+  created_at: <%= 2.9.minute.ago.to_s(:db) %>
    components:
     foo:
      script: foo
@@ -112,7 +112,7 @@ components_is_jobspec:
    # Helps test that clients cope with funny-shaped components.
    # For an example, see #3321.
    uuid: zzzzz-d1hrv-1yfj61234abcdk4
-  created_at: <%= 2.minute.ago.to_s(:db) %>
+  created_at: <%= 4.minute.ago.to_s(:db) %>
    owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
    modified_by_client_uuid: zzzzz-ozdt8-brczlopd8u8d0jr
    modified_by_user_uuid: zzzzz-tpzed-xurymjxw79nv3jz
@@ -132,7 +132,7 @@ pipeline_with_tagged_collection_input:
    state: Ready
    uuid: zzzzz-d1hrv-1yfj61234abcdk3
    owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
-  created_at: <%= 3.1.minute.ago.to_s(:db) %>
+  created_at: <%= 3.2.minute.ago.to_s(:db) %>
    components:
      part-one:
        script_parameters:
@@ -145,7 +145,7 @@ pipeline_to_merge_params:
    uuid: zzzzz-d1hrv-1yfj6dcba4321k3
    pipeline_template_uuid: zzzzz-p5p6p-aox0k0ofxrystgw
    owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
-  created_at: <%= 3.1.minute.ago.to_s(:db) %>
+  created_at: <%= 3.3.minute.ago.to_s(:db) %>
    components:
      part-one:
        script_parameters:
@@ -193,7 +193,7 @@ pipeline_instance_owned_by_fuse:
    uuid: zzzzz-d1hrv-ri9dvgkgqs9y09j
    owner_uuid: zzzzz-tpzed-0fusedrivertest
    pipeline_template_uuid: zzzzz-p5p6p-vq4wuvy84xvaq2r
-  created_at: 2014-09-15 12:00:00
+  created_at: 2014-09-16 12:00:00
    name: "pipeline instance owned by FUSE"
    components:
      foo:
@@ -210,7 +210,7 @@ pipeline_instance_in_fuse_project:
    uuid: zzzzz-d1hrv-scarxiyajtshq3l
    owner_uuid: zzzzz-j7d0g-0000ownedbyfuse
    pipeline_template_uuid: zzzzz-p5p6p-vq4wuvy84xvaq2r
-  created_at: 2014-09-15 12:00:00
+  created_at: 2014-09-17 12:00:00
    name: "pipeline instance in FUSE project"
    components:
      foo:
@@ -227,7 +227,7 @@ pipeline_owned_by_active_in_aproject:
    state: Complete
    uuid: zzzzz-d1hrv-ju5ghi0i9z2kqc6
    owner_uuid: zzzzz-j7d0g-v955i6s2oi1cbso
-  created_at: 2014-09-15 12:00:00
+  created_at: 2014-09-18 12:00:00
    components:
      foo:
        script: foo
@@ -243,7 +243,7 @@ pipeline_owned_by_active_in_home:
    state: Complete
    uuid: zzzzz-d1hrv-lihrbd0i9z2kqc6
    owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
-  created_at: 2014-09-15 12:00:00
+  created_at: 2014-09-19 12:00:00
    components:
      foo:
        script: foo
@@ -287,7 +287,7 @@ pipeline_in_publicly_accessible_project_but_other_objects_elsewhere:
    name: Pipeline in public project with other objects elsewhere
    pipeline_template_uuid: zzzzz-p5p6p-aox0k0ofxrystgw
    state: Complete
-  created_at: 2014-09-15 12:00:00
+  created_at: 2014-09-20 12:00:00
    components:
      foo:
        script: foo
@@ -314,7 +314,7 @@ new_pipeline_in_publicly_accessible_project:
    name: Pipeline in New state in publicly accessible project
    pipeline_template_uuid: zzzzz-p5p6p-tmpltpublicproj
    state: New
-  created_at: 2014-09-15 12:00:00
+  created_at: 2014-09-21 12:00:00
    components:
      foo:
        script: foo
@@ -331,7 +331,7 @@ new_pipeline_in_publicly_accessible_project_but_other_objects_elsewhere:
    name: Pipeline in New state in public project with objects elsewhere
    pipeline_template_uuid: zzzzz-p5p6p-aox0k0ofxrystgw
    state: New
-  created_at: 2014-09-15 12:00:00
+  created_at: 2014-09-22 12:00:00
    components:
      foo:
        script: foo
@@ -348,7 +348,7 @@ new_pipeline_in_publicly_accessible_project_with_dataclass_file_and_other_object
    name: Pipeline in public project in New state with file type data class with objects elsewhere
    pipeline_template_uuid: zzzzz-p5p6p-aox0k0ofxrystgw
    state: New
-  created_at: 2014-09-15 12:00:00
+  created_at: 2014-09-23 12:00:00
    components:
      foo:
        script: foo
@@ -363,8 +363,8 @@ pipeline_in_running_state:
    name: running_with_job
    uuid: zzzzz-d1hrv-runningpipeline
    owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
-  created_at: <%= 3.1.minute.ago.to_s(:db) %>
-  started_at: <%= 3.1.minute.ago.to_s(:db) %>
+  created_at: <%= 2.8.minute.ago.to_s(:db) %>
+  started_at: <%= 2.8.minute.ago.to_s(:db) %>
    state: RunningOnServer
    components:
     foo:
@@ -393,7 +393,7 @@ complete_pipeline_with_two_jobs:
    uuid: zzzzz-d1hrv-twodonepipeline
    owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
    state: Complete
-  created_at: <%= 3.minute.ago.to_s(:db) %>
+  created_at: <%= 2.5.minute.ago.to_s(:db) %>
    started_at: <%= 2.minute.ago.to_s(:db) %>
    finished_at: <%= 1.minute.ago.to_s(:db) %>
    components:
diff --git a/services/api/test/functional/arvados/v1/groups_controller_test.rb b/services/api/test/functional/arvados/v1/groups_controller_test.rb

index 0819c2306717f08cfac634494d1e55a1e6a677d4..fcdce0e600d6572fc69b682cd8b5ef68036d633a 100644 (file)
--- a/services/api/test/functional/arvados/v1/groups_controller_test.rb
+++ b/services/api/test/functional/arvados/v1/groups_controller_test.rb
@@ -920,4 +920,24 @@ class Arvados::V1::GroupsControllerTest < ActionController::TestCase
  
      assert_response 422
    end
+
+  test "include_trash does not return trash inside frozen project" do
+    authorize_with :active
+    trashtime = Time.now - 1.second
+    outerproj = Group.create!(group_class: 'project')
+    innerproj = Group.create!(group_class: 'project', owner_uuid: outerproj.uuid)
+    innercoll = Collection.create!(name: 'inner-not-trashed', owner_uuid: innerproj.uuid)
+    innertrash = Collection.create!(name: 'inner-trashed', owner_uuid: innerproj.uuid, trash_at: trashtime)
+    innertrashproj = Group.create!(group_class: 'project', name: 'inner-trashed-proj', owner_uuid: innerproj.uuid, trash_at: trashtime)
+    outertrash = Collection.create!(name: 'outer-trashed', owner_uuid: outerproj.uuid, trash_at: trashtime)
+    innerproj.update_attributes!(frozen_by_uuid: users(:active).uuid)
+    get :contents, params: {id: outerproj.uuid, include_trash: true, recursive: true}
+    assert_response :success
+    uuids = json_response['items'].collect { |item| item['uuid'] }
+    assert_includes uuids, outertrash.uuid
+    assert_includes uuids, innerproj.uuid
+    assert_includes uuids, innercoll.uuid
+    refute_includes uuids, innertrash.uuid
+    refute_includes uuids, innertrashproj.uuid
+  end
  end
diff --git a/services/api/test/functional/arvados/v1/query_test.rb b/services/api/test/functional/arvados/v1/query_test.rb

index 9bba418578e89c6dd36009ba2e3278f700d33eb3..fae9dc40c6daf2c9df1e28edae47cd743757db52 100644 (file)
--- a/services/api/test/functional/arvados/v1/query_test.rb
+++ b/services/api/test/functional/arvados/v1/query_test.rb
@@ -24,7 +24,7 @@ class Arvados::V1::QueryTest < ActionController::TestCase
        controller: 'logs',
      }
      assert_response :success
-    assert_equal('logs.event_type asc, logs.modified_at desc, logs.uuid',
+    assert_equal('logs.event_type asc, logs.modified_at desc, logs.uuid desc',
                   assigns(:objects).order_values.join(', '))
    end
  
@@ -36,7 +36,7 @@ class Arvados::V1::QueryTest < ActionController::TestCase
        controller: 'logs',
      }
      assert_response :success
-    assert_equal('logs.modified_at asc, logs.uuid',
+    assert_equal('logs.modified_at asc, logs.uuid desc',
                   assigns(:objects).order_values.join(', '))
    end
  
@@ -51,7 +51,7 @@ class Arvados::V1::QueryTest < ActionController::TestCase
        controller: 'logs',
      }
      assert_response :success
-    assert_equal('logs.modified_at asc, logs.event_type desc, logs.uuid',
+    assert_equal('logs.modified_at asc, logs.event_type desc, logs.uuid desc',
                   assigns(:objects).order_values.join(', '))
    end
  
diff --git a/services/api/test/integration/permissions_test.rb b/services/api/test/integration/permissions_test.rb

index 9eae518c1d0f7ffd0700fc110de375754713fdc1..65f5adc1d150b0914324ebde3357187214c71bc2 100644 (file)
--- a/services/api/test/integration/permissions_test.rb
+++ b/services/api/test/integration/permissions_test.rb
@@ -451,35 +451,226 @@ class PermissionsTest < ActionDispatch::IntegrationTest
  
      # Should be able to read links directly too
      get "/arvados/v1/links/#{can_read_uuid}",
-        params: {},
        headers: auth(:active)
      assert_response :success
  
+    ### Create some objects of different types (other than projects)
+    ### inside a subproject inside the shared project, and share those
+    ### individual objects with a 3rd user ("spectator").
+    post '/arvados/v1/groups',
+         params: {
+           group: {
+             owner_uuid: groups(:public).uuid,
+             name: 'permission test subproject',
+             group_class: 'project',
+           },
+         },
+         headers: auth(:admin)
+    assert_response :success
+    subproject_uuid = json_response['uuid']
+
+    test_types = ['collection', 'workflow', 'container_request']
+    test_type_create_attrs = {
+      'container_request' => {
+        command: ["echo", "foo"],
+        container_image: links(:docker_image_collection_tag).name,
+        cwd: "/tmp",
+        environment: {},
+        mounts: {"/out" => {kind: "tmp", capacity: 1000000}},
+        output_path: "/out",
+        runtime_constraints: {"vcpus" => 1, "ram" => 2},
+      },
+    }
+
+    test_object = {}
+    test_object_perm_link = {}
+    test_types.each do |test_type|
+      post "/arvados/v1/#{test_type}s",
+           params: {
+             test_type.to_sym => {
+               owner_uuid: subproject_uuid,
+               name: "permission test #{test_type} in subproject",
+             }.merge(test_type_create_attrs[test_type] || {}).to_json,
+           },
+           headers: auth(:admin)
+      assert_response :success
+      test_object[test_type] = json_response
+
+      post '/arvados/v1/links',
+           params: {
+             link: {
+               tail_uuid: users(:spectator).uuid,
+               link_class: 'permission',
+               name: 'can_read',
+               head_uuid: test_object[test_type]['uuid'],
+             }
+           },
+           headers: auth(:admin)
+      assert_response :success
+      test_object_perm_link[test_type] = json_response
+    end
+
+    # The "active-can_manage-project" permission should cause the
+    # "spectator-can_read-object" links to be visible to the "active"
+    # user.
+    test_types.each do |test_type|
+      get "/arvados/v1/permissions/#{test_object[test_type]['uuid']}",
+          headers: auth(:active)
+      assert_response :success
+      perm_uuids = json_response['items'].map { |item| item['uuid'] }
+      assert_includes perm_uuids, test_object_perm_link[test_type]['uuid'], "can_read_uuid not found"
+
+      get "/arvados/v1/links/#{test_object_perm_link[test_type]['uuid']}",
+          headers: auth(:active)
+      assert_response :success
+
+      [
+        ['head_uuid', '=', test_object[test_type]['uuid']],
+        ['head_uuid', 'in', [test_object[test_type]['uuid']]],
+        ['head_uuid', 'in', [users(:admin).uuid, test_object[test_type]['uuid']]],
+      ].each do |filter|
+        get "/arvados/v1/links",
+            params: {
+              filters: ([['link_class', '=', 'permission'], filter]).to_json,
+            },
+            headers: auth(:active)
+        assert_response :success
+        assert_not_empty json_response['items'], "could not find can_read link using index with filter #{filter}"
+        assert_equal test_object_perm_link[test_type]['uuid'], json_response['items'][0]['uuid']
+      end
+
+      # The "spectator-can_read-object" link should be visible to the
+      # subject user ("spectator") in a filter query, even without
+      # can_manage permission on the target object.
+      [
+        ['tail_uuid', '=', users(:spectator).uuid],
+      ].each do |filter|
+        get "/arvados/v1/links",
+            params: {
+              filters: ([['link_class', '=', 'permission'], filter]).to_json,
+            },
+            headers: auth(:spectator)
+        assert_response :success
+        perm_uuids = json_response['items'].map { |item| item['uuid'] }
+        assert_includes perm_uuids, test_object_perm_link[test_type]['uuid'], "could not find can_read link using index with filter #{filter}"
+      end
+    end
+
      ### Now delete the can_manage link
      delete "/arvados/v1/links/#{can_manage_uuid}",
-      params: nil,
        headers: auth(:active)
      assert_response :success
  
      # Should not be able read these permission links again
-    get "/arvados/v1/permissions/#{groups(:public).uuid}",
-      params: nil,
+    test_types.each do |test_type|
+      get "/arvados/v1/permissions/#{groups(:public).uuid}",
+          headers: auth(:active)
+      assert_response 404
+
+      get "/arvados/v1/permissions/#{test_object[test_type]['uuid']}",
+          headers: auth(:active)
+      assert_response 404
+
+      get "/arvados/v1/links",
+          params: {
+            filters: [["link_class", "=", "permission"], ["head_uuid", "=", groups(:public).uuid]].to_json
+          },
+          headers: auth(:active)
+      assert_response :success
+      assert_equal [], json_response['items']
+
+      [
+        ['head_uuid', '=', test_object[test_type]['uuid']],
+        ['head_uuid', 'in', [users(:admin).uuid, test_object[test_type]['uuid']]],
+        ['head_uuid', 'in', []],
+      ].each do |filter|
+        get "/arvados/v1/links",
+            params: {
+              :filters => [["link_class", "=", "permission"], filter].to_json
+            },
+            headers: auth(:active)
+        assert_response :success
+        assert_equal [], json_response['items']
+      end
+
+      # Should not be able to read links directly either
+      get "/arvados/v1/links/#{can_read_uuid}",
+          headers: auth(:active)
+      assert_response 404
+
+      test_types.each do |test_type|
+        get "/arvados/v1/links/#{test_object_perm_link[test_type]['uuid']}",
+            headers: auth(:active)
+        assert_response 404
+      end
+    end
+
+    ### Create a collection, and share it with a direct permission
+    ### link (as opposed to sharing its parent project)
+    post "/arvados/v1/collections",
+      params: {
+        collection: {
+          name: 'permission test',
+        }
+      },
+      headers: auth(:admin)
+    assert_response :success
+    collection_uuid = json_response['uuid']
+    post "/arvados/v1/links",
+      params: {
+        link: {
+          tail_uuid: users(:spectator).uuid,
+          link_class: 'permission',
+          name: 'can_read',
+          head_uuid: collection_uuid,
+          properties: {}
+        }
+      },
+      headers: auth(:admin)
+    assert_response :success
+    can_read_collection_uuid = json_response['uuid']
+
+    # Should not be able read the permission link via permissions API,
+    # because permission is only can_read, not can_manage
+    get "/arvados/v1/permissions/#{collection_uuid}",
        headers: auth(:active)
      assert_response 404
  
-    get "/arvados/v1/links",
-        params: {
-          :filters => [["link_class", "=", "permission"], ["head_uuid", "=", groups(:public).uuid]].to_json
-        },
+    # Should not be able to read the permission link directly, for
+    # same reason
+    get "/arvados/v1/links/#{can_read_collection_uuid}",
        headers: auth(:active)
+    assert_response 404
+
+    ### Now add a can_manage link
+    post "/arvados/v1/links",
+      params: {
+        link: {
+          tail_uuid: users(:active).uuid,
+          link_class: 'permission',
+          name: 'can_manage',
+          head_uuid: collection_uuid,
+          properties: {}
+        }
+      },
+      headers: auth(:admin)
      assert_response :success
-    assert_equal [], json_response['items']
+    can_manage_collection_uuid = json_response['uuid']
  
-    # Should not be able to read links directly either
-    get "/arvados/v1/links/#{can_read_uuid}",
-        params: {},
+    # Should be able read both permission links via permissions API
+    get "/arvados/v1/permissions/#{collection_uuid}",
        headers: auth(:active)
-    assert_response 404
+    assert_response :success
+    perm_uuids = json_response['items'].map { |item| item['uuid'] }
+    assert_includes perm_uuids, can_read_collection_uuid, "can_read_uuid not found"
+    assert_includes perm_uuids, can_manage_collection_uuid, "can_manage_uuid not found"
+
+    # Should be able to read both permission links directly
+    [can_read_collection_uuid, can_manage_collection_uuid].each do |uuid|
+      get "/arvados/v1/links/#{uuid}",
+        headers: auth(:active)
+      assert_response :success
+    end
    end
  
    test "get_permissions returns 404 for nonexistent uuid" do
diff --git a/services/api/test/integration/select_test.rb b/services/api/test/integration/select_test.rb

index 2ee3b3cf94cab5c73f3b4e809db8b6afac7aec81..0548a767ba4cbba9b3b94d6303b65db6cb53c7b8 100644 (file)
--- a/services/api/test/integration/select_test.rb
+++ b/services/api/test/integration/select_test.rb
@@ -62,7 +62,7 @@ class SelectTest < ActionDispatch::IntegrationTest
        headers: auth(:admin)
      assert_response :success
      uuids = json_response['items'].collect { |i| i['uuid'] }
-    assert_equal uuids, uuids.sort
+    assert_equal uuids, uuids.sort.reverse
    end
  
    def assert_link_classes_ascend(current_class, prev_class)
diff --git a/services/api/test/unit/container_request_test.rb b/services/api/test/unit/container_request_test.rb

index 9b35769ef2f0886ab3e1595d280b0a9a133fb83e..aa649e9106c4a403c1d2745c6a2a805a87e5f205 100644 (file)
--- a/services/api/test/unit/container_request_test.rb
+++ b/services/api/test/unit/container_request_test.rb
@@ -1126,7 +1126,8 @@ class ContainerRequestTest < ActiveSupport::TestCase
      end
    end
  
-  test "Having preemptible_instances=true create a committed child container request and verify the scheduling parameter of its container" do
+  test "AlwaysUsePreemptibleInstances makes child containers preemptible" do
+    Rails.configuration.Containers.AlwaysUsePreemptibleInstances = true
      common_attrs = {cwd: "test",
                      priority: 1,
                      command: ["echo", "hello"],
diff --git a/services/api/test/unit/group_test.rb b/services/api/test/unit/group_test.rb

index 10932e116d7adbed60880f4fc84d0a55242039db..7a16962402c7a4609bfea6dddd5703526a4a2cbc 100644 (file)
--- a/services/api/test/unit/group_test.rb
+++ b/services/api/test/unit/group_test.rb
@@ -313,4 +313,219 @@ update links set tail_uuid='#{g5}' where uuid='#{l1.uuid}'
      assert Link.where(tail_uuid: g3, head_uuid: g4, link_class: "permission", name: "can_manage").any?
      assert !Link.where(link_class: 'permission', name: 'can_manage', tail_uuid: g5, head_uuid: g4).any?
    end
+
+  test "freeze project" do
+    act_as_user users(:active) do
+      Rails.configuration.API.UnfreezeProjectRequiresAdmin = false
+
+      test_cr_attrs = {
+        command: ["echo", "foo"],
+        container_image: links(:docker_image_collection_tag).name,
+        cwd: "/tmp",
+        environment: {},
+        mounts: {"/out" => {"kind" => "tmp", "capacity" => 1000000}},
+        output_path: "/out",
+        runtime_constraints: {"vcpus" => 1, "ram" => 2},
+        name: "foo",
+        description: "bar",
+      }
+      parent = Group.create!(group_class: 'project', name: 'freeze-test-parent', owner_uuid: users(:active).uuid)
+      proj = Group.create!(group_class: 'project', name: 'freeze-test', owner_uuid: parent.uuid)
+      proj_inner = Group.create!(group_class: 'project', name: 'freeze-test-inner', owner_uuid: proj.uuid)
+      coll = Collection.create!(name: 'freeze-test-collection', manifest_text: '', owner_uuid: proj_inner.uuid)
+
+      # Cannot set frozen_by_uuid to a different user
+      assert_raises do
+        proj.update_attributes!(frozen_by_uuid: users(:spectator).uuid)
+      end
+      proj.reload
+
+      # Cannot set frozen_by_uuid without can_manage permission
+      act_as_system_user do
+        Link.create!(link_class: 'permission', name: 'can_write', tail_uuid: users(:spectator).uuid, head_uuid: proj.uuid)
+      end
+      act_as_user users(:spectator) do
+        # First confirm we have write permission
+        assert Collection.create(name: 'bar', owner_uuid: proj.uuid)
+        assert_raises(ArvadosModel::PermissionDeniedError) do
+          proj.update_attributes!(frozen_by_uuid: users(:spectator).uuid)
+        end
+      end
+      proj.reload
+
+      # Cannot set frozen_by_uuid without description (if so configured)
+      Rails.configuration.API.FreezeProjectRequiresDescription = true
+      err = assert_raises do
+        proj.update_attributes!(frozen_by_uuid: users(:active).uuid)
+      end
+      assert_match /can only be set if description is non-empty/, err.inspect
+      proj.reload
+      err = assert_raises do
+        proj.update_attributes!(frozen_by_uuid: users(:active).uuid, description: '')
+      end
+      assert_match /can only be set if description is non-empty/, err.inspect
+      proj.reload
+
+      # Cannot set frozen_by_uuid without properties (if so configured)
+      Rails.configuration.API.FreezeProjectRequiresProperties['frobity'] = true
+      err = assert_raises do
+        proj.update_attributes!(
+          frozen_by_uuid: users(:active).uuid,
+          description: 'ready to freeze')
+      end
+      assert_match /can only be set if properties\[frobity\] value is non-empty/, err.inspect
+      proj.reload
+
+      # Cannot set frozen_by_uuid while project or its parent is
+      # trashed
+      [parent, proj].each do |trashed|
+        trashed.update_attributes!(trash_at: db_current_time)
+        err = assert_raises do
+          proj.update_attributes!(
+            frozen_by_uuid: users(:active).uuid,
+            description: 'ready to freeze',
+            properties: {'frobity' => 'bar baz'})
+        end
+        assert_match /cannot be set on a trashed project/, err.inspect
+        proj.reload
+        trashed.update_attributes!(trash_at: nil)
+      end
+
+      # Can set frozen_by_uuid if all conditions are met
+      ok = proj.update_attributes(
+        frozen_by_uuid: users(:active).uuid,
+        description: 'ready to freeze',
+        properties: {'frobity' => 'bar baz'})
+      assert ok, proj.errors.messages.inspect
+
+      # Once project is frozen, cannot create new items inside it or
+      # its descendants
+      [proj, proj_inner].each do |frozen|
+        assert_raises do
+          collections(:collection_owned_by_active).update_attributes!(owner_uuid: frozen.uuid)
+        end
+        assert_raises do
+          Collection.create!(owner_uuid: frozen.uuid, name: 'inside-frozen-project')
+        end
+        assert_raises do
+          Group.create!(owner_uuid: frozen.uuid, group_class: 'project', name: 'inside-frozen-project')
+        end
+        cr = ContainerRequest.new(test_cr_attrs.merge(owner_uuid: frozen.uuid))
+        assert_raises ArvadosModel::PermissionDeniedError do
+          cr.save
+        end
+        assert_match /frozen/, cr.errors.inspect
+        # Check the frozen-parent condition is the only reason save failed.
+        cr.owner_uuid = users(:active).uuid
+        assert cr.save
+        cr.destroy
+      end
+
+      # Once project is frozen, cannot change name/contents, move,
+      # trash, or delete the project or anything beneath it
+      [proj, proj_inner, coll].each do |frozen|
+        assert_raises(StandardError, "should reject rename of #{frozen.uuid} (#{frozen.name}) with parent #{frozen.owner_uuid}") do
+          frozen.update_attributes!(name: 'foo2')
+        end
+        frozen.reload
+
+        if frozen.is_a?(Collection)
+          assert_raises(StandardError, "should reject manifest change of #{frozen.uuid}") do
+            frozen.update_attributes!(manifest_text: ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n")
+          end
+        else
+          assert_raises(StandardError, "should reject moving a project into #{frozen.uuid}") do
+            groups(:private).update_attributes!(owner_uuid: frozen.uuid)
+          end
+        end
+        frozen.reload
+
+        assert_raises(StandardError, "should reject moving #{frozen.uuid} to a different parent project") do
+          frozen.update_attributes!(owner_uuid: groups(:private).uuid)
+        end
+        frozen.reload
+        assert_raises(StandardError, "should reject setting trash_at of #{frozen.uuid}") do
+          frozen.update_attributes!(trash_at: db_current_time)
+        end
+        frozen.reload
+        assert_raises(StandardError, "should reject setting delete_at of #{frozen.uuid}") do
+          frozen.update_attributes!(delete_at: db_current_time)
+        end
+        frozen.reload
+        assert_raises(StandardError, "should reject delete of #{frozen.uuid}") do
+          frozen.destroy
+        end
+        frozen.reload
+        if frozen != proj
+          assert_equal [], frozen.writable_by
+        end
+      end
+
+      # User with write permission (but not manage) cannot unfreeze
+      act_as_user users(:spectator) do
+        # First confirm we have write permission on the parent project
+        assert Collection.create(name: 'bar', owner_uuid: parent.uuid)
+        assert_raises(ArvadosModel::PermissionDeniedError) do
+          proj.update_attributes!(frozen_by_uuid: nil)
+        end
+      end
+      proj.reload
+
+      # User with manage permission can unfreeze, then create items
+      # inside it and its children
+      assert proj.update_attributes(frozen_by_uuid: nil)
+      assert Collection.create!(owner_uuid: proj.uuid, name: 'inside-unfrozen-project')
+      assert Collection.create!(owner_uuid: proj_inner.uuid, name: 'inside-inner-unfrozen-project')
+
+      # Re-freeze, and reconfigure so only admins can unfreeze.
+      assert proj.update_attributes(frozen_by_uuid: users(:active).uuid)
+      Rails.configuration.API.UnfreezeProjectRequiresAdmin = true
+
+      # Owner cannot unfreeze, because not admin.
+      err = assert_raises do
+        proj.update_attributes!(frozen_by_uuid: nil)
+      end
+      assert_match /can only be changed by an admin user, once set/, err.inspect
+      proj.reload
+
+      # Cannot trash or delete a frozen project's ancestor
+      assert_raises(StandardError, "should not be able to set trash_at on parent of frozen project") do
+        parent.update_attributes!(trash_at: db_current_time)
+      end
+      parent.reload
+      assert_raises(StandardError, "should not be able to set delete_at on parent of frozen project") do
+        parent.update_attributes!(delete_at: db_current_time)
+      end
+      parent.reload
+      assert_nil parent.frozen_by_uuid
+
+      act_as_user users(:admin) do
+        # Even admin cannot change frozen_by_uuid to someone else's UUID.
+        err = assert_raises do
+          proj.update_attributes!(frozen_by_uuid: users(:project_viewer).uuid)
+        end
+        assert_match /can only be set to the current user's UUID/, err.inspect
+        proj.reload
+
+        # Admin can unfreeze.
+        assert proj.update_attributes(frozen_by_uuid: nil), proj.errors.messages
+      end
+
+      # Cannot freeze a project if it contains container requests in
+      # Committed state (this would cause operations on the relevant
+      # Containers to fail when syncing container request state)
+      creq_uncommitted = ContainerRequest.create!(test_cr_attrs.merge(owner_uuid: proj_inner.uuid))
+      creq_committed = ContainerRequest.create!(test_cr_attrs.merge(owner_uuid: proj_inner.uuid, state: 'Committed'))
+      err = assert_raises do
+        proj.update_attributes!(frozen_by_uuid: users(:active).uuid)
+      end
+      assert_match /container request zzzzz-xvhdp-.* with state = Committed/, err.inspect
+      proj.reload
+
+      # Can freeze once all container requests are in Uncommitted or
+      # Final state
+      creq_committed.update_attributes!(state: ContainerRequest::Final)
+      assert proj.update_attributes(frozen_by_uuid: users(:active).uuid)
+    end
+  end
  end
diff --git a/services/api/test/unit/permission_test.rb b/services/api/test/unit/permission_test.rb

index 647139d9ec9d38d4d5570a19e658b866bd609544..efc43dfde54f6f9b3274c765465a067226a3de27 100644 (file)
--- a/services/api/test/unit/permission_test.rb
+++ b/services/api/test/unit/permission_test.rb
@@ -602,4 +602,17 @@ class PermissionTest < ActiveSupport::TestCase
        end
      end
    end
+
+  # Show query plan for readable_by query. The plan for a test db
+  # might not resemble the plan for a production db, but it doesn't
+  # hurt to show the test db plan in test logs, and the .
+  [false, true].each do |include_trash|
+    test "query plan, include_trash=#{include_trash}" do
+      sql = Collection.readable_by(users(:active), include_trash: include_trash).to_sql
+      sql = "explain analyze #{sql}"
+      STDERR.puts sql
+      q = ActiveRecord::Base.connection.exec_query(sql)
+      q.rows.each do |row| STDERR.puts(row) end
+    end
+  end
  end
diff --git a/services/fuse/arvados_fuse/fusedir.py b/services/fuse/arvados_fuse/fusedir.py

index 7de95a0cb1b0d95bd1d67dcc58b5a3c406a863ff..f3816c0d3e783b6272c5abcc424641a4bb39d6dc 100644 (file)
--- a/services/fuse/arvados_fuse/fusedir.py
+++ b/services/fuse/arvados_fuse/fusedir.py
@@ -525,15 +525,23 @@ class CollectionDirectory(CollectionDirectoryBase):
                          self.collection.update()
                          new_collection_record = self.collection.api_response()
                      else:
+                        # If there's too many prefetch threads and you
+                        # max out the CPU, delivering data to the FUSE
+                        # layer actually ends up being slower.
+                        # Experimentally, capping 7 threads seems to
+                        # be a sweet spot.
+                        get_threads = min(max((self.api.keep.block_cache.cache_max // (64 * 1024 * 1024)) - 1, 1), 7)
                          # Create a new collection object
                          if uuid_pattern.match(self.collection_locator):
                              coll_reader = arvados.collection.Collection(
                                  self.collection_locator, self.api, self.api.keep,
-                                num_retries=self.num_retries)
+                                num_retries=self.num_retries,
+                                get_threads=get_threads)
                          else:
                              coll_reader = arvados.collection.CollectionReader(
                                  self.collection_locator, self.api, self.api.keep,
-                                num_retries=self.num_retries)
+                                num_retries=self.num_retries,
+                                get_threads=get_threads)
                          new_collection_record = coll_reader.api_response() or {}
                          # If the Collection only exists in Keep, there will be no API
                          # response.  Fill in the fields we need.
diff --git a/services/fuse/tests/test_mount.py b/services/fuse/tests/test_mount.py

index ece316193d4ee6a82cf04f6a685f09b0af453cf3..1601db59440be8b57c35b988869a1a56229ef92b 100644 (file)
--- a/services/fuse/tests/test_mount.py
+++ b/services/fuse/tests/test_mount.py
@@ -1088,6 +1088,7 @@ class FuseFsyncTest(FuseMagicTest):
  class MagicDirApiError(FuseMagicTest):
      def setUp(self):
          api = mock.MagicMock()
+        api.keep.block_cache = mock.MagicMock(cache_max=1)
          super(MagicDirApiError, self).setUp(api=api)
          api.collections().get().execute.side_effect = iter([
              Exception('API fail'),
diff --git a/services/keep-web/handler.go b/services/keep-web/handler.go

index 97ec95e3aac3f96111ab49014635ae742073b4e8..ef61b06873c50661bb29f622bfb1b5e9a1097495 100644 (file)
--- a/services/keep-web/handler.go
+++ b/services/keep-web/handler.go
@@ -913,6 +913,14 @@ func (h *handler) logUploadOrDownload(
                         WithField("collection_file_path", filepath)
                 props["collection_uuid"] = collection.UUID
                 props["collection_file_path"] = filepath
+               // h.determineCollection populates the collection_uuid prop with the PDH, if
+               // this collection is being accessed via PDH. In that case, blank the
+               // collection_uuid field so that consumers of the log entries can rely on it
+               // being a UUID, or blank. The PDH remains available via the
+               // portable_data_hash property.
+               if props["collection_uuid"] == collection.PortableDataHash {
+                       props["collection_uuid"] = ""
+               }
         }
         if r.Method == "PUT" || r.Method == "POST" {
                 log.Info("File upload")
diff --git a/tools/arvbash/arvbash.sh b/tools/arvbash/arvbash.sh

index ecad0888dfce8a9a3e4945b02a0a0fc903e243b3..1d4fbade8b7534b5bb05d1e8a0b8f2f121e754a0 100755 (executable)
--- a/tools/arvbash/arvbash.sh
+++ b/tools/arvbash/arvbash.sh
@@ -15,10 +15,10 @@ Syntax:
  arvswitch <name>
    Set ARVADOS_API_HOST and ARVADOS_API_TOKEN in the current environment based on
    $HOME/.config/arvados/<name>.conf
-  With no arguments, list available Arvados configurations.
+  With no arguments, print current API host and available Arvados configurations.
  
  arvsave <name>
-  Save values of ARVADOS_API_HOST and ARVADOS_API_TOKEN in the current environment to
+  Save current values of ARVADOS_API_HOST and ARVADOS_API_TOKEN in the current environment to
    $HOME/.config/arvados/<name>.conf
  
  arvrm <name>
@@ -26,12 +26,12 @@ arvrm <name>
  
  arvboxswitch <name>
    Set ARVBOX_CONTAINER to <name>
-  With no arguments, list available arvboxes.
+  With no arguments, print current arvbox and available arvboxes.
  
-arvopen:
+arvopen <uuid>
    Open an Arvados uuid in web browser (http://arvadosapi.com)
  
-arvissue
+arvissue <issue number>
    Open an Arvados ticket in web browser (http://dev.arvados.org)
  
  EOF
@@ -61,7 +61,8 @@ arvswitch() {
          fi
      else
          echo "Switch Arvados environment conf"
-        echo "Usage: arvswitch name"
+       echo "Current host: ${ARVADOS_API_HOST}"
+        echo "Usage: arvswitch <name>"
          echo "Available confs:" $((cd $HOME/.config/arvados && ls --indicator-style=none *.conf) | rev | cut -c6- | rev)
      fi
  }
@@ -73,7 +74,7 @@ arvsave() {
          env | grep ARVADOS_ > $HOME/.config/arvados/$1.conf
      else
          echo "Save current Arvados environment variables to conf file"
-        echo "Usage: arvsave name"
+        echo "Usage: arvsave <name>"
      fi
  }
  
@@ -86,25 +87,25 @@ arvrm() {
          fi
      else
          echo "Delete Arvados environment conf"
-        echo "Usage: arvrm name"
+        echo "Usage: arvrm <name>"
      fi
  }
  
  arvboxswitch() {
      if [[ -n "$1" ]] ; then
+        export ARVBOX_CONTAINER=$1
          if [[ -d $HOME/.arvbox/$1 ]] ; then
-            export ARVBOX_CONTAINER=$1
              echo "Arvbox switched to $1"
          else
-            echo "$1 unknown"
+            echo "Warning: $1 doesn't exist, will be created."
          fi
      else
          if test -z "$ARVBOX_CONTAINER" ; then
              ARVBOX_CONTAINER=arvbox
          fi
          echo "Switch Arvbox environment conf"
-        echo "Usage: arvboxswitch name"
          echo "Your current container is: $ARVBOX_CONTAINER"
+        echo "Usage: arvboxswitch <name>"
          echo "Available confs:" $(cd $HOME/.arvbox && ls --indicator-style=none)
      fi
  }
@@ -114,7 +115,7 @@ arvopen() {
          xdg-open https://arvadosapi.com/$1
      else
          echo "Open Arvados uuid in browser"
-        echo "Usage: arvopen uuid"
+        echo "Usage: arvopen <uuid>"
      fi
  }
  
@@ -123,6 +124,6 @@ arvissue() {
          xdg-open https://dev.arvados.org/issues/$1
      else
          echo "Open Arvados issue in browser"
-        echo "Usage: arvissue uuid"
+        echo "Usage: arvissue <issue number>"
      fi
  }
diff --git a/tools/arvbox/bin/arvbox b/tools/arvbox/bin/arvbox

index e021b442f1d7a0db09848c4364c0dcca45c7d0f0..e7416947d65d2abd5023f77f1b4de997b71c910d 100755 (executable)
--- a/tools/arvbox/bin/arvbox
+++ b/tools/arvbox/bin/arvbox
@@ -237,7 +237,7 @@ run() {
          fi
  
          if ! (docker ps -a | grep -E "$ARVBOX_CONTAINER-data$" -q) ; then
-            docker create -v /var/lib/postgresql -v $ARVADOS_CONTAINER_PATH --name $ARVBOX_CONTAINER-data arvados/arvbox-demo /bin/true
+            docker create -v /var/lib/postgresql -v $ARVADOS_CONTAINER_PATH --name $ARVBOX_CONTAINER-data arvados/arvbox-demo$TAG /bin/true
          fi
  
          docker run \
diff --git a/tools/arvbox/lib/arvbox/docker/createusers.sh b/tools/arvbox/lib/arvbox/docker/createusers.sh

index 9c81a66cedb5fcbb9f2efcef96bc6f93fbb8211f..4cafd8c09c2f6fbbd0758b53a9a0a1368765159d 100755 (executable)
--- a/tools/arvbox/lib/arvbox/docker/createusers.sh
+++ b/tools/arvbox/lib/arvbox/docker/createusers.sh
@@ -34,20 +34,12 @@ if ! grep "^arvbox:" /etc/passwd >/dev/null 2>/dev/null ; then
          chown arvbox:arvbox -R /usr/local $ARVADOS_CONTAINER_PATH \
                /var/lib/passenger /var/lib/postgresql \
                /var/lib/nginx /var/log/nginx /etc/ssl/private \
-              /var/lib/gopath /var/lib/pip /var/lib/npm \
-              /var/lib/arvados
+              /var/lib/gopath /var/lib/pip /var/lib/npm
      fi
  
      mkdir -p /tmp/crunch0 /tmp/crunch1
      chown crunch:crunch -R /tmp/crunch0 /tmp/crunch1
  
-    # singularity needs to be owned by root and suid
-    chown root /var/lib/arvados/bin/singularity \
-         /var/lib/arvados/etc/singularity/singularity.conf \
-         /var/lib/arvados/etc/singularity/capability.json \
-         /var/lib/arvados/etc/singularity/ecl.toml
-    chmod u+s /var/lib/arvados/bin/singularity
-
      echo "arvbox    ALL=(crunch) NOPASSWD: ALL" >> /etc/sudoers
  
      cat <<EOF > /etc/profile.d/paths.sh
diff --git a/tools/salt-install/provision.sh b/tools/salt-install/provision.sh

index 5f6c389b767f2bb9e99d42987519d79226e7e21a..f6e3bb3ae02f445fde34c50cad27d5eee6975e71 100755 (executable)
--- a/tools/salt-install/provision.sh
+++ b/tools/salt-install/provision.sh
@@ -417,30 +417,34 @@ for f in $(ls "${SOURCE_PILLARS_DIR}"/*); do
    "${f}" > "${P_DIR}"/$(basename "${f}")
  done
  
-if [ "x${TEST}" = "xyes" ] && [ ! -d "${SOURCE_TESTS_DIR}" ]; then
-  echo "You requested to run tests, but ${SOURCE_TESTS_DIR} does not exist or is not a directory. Exiting."
-  exit 1
-fi
-mkdir -p ${T_DIR}
-# Replace cluster and domain name in the test files
-for f in $(ls "${SOURCE_TESTS_DIR}"/*); do
-  FILTERS="s#__CLUSTER__#${CLUSTER}#g;
-       s#__CONTROLLER_EXT_SSL_PORT__#${CONTROLLER_EXT_SSL_PORT}#g;
-       s#__DOMAIN__#${DOMAIN}#g;
-       s#__IP_INT__#${IP_INT}#g;
-       s#__INITIAL_USER_EMAIL__#${INITIAL_USER_EMAIL}#g;
-       s#__INITIAL_USER_PASSWORD__#${INITIAL_USER_PASSWORD}#g
-       s#__INITIAL_USER__#${INITIAL_USER}#g;
-       s#__DATABASE_PASSWORD__#${DATABASE_PASSWORD}#g;
-       s#__SYSTEM_ROOT_TOKEN__#${SYSTEM_ROOT_TOKEN}#g"
-  if [ "$USE_SINGLE_HOSTNAME" = "yes" ]; then
-    FILTERS="s#__CLUSTER__.__DOMAIN__#${HOSTNAME_EXT}#g;
-       $FILTERS"
+if [ ! -d "${SOURCE_TESTS_DIR}" ]; then
+  echo "WARNING: The tests directory was not copied to \"${SOURCE_TESTS_DIR}\"."
+  if [ "x${TEST}" = "xyes" ]; then
+    echo "WARNING: Disabling tests for this installation."
    fi
-  sed "$FILTERS" \
-    "${f}" > ${T_DIR}/$(basename "${f}")
-done
-chmod 755 ${T_DIR}/run-test.sh
+  TEST="no"
+else
+  mkdir -p ${T_DIR}
+  # Replace cluster and domain name in the test files
+  for f in $(ls "${SOURCE_TESTS_DIR}"/*); do
+    FILTERS="s#__CLUSTER__#${CLUSTER}#g;
+         s#__CONTROLLER_EXT_SSL_PORT__#${CONTROLLER_EXT_SSL_PORT}#g;
+         s#__DOMAIN__#${DOMAIN}#g;
+         s#__IP_INT__#${IP_INT}#g;
+         s#__INITIAL_USER_EMAIL__#${INITIAL_USER_EMAIL}#g;
+         s#__INITIAL_USER_PASSWORD__#${INITIAL_USER_PASSWORD}#g
+         s#__INITIAL_USER__#${INITIAL_USER}#g;
+         s#__DATABASE_PASSWORD__#${DATABASE_PASSWORD}#g;
+         s#__SYSTEM_ROOT_TOKEN__#${SYSTEM_ROOT_TOKEN}#g"
+    if [ "$USE_SINGLE_HOSTNAME" = "yes" ]; then
+      FILTERS="s#__CLUSTER__.__DOMAIN__#${HOSTNAME_EXT}#g;
+         $FILTERS"
+    fi
+    sed "$FILTERS" \
+      "${f}" > ${T_DIR}/$(basename "${f}")
+  done
+  chmod 755 ${T_DIR}/run-test.sh
+fi
  
  # Replace helper state files that differ from the formula's examples
  if [ -d "${SOURCE_STATES_DIR}" ]; then
diff --git a/tools/user-activity/arvados_user_activity/main.py b/tools/user-activity/arvados_user_activity/main.py

index 997da57e052db81a25306507b23b3f60935b129e..26a4f28067663898ac660e647d91ba5fa71dbfb3 100755 (executable)
--- a/tools/user-activity/arvados_user_activity/main.py
+++ b/tools/user-activity/arvados_user_activity/main.py
@@ -10,12 +10,52 @@ import arvados
  import arvados.util
  import datetime
  import ciso8601
+import csv
  
  def parse_arguments(arguments):
      arg_parser = argparse.ArgumentParser()
-    arg_parser.add_argument('--days', type=int, required=True)
+    arg_parser.add_argument('--start', help='Start date for the report in YYYY-MM-DD format (UTC)')
+    arg_parser.add_argument('--end', help='End date for the report in YYYY-MM-DD format (UTC)')
+    arg_parser.add_argument('--days', type=int, help='Number of days before now() to start the report')
+    arg_parser.add_argument('--csv', action='store_true', help='Output in csv format (default: false)')
      args = arg_parser.parse_args(arguments)
-    return args
+
+    if args.days and (args.start or args.end):
+        arg_parser.print_help()
+        print("Error: either specify --days or both --start and --end")
+        exit(1)
+
+    if not args.days and (not args.start or not args.end):
+        arg_parser.print_help()
+        print("\nError: either specify --days or both --start and --end")
+        exit(1)
+
+    if (args.start and not args.end) or (args.end and not args.start):
+        arg_parser.print_help()
+        print("\nError: no start or end date found, either specify --days or both --start and --end")
+        exit(1)
+
+    if args.days:
+        to = datetime.datetime.utcnow()
+        since = to - datetime.timedelta(days=args.days)
+
+    if args.start:
+        try:
+            since = datetime.datetime.strptime(args.start,"%Y-%m-%d")
+        except:
+            arg_parser.print_help()
+            print("\nError: start date must be in YYYY-MM-DD format")
+            exit(1)
+
+    if args.end:
+        try:
+            to = datetime.datetime.strptime(args.end,"%Y-%m-%d")
+        except:
+            arg_parser.print_help()
+            print("\nError: end date must be in YYYY-MM-DD format")
+            exit(1)
+
+    return args, since, to
  
  def getowner(arv, uuid, owners):
      if uuid is None:
@@ -33,20 +73,46 @@ def getowner(arv, uuid, owners):
      return getowner(arv, owners[uuid], owners)
  
  def getuserinfo(arv, uuid):
-    u = arv.users().get(uuid=uuid).execute()
+    try:
+        u = arv.users().get(uuid=uuid).execute()
+    except:
+        return "deleted user (%susers/%s)" % (arv.config()["Services"]["Workbench1"]["ExternalURL"],
+                                                       uuid)
      prof = "\n".join("  %s: \"%s\"" % (k, v) for k, v in u["prefs"].get("profile", {}).items() if v)
      if prof:
          prof = "\n"+prof+"\n"
      return "%s %s <%s> (%susers/%s)%s" % (u["first_name"], u["last_name"], u["email"],
                                                         arv.config()["Services"]["Workbench1"]["ExternalURL"],
                                                         uuid, prof)
+def getuserinfocsv(arv, uuid):
+    try:
+        u = arv.users().get(uuid=uuid).execute()
+    except:
+        return [uuid,"deleted","user",""]
+    return [uuid, u["first_name"], u["last_name"], u["email"]]
+
  
  collectionNameCache = {}
-def getCollectionName(arv, uuid):
-    if uuid not in collectionNameCache:
-        u = arv.collections().get(uuid=uuid).execute()
-        collectionNameCache[uuid] = u["name"]
-    return collectionNameCache[uuid]
+def getCollectionName(arv, uuid, pdh):
+    lookupField = uuid
+    filters = [["uuid","=",uuid]]
+    cached = uuid in collectionNameCache
+    # look up by uuid if it is available, fall back to look up by pdh
+    if len(uuid) != 27:
+        # Look up by pdh. Note that this can be misleading; the download could
+        # have happened from a collection with the same pdh but different name.
+        # We arbitrarily pick the oldest collection with the pdh to lookup the
+        # name, if the uuid for the request is not known.
+        lookupField = pdh
+        filters = [["portable_data_hash","=",pdh]]
+        cached = pdh in collectionNameCache
+
+    if not cached:
+        u = arv.collections().list(filters=filters,order="created_at",limit=1).execute().get("items")
+        if len(u) < 1:
+            return "(deleted)"
+        collectionNameCache[lookupField] = u[0]["name"]
+    return collectionNameCache[lookupField]
  
  def getname(u):
      return "\"%s\" (%s)" % (u["name"], u["uuid"])
@@ -55,17 +121,20 @@ def main(arguments=None):
      if arguments is None:
          arguments = sys.argv[1:]
  
-    args = parse_arguments(arguments)
+    args, since, to = parse_arguments(arguments)
  
      arv = arvados.api()
  
-    since = datetime.datetime.utcnow() - datetime.timedelta(days=args.days)
+    prefix = ''
+    suffix = "\n"
+    if args.csv:
+        prefix = '# '
+        suffix = ''
+    print("%sUser activity on %s between %s and %s%s" % (prefix, arv.config()["ClusterID"],
+                                                       since.isoformat(sep=" ", timespec="minutes"),
+                                                       to.isoformat(sep=" ", timespec="minutes"), suffix))
  
-    print("User activity on %s between %s and %s\n" % (arv.config()["ClusterID"],
-                                                       (datetime.datetime.now() - datetime.timedelta(days=args.days)).isoformat(sep=" ", timespec="minutes"),
-                                                       datetime.datetime.now().isoformat(sep=" ", timespec="minutes")))
-
-    events = arvados.util.keyset_list_all(arv.logs().list, filters=[["created_at", ">=", since.isoformat()]])
+    events = arvados.util.keyset_list_all(arv.logs().list, filters=[["created_at", ">=", since.isoformat()],["created_at", "<", to.isoformat()]])
  
      users = {}
      owners = {}
@@ -74,99 +143,112 @@ def main(arguments=None):
          owner = getowner(arv, e["object_owner_uuid"], owners)
          users.setdefault(owner, [])
          event_at = ciso8601.parse_datetime(e["event_at"]).astimezone().isoformat(sep=" ", timespec="minutes")
-        # loguuid = e["uuid"]
-        loguuid = ""
+        loguuid = e["uuid"]
  
          if e["event_type"] == "create" and e["object_uuid"][6:11] == "tpzed":
              users.setdefault(e["object_uuid"], [])
-            users[e["object_uuid"]].append("%s User account created" % event_at)
+            users[e["object_uuid"]].append([loguuid, event_at, "User account created"])
  
          elif e["event_type"] == "update" and e["object_uuid"][6:11] == "tpzed":
              pass
  
          elif e["event_type"] == "create" and e["object_uuid"][6:11] == "xvhdp":
              if e["properties"]["new_attributes"]["requesting_container_uuid"] is None:
-                users[owner].append("%s Ran container %s %s" % (event_at, getname(e["properties"]["new_attributes"]), loguuid))
+                users[owner].append([loguuid, event_at, "Ran container %s" % (getname(e["properties"]["new_attributes"]))])
  
          elif e["event_type"] == "update" and e["object_uuid"][6:11] == "xvhdp":
              pass
  
          elif e["event_type"] == "create" and e["object_uuid"][6:11] == "j7d0g":
-            users[owner].append("%s Created project %s" %  (event_at, getname(e["properties"]["new_attributes"])))
+            users[owner].append([loguuid, event_at,"Created project %s" % (getname(e["properties"]["new_attributes"]))])
  
          elif e["event_type"] == "delete" and e["object_uuid"][6:11] == "j7d0g":
-            users[owner].append("%s Deleted project %s" % (event_at, getname(e["properties"]["old_attributes"])))
+            users[owner].append([loguuid, event_at,"Deleted project %s" % (getname(e["properties"]["old_attributes"]))])
  
          elif e["event_type"] == "update" and e["object_uuid"][6:11] == "j7d0g":
-            users[owner].append("%s Updated project %s" % (event_at, getname(e["properties"]["new_attributes"])))
+            users[owner].append([loguuid, event_at,"Updated project %s" % (getname(e["properties"]["new_attributes"]))])
  
          elif e["event_type"] in ("create", "update") and e["object_uuid"][6:11] == "gj3su":
              since_last = None
-            if len(users[owner]) > 0 and users[owner][-1].endswith("activity"):
-                sp = users[owner][-1].split(" ")
-                start = sp[0]+" "+sp[1]
-                since_last = ciso8601.parse_datetime(event_at) - ciso8601.parse_datetime(sp[3]+" "+sp[4])
+            if len(users[owner]) > 0 and users[owner][-1][-1].endswith("activity"):
+                sp = users[owner][-1][-1].split(" ")
+                start = users[owner][-1][1]
+                since_last = ciso8601.parse_datetime(event_at) - ciso8601.parse_datetime(sp[1]+" "+sp[2])
                  span = ciso8601.parse_datetime(event_at) - ciso8601.parse_datetime(start)
  
              if since_last is not None and since_last < datetime.timedelta(minutes=61):
-                users[owner][-1] = "%s to %s (%02d:%02d) Account activity" % (start, event_at, span.days*24 + int(span.seconds/3600), int((span.seconds % 3600)/60))
+                users[owner][-1] = [loguuid, start,"to %s (%02d:%02d) Account activity" % (event_at, span.days*24 + int(span.seconds/3600), int((span.seconds % 3600)/60))]
              else:
-                users[owner].append("%s to %s (0:00) Account activity" % (event_at, event_at))
+                users[owner].append([loguuid, event_at,"to %s (0:00) Account activity" % (event_at)])
  
          elif e["event_type"] == "create" and e["object_uuid"][6:11] == "o0j2j":
              if e["properties"]["new_attributes"]["link_class"] == "tag":
-                users[owner].append("%s Tagged %s" % (event_at, e["properties"]["new_attributes"]["head_uuid"]))
+                users[owner].append([event_at,"Tagged %s" % (e["properties"]["new_attributes"]["head_uuid"])])
              elif e["properties"]["new_attributes"]["link_class"] == "permission":
-                users[owner].append("%s Shared %s with %s" % (event_at, e["properties"]["new_attributes"]["tail_uuid"], e["properties"]["new_attributes"]["head_uuid"]))
+                users[owner].append([loguuid, event_at,"Shared %s with %s" % (e["properties"]["new_attributes"]["tail_uuid"], e["properties"]["new_attributes"]["head_uuid"])])
              else:
-                users[owner].append("%s %s %s %s" % (e["event_type"], e["object_kind"], e["object_uuid"], loguuid))
+                users[owner].append([loguuid, event_at,"%s %s %s" % (e["event_type"], e["object_kind"], e["object_uuid"])])
  
          elif e["event_type"] == "delete" and e["object_uuid"][6:11] == "o0j2j":
              if e["properties"]["old_attributes"]["link_class"] == "tag":
-                users[owner].append("%s Untagged %s" % (event_at, e["properties"]["old_attributes"]["head_uuid"]))
+                users[owner].append([loguuid, event_at,"Untagged %s" % (e["properties"]["old_attributes"]["head_uuid"])])
              elif e["properties"]["old_attributes"]["link_class"] == "permission":
-                users[owner].append("%s Unshared %s with %s" % (event_at, e["properties"]["old_attributes"]["tail_uuid"], e["properties"]["old_attributes"]["head_uuid"]))
+                users[owner].append([loguuid, event_at,"Unshared %s with %s" % (e["properties"]["old_attributes"]["tail_uuid"], e["properties"]["old_attributes"]["head_uuid"])])
              else:
-                users[owner].append("%s %s %s %s" % (e["event_type"], e["object_kind"], e["object_uuid"], loguuid))
+                users[owner].append([loguuid, event_at,"%s %s %s" % (e["event_type"], e["object_kind"], e["object_uuid"])])
  
          elif e["event_type"] == "create" and e["object_uuid"][6:11] == "4zz18":
              if e["properties"]["new_attributes"]["properties"].get("type") in ("log", "output", "intermediate"):
                  pass
              else:
-                users[owner].append("%s Created collection %s %s" % (event_at, getname(e["properties"]["new_attributes"]), loguuid))
+                users[owner].append([loguuid, event_at,"Created collection %s" % (getname(e["properties"]["new_attributes"]))])
  
          elif e["event_type"] == "update" and e["object_uuid"][6:11] == "4zz18":
-            users[owner].append("%s Updated collection %s %s" % (event_at, getname(e["properties"]["new_attributes"]), loguuid))
+            users[owner].append([loguuid, event_at,"Updated collection %s" % (getname(e["properties"]["new_attributes"]))])
  
          elif e["event_type"] == "delete" and e["object_uuid"][6:11] == "4zz18":
              if e["properties"]["old_attributes"]["properties"].get("type") in ("log", "output", "intermediate"):
                  pass
              else:
-                users[owner].append("%s Deleted collection %s %s" % (event_at, getname(e["properties"]["old_attributes"]), loguuid))
+                users[owner].append([loguuid, event_at, "Deleted collection %s" % (getname(e["properties"]["old_attributes"]))])
  
          elif e["event_type"] == "file_download":
-                users[e["object_uuid"]].append("%s Downloaded file \"%s\" from \"%s\" (%s) (%s)" % (event_at,
+                users.setdefault(e["object_uuid"], [])
+                users[e["object_uuid"]].append([loguuid, event_at, "Downloaded file \"%s\" from \"%s\" (%s) (%s)" % (
                                                                                         e["properties"].get("collection_file_path") or e["properties"].get("reqPath"),
-                                                                                       getCollectionName(arv, e["properties"].get("collection_uuid")),
+                                                                                       getCollectionName(arv, e["properties"].get("collection_uuid"), e["properties"].get("portable_data_hash")),
                                                                                         e["properties"].get("collection_uuid"),
-                                                                                       e["properties"].get("portable_data_hash")))
+                                                                                       e["properties"].get("portable_data_hash"))])
+
  
          elif e["event_type"] == "file_upload":
-                users[e["object_uuid"]].append("%s Uploaded file \"%s\" to \"%s\" (%s)" % (event_at,
+                users.setdefault(e["object_uuid"], [])
+                users[e["object_uuid"]].append([loguuid, event_at, "Uploaded file \"%s\" to \"%s\" (%s)" % (
                                                                                      e["properties"].get("collection_file_path") or e["properties"].get("reqPath"),
-                                                                                    getCollectionName(arv, e["properties"].get("collection_uuid")),
-                                                                                    e["properties"].get("collection_uuid")))
+                                                                                    getCollectionName(arv, e["properties"].get("collection_uuid"), e["properties"].get("portable_data_hash")),
+                                                                                    e["properties"].get("collection_uuid"))])
  
          else:
-            users[owner].append("%s %s %s %s" % (e["event_type"], e["object_kind"], e["object_uuid"], loguuid))
+            users[owner].append([loguuid, event_at, "%s %s %s" % (e["event_type"], e["object_kind"], e["object_uuid"])])
+
+    if args.csv:
+        csvwriter = csv.writer(sys.stdout, dialect='unix')
  
      for k,v in users.items():
          if k is None or k.endswith("-tpzed-000000000000000"):
              continue
-        print(getuserinfo(arv, k))
-        for ev in v:
-            print("  %s" % ev)
-        print("")
+        if not args.csv:
+          print(getuserinfo(arv, k))
+          for ev in v:
+              # Remove the log entry uuid, this report is intended for human consumption
+              ev.pop(0)
+              print("  %s" % ' '.join(ev))
+          print("")
+        else:
+          user = getuserinfocsv(arv, k)
+          for ev in v:
+            ev = user + ev
+            csvwriter.writerow(ev)
  
  if __name__ == "__main__":
      main()
author	Javier Bértoli <jbertoli@curii.com>
	Wed, 6 Apr 2022 13:50:51 +0000 (10:50 -0300)
committer	Javier Bértoli <jbertoli@curii.com>
	Wed, 6 Apr 2022 13:50:51 +0000 (10:50 -0300)
.licenseignore		patch \| blob \| history
AUTHORS		patch \| blob \| history
CITATION.cff	[new file with mode: 0644]	patch \| blob
apps/workbench/app/controllers/application_controller.rb		patch \| blob \| history
apps/workbench/config/arvados_config.rb		patch \| blob \| history
build/run-tests.sh		patch \| blob \| history
doc/_includes/_container_scheduling_parameters.liquid		patch \| blob \| history
doc/admin/spot-instances.html.textile.liquid		patch \| blob \| history
doc/admin/upgrading.html.textile.liquid		patch \| blob \| history
doc/api/methods/collections.html.textile.liquid		patch \| blob \| history
doc/api/methods/groups.html.textile.liquid		patch \| blob \| history
doc/install/crunch2-cloud/install-dispatch-cloud.html.textile.liquid		patch \| blob \| history
doc/install/install-postgresql.html.textile.liquid		patch \| blob \| history
doc/sdk/cli/subcommands.html.textile.liquid		patch \| blob \| history
doc/user/cwl/cwl-extensions.html.textile.liquid		patch \| blob \| history
doc/user/cwl/cwl-run-options.html.textile.liquid		patch \| blob \| history
doc/user/cwl/cwl-style.html.textile.liquid		patch \| blob \| history
lib/config/config.default.yml		patch \| blob \| history
lib/config/export.go		patch \| blob \| history
lib/config/load.go		patch \| blob \| history
lib/config/load_test.go		patch \| blob \| history
lib/controller/federation/conn.go		patch \| blob \| history
lib/controller/handler_test.go		patch \| blob \| history
lib/controller/integration_test.go		patch \| blob \| history
lib/controller/router/response.go		patch \| blob \| history
lib/controller/router/router.go		patch \| blob \| history
lib/crunchrun/crunchrun.go		patch \| blob \| history
lib/crunchrun/crunchrun_test.go		patch \| blob \| history
sdk/cwl/arvados_cwl/__init__.py		patch \| blob \| history
sdk/cwl/arvados_cwl/arv-cwl-schema-v1.0.yml		patch \| blob \| history
sdk/cwl/arvados_cwl/arv-cwl-schema-v1.1.yml		patch \| blob \| history
sdk/cwl/arvados_cwl/arv-cwl-schema-v1.2.yml		patch \| blob \| history
sdk/cwl/arvados_cwl/arvcontainer.py		patch \| blob \| history
sdk/cwl/arvados_cwl/context.py		patch \| blob \| history
sdk/cwl/arvados_cwl/runner.py		patch \| blob \| history
sdk/cwl/test_with_arvbox.sh		patch \| blob \| history
sdk/cwl/tests/test_container.py		patch \| blob \| history
sdk/cwl/tests/test_submit.py		patch \| blob \| history
sdk/go/arvados/client.go		patch \| blob \| history
sdk/go/arvados/client_test.go		patch \| blob \| history
sdk/go/arvados/config.go		patch \| blob \| history
sdk/go/arvados/group.go		patch \| blob \| history
sdk/go/httpserver/logger.go		patch \| blob \| history
sdk/python/arvados/arvfile.py		patch \| blob \| history
sdk/python/arvados/collection.py		patch \| blob \| history
sdk/python/arvados/commands/get.py		patch \| blob \| history
sdk/python/arvados/keep.py		patch \| blob \| history
sdk/python/arvados/util.py		patch \| blob \| history
sdk/python/tests/test_arvfile.py		patch \| blob \| history
sdk/python/tests/test_collections.py		patch \| blob \| history
sdk/python/tests/test_util.py		patch \| blob \| history
services/api/app/controllers/arvados/v1/links_controller.rb		patch \| blob \| history
services/api/app/models/api_client_authorization.rb		patch \| blob \| history
services/api/app/models/arvados_model.rb		patch \| blob \| history
services/api/app/models/frozen_group.rb	[new file with mode: 0644]	patch \| blob
services/api/app/models/group.rb		patch \| blob \| history
services/api/app/models/user.rb		patch \| blob \| history
services/api/db/migrate/20220224203102_add_frozen_by_uuid_to_groups.rb	[new file with mode: 0644]	patch \| blob
services/api/db/migrate/20220301155729_frozen_groups.rb	[new file with mode: 0644]	patch \| blob
services/api/db/migrate/20220303204419_add_frozen_by_uuid_to_group_search_index.rb	[new file with mode: 0644]	patch \| blob
services/api/db/migrate/20220401153101_fix_created_at_indexes.rb	[new file with mode: 0644]	patch \| blob
services/api/db/structure.sql		patch \| blob \| history
services/api/lib/20200501150153_permission_table_constants.rb		patch \| blob \| history
services/api/test/fixtures/jobs.yml		patch \| blob \| history
services/api/test/fixtures/pipeline_instances.yml		patch \| blob \| history
services/api/test/functional/arvados/v1/groups_controller_test.rb		patch \| blob \| history
services/api/test/functional/arvados/v1/query_test.rb		patch \| blob \| history
services/api/test/integration/permissions_test.rb		patch \| blob \| history
services/api/test/integration/select_test.rb		patch \| blob \| history
services/api/test/unit/container_request_test.rb		patch \| blob \| history
services/api/test/unit/group_test.rb		patch \| blob \| history
services/api/test/unit/permission_test.rb		patch \| blob \| history
services/fuse/arvados_fuse/fusedir.py		patch \| blob \| history
services/fuse/tests/test_mount.py		patch \| blob \| history
services/keep-web/handler.go		patch \| blob \| history
tools/arvbash/arvbash.sh		patch \| blob \| history
tools/arvbox/bin/arvbox		patch \| blob \| history
tools/arvbox/lib/arvbox/docker/createusers.sh		patch \| blob \| history
tools/salt-install/provision.sh		patch \| blob \| history
tools/user-activity/arvados_user_activity/main.py		patch \| blob \| history